From ea479a11d2c2682e1f4168e2367cfc4b0cdaeb4d Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Tue, 22 Feb 2022 13:41:06 -0800 Subject: [PATCH 01/38] CADC-10809 - interim commit - add the classes WCSParser, FitsWcsParser, BlueprintParser to the inheritance hierarchy. --- caom2utils/caom2utils/data_util.py | 2 +- caom2utils/caom2utils/fits2caom2.py | 2131 ++++++++++++++------------- 2 files changed, 1081 insertions(+), 1052 deletions(-) diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index 07ea7c76..8a33690b 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -401,7 +401,7 @@ def get_local_file_info(fqn): meta = FileInfo( id=path.basename(fqn), size=s.st_size, - md5sum=hash_md5.hexdigest(), + md5sum=f'md5:{hash_md5.hexdigest()}', file_type=get_file_type(fqn), ) return meta diff --git a/caom2utils/caom2utils/fits2caom2.py b/caom2utils/caom2utils/fits2caom2.py index 168fcef2..8ad4ef4c 100755 --- a/caom2utils/caom2utils/fits2caom2.py +++ b/caom2utils/caom2utils/fits2caom2.py @@ -76,7 +76,10 @@ from astropy.time import Time from cadcutils import version from caom2.caom_util import int_32 -from caom2 import Artifact, Part, Chunk, Plane, Observation, CoordError +from caom2 import ( + Artifact, Part, Chunk, Plane, Observation, CoordError, + RefCoord, CoordRange1D, CoordRange2D, Coord2D, +) from caom2 import SpectralWCS, CoordAxis1D, Axis, CoordFunction1D, RefCoord from caom2 import SpatialWCS, Dimension2D, Coord2D, CoordFunction2D from caom2 import CoordAxis2D, CoordRange1D, PolarizationWCS, TemporalWCS @@ -109,7 +112,7 @@ APP_NAME = 'caom2gen' -__all__ = ['FitsParser', 'WcsParser', 'DispatchingFormatter', +__all__ = ['FitsParser', 'FitsWcsParser', 'DispatchingFormatter', 'ObsBlueprint', 'get_arg_parser', 'proc', 'POLARIZATION_CTYPES', 'gen_proc', 'get_gen_proc_arg_parser', 'GenericParser', 'augment', 'get_vos_headers', @@ -1486,16 +1489,15 @@ class GenericParser: """ Extract CAOM2 metadata from files with no WCS information. 
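    A minimal usage sketch for this class as it stands after this change
    (the former logging_name argument is gone; uri is what shows up in the
    log messages). The blueprint key, URI, and product id values below are
    invented for illustration, and obs is assumed to be an existing
    Observation instance like the ones built in the FitsParser examples
    further down:

        bp = ObsBlueprint()
        bp.set('Observation.type', 'OBJECT')
        parser = GenericParser(obs_blueprint=bp, uri='ad:TEST/no_wcs.txt')
        parser.augment_observation(obs, 'ad:TEST/no_wcs.txt',
                                   product_id='no_wcs')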
""" - def __init__(self, obs_blueprint=None, logging_name=None, uri=None): + def __init__(self, obs_blueprint=None, uri=None): if obs_blueprint: self._blueprint = obs_blueprint else: self._blueprint = ObsBlueprint() self._errors = [] - self.logging_name = logging_name self.logger = logging.getLogger(__name__) self.uri = uri - self._apply_blueprint_to_generic() + self.apply_blueprint() @property def blueprint(self): @@ -1504,7 +1506,28 @@ def blueprint(self): @blueprint.setter def blueprint(self, value): self._blueprint = value - self._apply_blueprint_to_generic() + self.apply_blueprint() + + def apply_blueprint(self): + plan = self.blueprint._plan + + # first apply the functions + if (self.blueprint._module is not None or + self.blueprint._module_instance is not None): + for key, value in plan.items(): + if ObsBlueprint.is_function(value): + if self._blueprint._module_instance is None: + plan[key] = self._execute_external(value, key, 0) + else: + plan[key] = self._execute_external_instance( + value, key, 0) + + # apply defaults + for key, value in plan.items(): + if ObsBlueprint.is_fits(value) and value[1]: + # there is a default value set + if key in plan: + plan[key] = value[1] def augment_observation(self, observation, artifact_uri, product_id=None): """ @@ -1594,9 +1617,8 @@ def augment_artifact(self, artifact): Augments a given CAOM2 artifact with available FITS information :param artifact: existing CAOM2 artifact to be augmented """ - self.logger.debug( - 'Begin generic CAOM2 artifact augmentation for {}.'.format( - self.logging_name)) + self.logger.debug('Begin generic CAOM2 artifact augmentation for ' + '{}.'.format(self.uri)) if artifact is None or not isinstance(artifact, Artifact): raise ValueError( f'Artifact type mis-match for {artifact}') @@ -1622,7 +1644,7 @@ def augment_artifact(self, artifact): 'Artifact.metaProducer', index=0, current=artifact.meta_producer) self.logger.debug( 'End generic CAOM2 artifact augmentation for {}.'.format( - self.logging_name)) + self.uri)) def _get_from_list(self, lookup, index, current=None): value = None @@ -1684,28 +1706,6 @@ def _to_enum_type(self, value, to_enum_type): else: return to_enum_type(value) - def _apply_blueprint_to_generic(self): - - plan = self.blueprint._plan - - # first apply the functions - if (self.blueprint._module is not None or - self.blueprint._module_instance is not None): - for key, value in plan.items(): - if ObsBlueprint.is_function(value): - if self._blueprint._module_instance is None: - plan[key] = self._execute_external(value, key, 0) - else: - plan[key] = self._execute_external_instance( - value, key, 0) - - # apply defaults - for key, value in plan.items(): - if ObsBlueprint.is_fits(value) and value[1]: - # there is a default value set - if key in plan: - plan[key] = value[1] - def _execute_external(self, value, key, extension): """Execute a function supplied by a user, assign a value to a blueprint entry. The input parameters passed to the function are the @@ -1832,92 +1832,10 @@ def _get_datetime(self, from_value): return None -class FitsParser(GenericParser): - """ - Parses a FITS file and extracts the CAOM2 related information which can - be used to augment an existing CAOM2 observation, plane or artifact. The - constructor takes either a FITS file as argument or a list of dictionaries - (FITS keyword=value) corresponding to each extension. - - The WCS-related keywords of the FITS file are consumed by the astropy.wcs - package which might display warnings with regards to compliance. 
- - Example 1: - parser = FitsParser(input = '/staging/700000o.fits.gz') - ... - # customize parser.headers by deleting, changing or adding attributes - - obs = Observation(collection='TEST', observation_id='700000', - algorithm='exposure') - plane = Plane(plane_id='700000-1') - obs.plane.add(plane) - - artifact = Artifact(uri='ad:CFHT/700000o.fits.gz', product_type='science', - release_type='data') - plane.artifacts.add(artifact) - - parser.augment_observation(obs) - - # further update obs - - - Example 2: - - headers = [] # list of dictionaries headers - # populate headers - parser = FitsParser(input=headers) - - parser.augment_observation(obs) - ... - - """ - - def __init__(self, src, obs_blueprint=None, uri=None): - """ - Ctor - :param src: List of headers (dictionary of FITS keywords:value) with - one header for each extension or a FITS input file. - :param obs_blueprint: externally provided blueprint - :param uri: which artifact augmentation is based on - """ - self.logger = logging.getLogger(__name__) - self._headers = [] - self.parts = 0 - self.file = '' - if isinstance(src, list): - # assume this is the list of headers - self._headers = src - else: - # assume file - self.file = src - self._headers = data_util.get_local_headers_from_fits(self.file) - if obs_blueprint: - self._blueprint = obs_blueprint - else: - self._blueprint = ObsBlueprint() - self._errors = [] - self.logging_name = self.file - # for command-line parameter to module execution - self.uri = uri - self.apply_blueprint_to_fits() - - @property - def headers(self): - """ - List of headers where each header should allow dictionary like - access to the FITS attribute in that header - :return: - """ - return self._headers - - @property - def blueprint(self): - return self._blueprint +class BlueprintParser(GenericParser): - @blueprint.setter - def blueprint(self, value): - self._blueprint = value - self.apply_blueprint_to_fits() + def __init__(self, obs_blueprint=None, uri=None): + super().__init__(obs_blueprint, uri) def augment_artifact(self, artifact): """ @@ -1938,16 +1856,18 @@ def augment_artifact(self, artifact): for i, header in enumerate(self.headers): ii = str(i) - # there is one Part per extension, the name is the extension number - if self._has_data_array(header) and self.blueprint.has_chunk(i): - if ii not in artifact.parts.keys(): - # TODO use extension name? - artifact.parts.add(Part(ii)) - self.logger.debug(f'Part created for HDU {ii}.') - else: - artifact.parts.add(Part(ii)) - self.logger.debug(f'Create empty part for HDU {ii}') + if self.ignore_chunks(artifact, i, ii): continue + # # there is one Part per extension, the name is the extension number + # if self._has_data_array(header) and self.blueprint.has_chunk(i): + # if ii not in artifact.parts.keys(): + # # TODO use extension name? + # artifact.parts.add(Part(ii)) + # self.logger.debug(f'Part created for HDU {ii}.') + # else: + # artifact.parts.add(Part(ii)) + # self.logger.debug(f'Create empty part for HDU {ii}') + # continue part = artifact.parts[ii] part.product_type = self._get_from_list('Part.productType', i) @@ -1962,7 +1882,7 @@ def augment_artifact(self, artifact): chunk.meta_producer = self._get_from_list( 'Chunk.metaProducer', index=0, current=chunk.meta_producer) - wcs_parser = WcsParser(header, self.file, ii) + wcs_parser = FitsWcsParser(header, self.file, ii) # NOTE: astropy.wcs does not distinguished between WCS axes and # data array axes. 
naxis in astropy.wcs represents in fact the # number of WCS axes, whereas chunk.axis represents the naxis @@ -1977,6 +1897,7 @@ def augment_artifact(self, artifact): wcs_parser.wcs.wcs.naxis) if self.blueprint._pos_axes_configed: wcs_parser.augment_position(chunk) + logging.error(chunk.position) if chunk.position is None: self._try_position_with_blueprint(chunk, i) if chunk.position: @@ -2016,90 +1937,718 @@ def augment_artifact(self, artifact): self.logger.debug( f'End artifact augmentation for {artifact.uri}.') - def _try_range_with_blueprint(self, chunk, index): - """Use the blueprint to set elements and attributes that - are not in the scope of astropy and fits, and therefore are not - covered by the WcsParser class. Per PD 19/04/18, bounds and - range are not covered by WCS keywords.""" - - for i in ['energy', 'time', 'polarization']: - axis_configed = getattr(self.blueprint, - f'_{i}_axis_configed') - if axis_configed: - wcs = getattr(chunk, i) - if wcs is not None and wcs.axis is not None: - if wcs.axis.range is None: - self._try_range(wcs, index, i) - self._try_position_range(chunk, index) - - def _try_range(self, wcs, index, lookup): - self.logger.debug(f'Try to set the range for {lookup}') - aug_range_start = self._two_param_constructor( - f'Chunk.{lookup}.axis.range.start.pix', - f'Chunk.{lookup}.axis.range.start.val', - index, _to_float, RefCoord) - aug_range_end = self._two_param_constructor( - f'Chunk.{lookup}.axis.range.end.pix', - f'Chunk.{lookup}.axis.range.end.val', - index, _to_float, RefCoord) - if aug_range_start and aug_range_end: - wcs.axis.range = CoordRange1D(aug_range_start, aug_range_end) - self.logger.debug(f'Completed setting range for {lookup}') + def augment_observation(self, observation, artifact_uri, product_id=None): + """ + Augments a given observation with available FITS information. + :param observation: existing CAOM2 observation to be augmented. 
+ :param artifact_uri: the key for finding the artifact to augment + :param product_id: the key for finding for the plane to augment + """ + super().augment_observation(observation, artifact_uri, product_id) + self.logger.debug( + 'Begin observation augmentation for URI {}.'.format( + artifact_uri)) + members = self._get_members(observation) + if members: + if isinstance(members, TypedSet): + for m in members: + observation.members.add(m) + else: + for m in members.split(): + observation.members.add(ObservationURI(m)) + observation.algorithm = self._get_algorithm(observation) - def _try_position_range(self, chunk, index): - self.logger.debug('Try to set the range for position from blueprint') - if (self.blueprint._pos_axes_configed and chunk.position is not None - and chunk.position.axis is not None): - aug_range_c1_start = self._two_param_constructor( - 'Chunk.position.axis.range.start.coord1.pix', - 'Chunk.position.axis.range.start.coord1.val', - index, _to_float, RefCoord) - aug_range_c1_end = self._two_param_constructor( - 'Chunk.position.axis.range.end.coord1.pix', - 'Chunk.position.axis.range.end.coord1.val', - index, _to_float, RefCoord) - aug_range_c2_start = self._two_param_constructor( - 'Chunk.position.axis.range.start.coord2.pix', - 'Chunk.position.axis.range.start.coord2.val', - index, _to_float, RefCoord) - aug_range_c2_end = self._two_param_constructor( - 'Chunk.position.axis.range.end.coord2.pix', - 'Chunk.position.axis.range.end.coord2.val', - index, _to_float, RefCoord) - if (aug_range_c1_start and aug_range_c1_end and aug_range_c2_start - and aug_range_c2_end): - chunk.position.axis.range = CoordRange2D( - Coord2D(aug_range_c1_start, aug_range_c1_end), - Coord2D(aug_range_c2_start, aug_range_c2_end)) - self.logger.debug('Completed setting range for position') + observation.sequence_number = _to_int(self._get_from_list( + 'Observation.sequenceNumber', index=0)) + observation.intent = self._get_from_list( + 'Observation.intent', 0, (ObservationIntentType.SCIENCE if + observation.intent is None else + observation.intent)) + observation.type = self._get_from_list('Observation.type', 0, + current=observation.type) + observation.meta_release = self._get_datetime( + self._get_from_list('Observation.metaRelease', 0, + current=observation.meta_release)) + observation.meta_read_groups = self._get_from_list( + 'Observation.metaReadGroups', 0) + observation.meta_producer = self._get_from_list( + 'Observation.metaProducer', 0, current=observation.meta_producer) + observation.requirements = self._get_requirements( + observation.requirements) + observation.instrument = self._get_instrument(observation.instrument) + observation.proposal = self._get_proposal(observation.proposal) + observation.target = self._get_target(observation.target) + observation.target_position = self._get_target_position( + observation.target_position) + observation.telescope = self._get_telescope(observation.telescope) + observation.environment = self._get_environment( + observation.environment) + self.logger.debug( + f'End observation augmentation for {artifact_uri}.') - def _try_position_with_blueprint(self, chunk, index): + def augment_plane(self, plane, artifact_uri): """ - A mechanism to augment the Position WCS completely from the blueprint. - Do nothing if the WCS information cannot be correctly created. - - :param chunk: The chunk to modify with the addition of position - information. - :param index: The index in the blueprint for looking up plan - information. 
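        A sketch of the minimum blueprint entries the function-based branch
        of _try_position_with_blueprint can work from, for a hypothetical
        1024x1024 tangent-plane image. The key names mirror the
        _two_param_constructor lookups in that method; every number here is
        invented, and bp is assumed to be the ObsBlueprint handed to the
        parser:

            bp.configure_position_axes((1, 2))
            bp.set('Chunk.position.axis.axis1.ctype', 'RA---TAN')
            bp.set('Chunk.position.axis.axis2.ctype', 'DEC--TAN')
            bp.set('Chunk.position.axis.function.dimension.naxis1', 1024)
            bp.set('Chunk.position.axis.function.dimension.naxis2', 1024)
            bp.set('Chunk.position.axis.function.refCoord.coord1.pix', 512.0)
            bp.set('Chunk.position.axis.function.refCoord.coord1.val', 210.5)
            bp.set('Chunk.position.axis.function.refCoord.coord2.pix', 512.0)
            bp.set('Chunk.position.axis.function.refCoord.coord2.val', 54.3)
            bp.set('Chunk.position.axis.function.cd11', -5.0e-5)
            bp.set('Chunk.position.axis.function.cd12', 0.0)
            bp.set('Chunk.position.axis.function.cd21', 0.0)
            bp.set('Chunk.position.axis.function.cd22', 5.0e-5)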
+ Augments a given plane with available FITS information. + :param plane: existing CAOM2 plane to be augmented. + :param artifact_uri: """ - self.logger.debug('Begin augmentation with blueprint for position.') + super().augment_plane(plane, artifact_uri) + self.logger.debug( + f'Begin plane augmentation for {artifact_uri}.') - aug_x_axis = self._two_param_constructor( - 'Chunk.position.axis.axis1.ctype', - 'Chunk.position.axis.axis1.cunit', index, _to_str, Axis) - aug_y_axis = self._two_param_constructor( - 'Chunk.position.axis.axis2.ctype', - 'Chunk.position.axis.axis2.cunit', index, _to_str, Axis) - aug_x_error = self._two_param_constructor( - 'Chunk.position.axis.error1.syser', - 'Chunk.position.axis.error1.rnder', index, _to_float, CoordError) - aug_y_error = self._two_param_constructor( - 'Chunk.position.axis.error2.syser', - 'Chunk.position.axis.error2.rnder', index, _to_float, CoordError) - aug_dimension = self._two_param_constructor( - 'Chunk.position.axis.function.dimension.naxis1', - 'Chunk.position.axis.function.dimension.naxis2', + plane.meta_release = self._get_datetime(self._get_from_list( + 'Plane.metaRelease', index=0, current=plane.meta_release)) + plane.data_release = self._get_datetime(self._get_from_list( + 'Plane.dataRelease', index=0)) + plane.data_product_type = self._to_data_product_type( + self._get_from_list('Plane.dataProductType', index=0, + current=plane.data_product_type)) + plane.calibration_level = self._to_calibration_level(_to_int_32( + self._get_from_list('Plane.calibrationLevel', index=0, + current=plane.calibration_level))) + plane.meta_producer = self._get_from_list( + 'Plane.metaProducer', index=0, current=plane.meta_producer) + plane.observable = self._get_observable(current=plane.observable) + plane.provenance = self._get_provenance(plane.provenance) + plane.metrics = self._get_metrics(current=plane.metrics) + plane.quality = self._get_quality(current=plane.quality) + + self.logger.debug( + f'End plane augmentation for {artifact_uri}.') + + def _get_algorithm(self, obs): + """ + Create an Algorithm instance populated with available FITS information. + :return: Algorithm + """ + self.logger.debug('Begin Algorithm augmentation.') + # TODO DEFAULT VALUE + name = self._get_from_list('Observation.algorithm.name', index=0, + current=obs.algorithm.name) + result = Algorithm(str(name)) if name else None + self.logger.debug('End Algorithm augmentation.') + return result + + def _get_energy_transition(self, current): + """ + Create an EnergyTransition instance populated with available FITS + information. + :return: EnergyTransition + """ + self.logger.debug('Begin EnergyTransition augmentation.') + species = self._get_from_list( + 'Chunk.energy.transition.species', index=0, + current=None if current is None else current.species) + transition = self._get_from_list( + 'Chunk.energy.transition.transition', index=0, + current=None if current is None else current.transition) + result = None + if species is not None and transition is not None: + result = EnergyTransition(species, transition) + self.logger.debug('End EnergyTransition augmentation.') + return result + + def _get_environment(self, current): + """ + Create an Environment instance populated with available FITS + information. 
+ :current Environment instance, if one already exists in the + Observation + :return: Environment + """ + self.logger.debug('Begin Environment augmentation.') + seeing = self._get_from_list( + 'Observation.environment.seeing', index=0, + current=None if current is None else current.seeing) + humidity = _to_float( + self._get_from_list( + 'Observation.environment.humidity', index=0, + current=None if current is None else current.humidity)) + elevation = self._get_from_list( + 'Observation.environment.elevation', index=0, + current=None if current is None else current.elevation) + tau = self._get_from_list( + 'Observation.environment.tau', index=0, + current=None if current is None else current.tau) + wavelength_tau = self._get_from_list( + 'Observation.environment.wavelengthTau', index=0, + current=None if current is None else current.wavelength_tau) + ambient = _to_float( + self._get_from_list( + 'Observation.environment.ambientTemp', index=0, + current=None if current is None else current.ambient_temp)) + photometric = self._cast_as_bool(self._get_from_list( + 'Observation.environment.photometric', index=0, + current=None if current is None else current.photometric)) + enviro = None + if seeing or humidity or elevation or tau or wavelength_tau or ambient: + enviro = Environment() + enviro.seeing = seeing + enviro.humidity = humidity + enviro.elevation = elevation + enviro.tau = tau + enviro.wavelength_tau = wavelength_tau + enviro.ambient_temp = ambient + enviro.photometric = photometric + self.logger.debug('End Environment augmentation.') + return enviro + + def _get_instrument(self, current): + """ + Create an Instrument instance populated with available FITS + information. + :return: Instrument + """ + self.logger.debug('Begin Instrument augmentation.') + name = self._get_from_list( + 'Observation.instrument.name', index=0, + current=None if current is None else current.name) + keywords = self._get_set_from_list( + 'Observation.instrument.keywords', index=0) + instr = None + if name: + instr = Instrument(str(name)) + FitsParser._add_keywords(keywords, current, instr) + self.logger.debug('End Instrument augmentation.') + return instr + + def _get_members(self, obs): + """ + Returns the members of a derived observation (if specified) + :param obs: observation to augment + :return: members value + """ + members = None + self.logger.debug('Begin Members augmentation.') + if (isinstance(obs, SimpleObservation) and + (self.blueprint._get('DerivedObservation.members') or + self.blueprint._get('CompositeObservation.members'))): + raise TypeError( + 'Cannot apply blueprint for DerivedObservation to a ' + 'simple observation') + elif isinstance(obs, DerivedObservation): + lookup = self.blueprint._get('DerivedObservation.members', + extension=1) + if ObsBlueprint.is_table(lookup) and len(self.headers) > 1: + member_list = self._get_from_table( + 'DerivedObservation.members', 1) + # ensure the members are good little ObservationURIs + if member_list.startswith('caom:'): + members = member_list + else: + members = ' '.join(['caom:{}/{}'.format( + obs.collection, i) if not i.startswith('caom') else i + for i in member_list.split()]) + else: + if obs.members is None: + members = self._get_from_list( + 'DerivedObservation.members', index=0) + else: + members = self._get_from_list( + 'DerivedObservation.members', index=0, + current=obs.members) + elif isinstance(obs, CompositeObservation): + lookup = self.blueprint._get('CompositeObservation.members', + extension=1) + if ObsBlueprint.is_table(lookup) 
and len(self.headers) > 1: + member_list = self._get_from_table( + 'CompositeObservation.members', 1) + # ensure the members are good little ObservationURIs + if member_list.startswith('caom:'): + members = member_list + else: + members = ' '.join(['caom:{}/{}'.format( + obs.collection, i) if not i.startswith('caom') else i + for i in member_list.split()]) + else: + if obs.members is None: + members = self._get_from_list( + 'CompositeObservation.members', index=0) + else: + members = self._get_from_list( + 'CompositeObservation.members', index=0, + current=obs.members) + self.logger.debug('End Members augmentation.') + return members + + def _get_metrics(self, current): + """ + Create a Metrics instance populated with available FITS information. + :return: Metrics + """ + self.logger.debug('Begin Metrics augmentation.') + source_number_density = self._get_from_list( + 'Plane.metrics.sourceNumberDensity', index=0, + current=None if current is None else current.source_number_density) + background = self._get_from_list( + 'Plane.metrics.background', index=0, + current=None if current is None else current.background) + background_stddev = self._get_from_list( + 'Plane.metrics.backgroundStddev', index=0, + current=None if current is None else current.background_std_dev) + flux_density_limit = self._get_from_list( + 'Plane.metrics.fluxDensityLimit', index=0, + current=None if current is None else current.flux_density_limit) + mag_limit = self._get_from_list( + 'Plane.metrics.magLimit', index=0, + current=None if current is None else current.mag_limit) + sample_snr = self._get_from_list( + 'Plane.metrics.sampleSNR', index=0, + current=None if current is None else current.sample_snr) + + metrics = None + if (source_number_density or background or background_stddev or + flux_density_limit or mag_limit or sample_snr): + metrics = Metrics() + metrics.source_number_density = source_number_density + metrics.background = background + metrics.background_std_dev = background_stddev + metrics.flux_density_limit = flux_density_limit + metrics.mag_limit = mag_limit + metrics.sample_snr = sample_snr + self.logger.debug('End Metrics augmentation.') + return metrics + + def _get_naxis(self, label, index): + """Helper function to construct a CoordAxis1D instance, with all + it's members, from the blueprint. + + :param label: axis name - must be one of 'energy', 'time', or + 'polarization', as it's used for the blueprint lookup. + :param index: which blueprint index to find a value in + :return an instance of CoordAxis1D + """ + self.logger.debug( + f'Begin {label} naxis construction from blueprint.') + + aug_axis_ctype = self._get_from_list( + f'Chunk.{label}.axis.axis.ctype', index) + aug_axis_cunit = self._get_from_list( + f'Chunk.{label}.axis.axis.cunit', index) + aug_axis = None + if aug_axis_ctype is not None: + aug_axis = Axis(aug_axis_ctype, aug_axis_cunit) + self.logger.debug( + 'Creating polarization Axis for {} from blueprint'. 
+ format(self.uri)) + + aug_error = self._two_param_constructor( + f'Chunk.{label}.axis.error.syser', + f'Chunk.{label}.axis.error.rnder', + index, _to_float, CoordError) + aug_ref_coord = self._two_param_constructor( + f'Chunk.{label}.axis.function.refCoord.pix', + f'Chunk.{label}.axis.function.refCoord.val', + index, _to_float, RefCoord) + aug_delta = _to_float( + self._get_from_list(f'Chunk.{label}.axis.function.delta', + index)) + aug_length = _to_int( + self._get_from_list(f'Chunk.{label}.axis.function.naxis', + index)) + + aug_function = None + if (aug_length is not None and aug_delta is not None and + aug_ref_coord is not None): + aug_function = \ + CoordFunction1D(aug_length, aug_delta, aug_ref_coord) + self.logger.debug( + 'Creating {} function for {} from blueprint'. + format(label, self.uri)) + + aug_naxis = None + if aug_axis is not None and aug_function is not None: + aug_naxis = CoordAxis1D(aug_axis, aug_error, None, None, + aug_function) + self.logger.debug( + 'Creating {} CoordAxis1D for {} from blueprint'. + format(label, self.uri)) + self.logger.debug( + f'End {label} naxis construction from blueprint.') + return aug_naxis + + def _get_observable(self, current): + """ + Create a Observable instance populated with available FITS information. + :return: Observable + """ + self.logger.debug('Begin Observable augmentation.') + ucd = self._get_from_list( + 'Plane.observable.ucd', index=0, + current=None if current is None else current.ucd) + observable = Observable(ucd) if ucd else None + self.logger.debug('End Observable augmentation.') + return observable + + def _get_proposal(self, current): + """ + Create a Proposal instance populated with available FITS information. + :return: Proposal + """ + self.logger.debug('Begin Proposal augmentation.') + prop_id = self._get_from_list( + 'Observation.proposal.id', index=0, + current=None if current is None else current.id) + pi = self._get_from_list( + 'Observation.proposal.pi', index=0, + current=None if current is None else current.pi_name) + project = self._get_from_list( + 'Observation.proposal.project', index=0, + current=None if current is None else current.project) + title = self._get_from_list( + 'Observation.proposal.title', index=0, + current=None if current is None else current.title) + keywords = self._get_set_from_list( + 'Observation.proposal.keywords', index=0) + proposal = current + if prop_id: + proposal = Proposal(str(prop_id), pi, project, title) + FitsParser._add_keywords(keywords, current, proposal) + self.logger.debug(f'End Proposal augmentation {prop_id}.') + return proposal + + def _get_provenance(self, current): + """ + Create a Provenance instance populated with available FITS information. 
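        Plane.provenance.inputs may arrive either as a caom2 TypedSet of
        PlaneURI or as a whitespace-separated string. A sketch of the
        string-valued form, with an invented collection and plane ids
        (provenance.name must also be set for a Provenance to be created):

            bp.set('Plane.provenance.name', 'preview-pipeline')
            bp.set('Plane.provenance.inputs',
                   'caom:TEST/obs1/raw caom:TEST/obs2/raw')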
+ :return: Provenance + """ + self.logger.debug('Begin Provenance augmentation.') + name = _to_str( + self._get_from_list( + 'Plane.provenance.name', index=0, + current=None if current is None else current.name)) + p_version = _to_str(self._get_from_list( + 'Plane.provenance.version', index=0, + current=None if current is None else current.version)) + project = _to_str( + self._get_from_list( + 'Plane.provenance.project', index=0, + current=None if current is None else current.project)) + producer = _to_str( + self._get_from_list( + 'Plane.provenance.producer', index=0, + current=None if current is None else current.producer)) + run_id = _to_str( + self._get_from_list( + 'Plane.provenance.runID', index=0, + current=None if current is None else current.run_id)) + reference = _to_str( + self._get_from_list( + 'Plane.provenance.reference', index=0, + current=None if current is None else current.reference)) + last_executed = self._get_datetime( + self._get_from_list( + 'Plane.provenance.lastExecuted', index=0, + current=None if current is None else current.last_executed)) + keywords = self._get_set_from_list( + 'Plane.provenance.keywords', index=0) + inputs = self._get_set_from_list('Plane.provenance.inputs', index=0) + prov = None + if name: + prov = Provenance(name, p_version, project, producer, run_id, + reference, last_executed) + FitsParser._add_keywords(keywords, current, prov) + if inputs: + if isinstance(inputs, TypedSet): + for i in inputs: + prov.inputs.add(i) + else: + for i in inputs.split(): + prov.inputs.add(PlaneURI(str(i))) + else: + if current is not None and len(current.inputs) > 0: + # preserve the original value + prov.inputs.update(current.inputs) + self.logger.debug('End Provenance augmentation.') + return prov + + def _get_quality(self, current): + """ + Create a Quality instance populated with available FITS information. + :return: Quality + """ + self.logger.debug('Begin Quality augmentation.') + flag = self._get_from_list( + 'Plane.dataQuality', index=0, + current=None if current is None else current.flag) + quality = DataQuality(flag) if flag else None + self.logger.debug('End Quality augmentation.') + return quality + + def _get_requirements(self, current): + """ + Create a Requirements instance populated with available FITS + information. + :return: Requirements + """ + self.logger.debug('Begin Requirement augmentation.') + flag = self._get_from_list( + 'Observation.requirements.flag', index=0, + current=None if current is None else current.flag) + reqts = Requirements(flag) if flag else None + self.logger.debug('End Requirement augmentation.') + return reqts + + def _get_target(self, current): + """ + Create a Target instance populated with available FITS information. 
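        A sketch of blueprint entries this method reads, with an invented
        target; only the name is required for a Target to be built, and the
        lower-case 'false' strings are converted by the _cast_as_bool helper:

            bp.set('Observation.target.name', 'TEST-FIELD-1')
            bp.set('Observation.target.standard', 'false')
            bp.set('Observation.target.moving', 'false')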
+ :return: Target + """ + self.logger.debug('Begin Target augmentation.') + name = self._get_from_list( + 'Observation.target.name', index=0, + current=None if current is None else current.name) + target_type = self._get_from_list( + 'Observation.target.type', index=0, + current=None if current is None else current.target_type) + standard = self._cast_as_bool(self._get_from_list( + 'Observation.target.standard', index=0, + current=None if current is None else current.standard)) + redshift = self._get_from_list( + 'Observation.target.redshift', index=0, + current=None if current is None else current.redshift) + keywords = self._get_set_from_list( + 'Observation.target.keywords', index=0) + moving = self._cast_as_bool( + self._get_from_list( + 'Observation.target.moving', index=0, + current=None if current is None else current.moving)) + target_id = self._get_from_list( + 'Observation.target.targetID', index=0, + current=None if current is None else current.target_id) + target = None + if name: + target = Target(str(name), target_type, standard, redshift, + moving=moving, target_id=target_id) + FitsParser._add_keywords(keywords, current, target) + self.logger.debug('End Target augmentation.') + return target + + def _get_target_position(self, current): + """ + Create a Target Position instance populated with available FITS + information. + :return: Target Position + """ + self.logger.debug('Begin CAOM2 TargetPosition augmentation.') + x = self._get_from_list( + 'Observation.target_position.point.cval1', index=0, + current=None if current is None else current.coordinates.cval1) + y = self._get_from_list( + 'Observation.target_position.point.cval2', index=0, + current=None if current is None else current.coordinates.cval2) + coordsys = self._get_from_list( + 'Observation.target_position.coordsys', index=0, + current=None if current is None else current.coordsys) + equinox = self._get_from_list( + 'Observation.target_position.equinox', index=0, + current=None if current is None else current.equinox) + aug_target_position = None + if x and y: + aug_point = Point(x, y) + aug_target_position = TargetPosition(aug_point, coordsys) + aug_target_position.equinox = _to_float(equinox) + self.logger.debug('End CAOM2 TargetPosition augmentation.') + return aug_target_position + + def _get_telescope(self, current): + """ + Create a Telescope instance populated with available FITS information. + :return: Telescope + """ + self.logger.debug('Begin Telescope augmentation.') + name = self._get_from_list( + 'Observation.telescope.name', index=0, + current=None if current is None else current.name) + geo_x = _to_float( + self._get_from_list( + 'Observation.telescope.geoLocationX', index=0, + current=None if current is None else current.geo_location_x)) + geo_y = _to_float( + self._get_from_list( + 'Observation.telescope.geoLocationY', index=0, + current=None if current is None else current.geo_location_y)) + geo_z = _to_float( + self._get_from_list( + 'Observation.telescope.geoLocationZ', index=0, + current=None if current is None else current.geo_location_z)) + keywords = self._get_set_from_list( + 'Observation.telescope.keywords', index=0) + aug_tel = None + if name: + aug_tel = Telescope(str(name), geo_x, geo_y, geo_z) + FitsParser._add_keywords(keywords, current, aug_tel) + self.logger.debug('End Telescope augmentation.') + return aug_tel + + def _cast_as_bool(self, from_value): + """ + Make lower case Java booleans into capitalized python booleans. 
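        A quick sketch of the mapping (only the exact lower-case strings are
        recognized, so a value like 'True' falls through and returns None;
        parser stands for any instance of these parser classes):

            parser._cast_as_bool('true')   # -> True
            parser._cast_as_bool('false')  # -> False
            parser._cast_as_bool(True)     # -> True, passed straight through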
+ :param from_value: Something that represents a boolean value + :return: a python boolean value + """ + if isinstance(from_value, bool): + return from_value + result = None + # so far, these are the only options that are coming in from the + # config files - may need to add more as more types are experienced + if from_value == 'false': + result = False + elif from_value == 'true': + result = True + return result + + def _try_energy_with_blueprint(self, chunk, index): + """ + A mechanism to augment the Energy WCS completely from the blueprint. + Do nothing if the WCS information cannot be correctly created. + + :param chunk: The chunk to modify with the addition of energy + information. + :param index: The index in the blueprint for looking up plan + information. + """ + self.logger.debug('Begin augmentation with blueprint for energy.') + aug_naxis = self._get_naxis('energy', index) + + specsys = _to_str(self._get_from_list('Chunk.energy.specsys', index)) + if aug_naxis is None: + self.logger.debug('No blueprint energy information.') + else: + if not chunk.energy: + chunk.energy = SpectralWCS(aug_naxis, specsys) + else: + chunk.energy.naxis = aug_naxis + chunk.energy.specsys = specsys + + if chunk.energy is not None: + chunk.energy.ssysobs = self._get_from_list( + 'Chunk.energy.ssysobs', index) + chunk.energy.restfrq = self._get_from_list( + 'Chunk.energy.restfrq', index) + chunk.energy.restwav = self._get_from_list( + 'Chunk.energy.restwav', index) + chunk.energy.velosys = self._get_from_list( + 'Chunk.energy.velosys', index) + chunk.energy.zsource = self._get_from_list( + 'Chunk.energy.zsource', index) + chunk.energy.ssyssrc = self._get_from_list( + 'Chunk.energy.ssyssrc', index) + chunk.energy.velang = self._get_from_list( + 'Chunk.energy.velang', index) + chunk.energy.bandpass_name = self._get_from_list( + 'Chunk.energy.bandpassName', index) + chunk.energy.transition = self._get_from_list( + 'Chunk.energy.transition', index) + chunk.energy.resolving_power = _to_float(self._get_from_list( + 'Chunk.energy.resolvingPower', index)) + self.logger.debug('End augmentation with blueprint for energy.') + + def _try_observable_with_blueprint(self, chunk, index): + """ + A mechanism to augment the Observable WCS completely from the + blueprint. Do nothing if the WCS information cannot be correctly + created. + + :param chunk: The chunk to modify with the addition of observable + information. + :param index: The index in the blueprint for looking up plan + information. + """ + self.logger.debug('Begin augmentation with blueprint for ' + 'observable.') + chunk.observable_axis = _to_int( + self._get_from_list('Chunk.observableAxis', index)) + aug_axis = self._two_param_constructor( + 'Chunk.observable.dependent.axis.ctype', + 'Chunk.observable.dependent.axis.cunit', index, _to_str, Axis) + aug_bin = _to_int( + self._get_from_list('Chunk.observable.dependent.bin', index)) + if aug_axis is not None and aug_bin is not None: + chunk.observable = ObservableAxis(Slice(aug_axis, aug_bin)) + self.logger.debug('End augmentation with blueprint for polarization.') + + def _try_polarization_with_blueprint(self, chunk, index): + """ + A mechanism to augment the Polarization WCS completely from the + blueprint. Do nothing if the WCS information cannot be correctly + created. + + :param chunk: The chunk to modify with the addition of polarization + information. + :param index: The index in the blueprint for looking up plan + information. 
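        A sketch of the blueprint entries this path can work from, written
        against a hypothetical ObsBlueprint bp. The values are invented
        (a single Stokes plane), the key names mirror the
        _get_naxis('polarization', ...) lookups, and configure_polarization_axis
        is assumed to be the usual helper for flagging the axis as configured:

            bp.configure_polarization_axis(4)
            bp.set('Chunk.polarization.axis.axis.ctype', 'STOKES')
            bp.set('Chunk.polarization.axis.function.naxis', 1)
            bp.set('Chunk.polarization.axis.function.delta', 1.0)
            bp.set('Chunk.polarization.axis.function.refCoord.pix', 1.0)
            bp.set('Chunk.polarization.axis.function.refCoord.val', 1.0)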
+ """ + self.logger.debug('Begin augmentation with blueprint for ' + 'polarization.') + chunk.polarization_axis = _to_int( + self._get_from_list('Chunk.polarizationAxis', index)) + aug_naxis = self._get_naxis('polarization', index) + if aug_naxis is not None: + if chunk.polarization: + chunk.polarization.naxis = aug_naxis + else: + chunk.polarization = PolarizationWCS(aug_naxis) + self.logger.debug( + 'Creating PolarizationWCS for {} from blueprint'. + format(self.uri)) + + self.logger.debug('End augmentation with blueprint for polarization.') + + def _try_position_range(self, chunk, index): + self.logger.debug('Try to set the range for position from blueprint') + if (self.blueprint._pos_axes_configed and chunk.position is not None + and chunk.position.axis is not None): + aug_range_c1_start = self._two_param_constructor( + 'Chunk.position.axis.range.start.coord1.pix', + 'Chunk.position.axis.range.start.coord1.val', + index, _to_float, RefCoord) + aug_range_c1_end = self._two_param_constructor( + 'Chunk.position.axis.range.end.coord1.pix', + 'Chunk.position.axis.range.end.coord1.val', + index, _to_float, RefCoord) + aug_range_c2_start = self._two_param_constructor( + 'Chunk.position.axis.range.start.coord2.pix', + 'Chunk.position.axis.range.start.coord2.val', + index, _to_float, RefCoord) + aug_range_c2_end = self._two_param_constructor( + 'Chunk.position.axis.range.end.coord2.pix', + 'Chunk.position.axis.range.end.coord2.val', + index, _to_float, RefCoord) + if (aug_range_c1_start and aug_range_c1_end and aug_range_c2_start + and aug_range_c2_end): + chunk.position.axis.range = CoordRange2D( + Coord2D(aug_range_c1_start, aug_range_c1_end), + Coord2D(aug_range_c2_start, aug_range_c2_end)) + self.logger.debug('Completed setting range for position') + + def _try_position_with_blueprint(self, chunk, index): + """ + A mechanism to augment the Position WCS completely from the blueprint. + Do nothing if the WCS information cannot be correctly created. + + :param chunk: The chunk to modify with the addition of position + information. + :param index: The index in the blueprint for looking up plan + information. 
+ """ + self.logger.debug('Begin augmentation with blueprint for position.') + + aug_x_axis = self._two_param_constructor( + 'Chunk.position.axis.axis1.ctype', + 'Chunk.position.axis.axis1.cunit', index, _to_str, Axis) + aug_y_axis = self._two_param_constructor( + 'Chunk.position.axis.axis2.ctype', + 'Chunk.position.axis.axis2.cunit', index, _to_str, Axis) + aug_x_error = self._two_param_constructor( + 'Chunk.position.axis.error1.syser', + 'Chunk.position.axis.error1.rnder', index, _to_float, CoordError) + aug_y_error = self._two_param_constructor( + 'Chunk.position.axis.error2.syser', + 'Chunk.position.axis.error2.rnder', index, _to_float, CoordError) + aug_dimension = self._two_param_constructor( + 'Chunk.position.axis.function.dimension.naxis1', + 'Chunk.position.axis.function.dimension.naxis2', index, _to_int, Dimension2D) aug_x_ref_coord = self._two_param_constructor( 'Chunk.position.axis.function.refCoord.coord1.pix', @@ -2126,8 +2675,8 @@ def _try_position_with_blueprint(self, chunk, index): aug_function = None if (aug_dimension is not None and aug_ref_coord is not None and - aug_cd11 is not None and aug_cd12 is not None and - aug_cd21 is not None and aug_cd22 is not None): + aug_cd11 is not None and aug_cd12 is not None and + aug_cd21 is not None and aug_cd22 is not None): aug_function = CoordFunction2D(aug_dimension, aug_ref_coord, aug_cd11, aug_cd12, aug_cd21, aug_cd22) @@ -2136,7 +2685,7 @@ def _try_position_with_blueprint(self, chunk, index): aug_axis = None if (aug_x_axis is not None and aug_y_axis is not None and - aug_function is not None): + aug_function is not None): aug_axis = CoordAxis2D(aug_x_axis, aug_y_axis, aug_x_error, aug_y_error, None, None, aug_function) self.logger.debug( @@ -2157,137 +2706,70 @@ def _try_position_with_blueprint(self, chunk, index): 'Chunk.position.resolution', index) self.logger.debug('End augmentation with blueprint for position.') - def _try_time_with_blueprint(self, chunk, index): - """ - A mechanism to augment the Time WCS completely from the blueprint. - Do nothing if the WCS information cannot be correctly created. - - :param chunk: The chunk to modify with the addition of time - information. - :param index: The index in the blueprint for looking up plan - information. - """ - self.logger.debug('Begin augmentation with blueprint for temporal.') - - chunk.time_axis = self._get_from_list('Chunk.energyAxis', index) - - aug_naxis = self._get_naxis('time', index) - if aug_naxis is not None: - if chunk.time: - chunk.time.naxis = aug_naxis - else: - chunk.time = TemporalWCS(aug_naxis) - self.logger.debug('Creating TemporalWCS for {} from blueprint'. - format(self.uri)) - if chunk.time is not None: - chunk.time.exposure = _to_float( - self._get_from_list('Chunk.time.exposure', index)) - chunk.time.resolution = _to_float( - self._get_from_list('Chunk.time.resolution', index)) - chunk.time.timesys = _to_str( - self._get_from_list('Chunk.time.timesys', index)) - chunk.time.trefpos = self._get_from_list('Chunk.time.trefpos', - index) - chunk.time.mjdref = self._get_from_list('Chunk.time.mjdref', index) - - self.logger.debug('End augmentation with blueprint for temporal.') - - def _try_polarization_with_blueprint(self, chunk, index): - """ - A mechanism to augment the Polarization WCS completely from the - blueprint. Do nothing if the WCS information cannot be correctly - created. - - :param chunk: The chunk to modify with the addition of polarization - information. - :param index: The index in the blueprint for looking up plan - information. 
- """ - self.logger.debug('Begin augmentation with blueprint for ' - 'polarization.') - chunk.polarization_axis = _to_int( - self._get_from_list('Chunk.polarizationAxis', index)) - aug_naxis = self._get_naxis('polarization', index) - if aug_naxis is not None: - if chunk.polarization: - chunk.polarization.naxis = aug_naxis - else: - chunk.polarization = PolarizationWCS(aug_naxis) - self.logger.debug( - 'Creating PolarizationWCS for {} from blueprint'. - format(self.uri)) - - self.logger.debug('End augmentation with blueprint for polarization.') + def _try_range(self, wcs, index, lookup): + self.logger.debug(f'Try to set the range for {lookup}') + aug_range_start = self._two_param_constructor( + f'Chunk.{lookup}.axis.range.start.pix', + f'Chunk.{lookup}.axis.range.start.val', + index, _to_float, RefCoord) + aug_range_end = self._two_param_constructor( + f'Chunk.{lookup}.axis.range.end.pix', + f'Chunk.{lookup}.axis.range.end.val', + index, _to_float, RefCoord) + if aug_range_start and aug_range_end: + wcs.axis.range = CoordRange1D(aug_range_start, aug_range_end) + self.logger.debug(f'Completed setting range for {lookup}') - def _try_observable_with_blueprint(self, chunk, index): - """ - A mechanism to augment the Observable WCS completely from the - blueprint. Do nothing if the WCS information cannot be correctly - created. + def _try_range_with_blueprint(self, chunk, index): + """Use the blueprint to set elements and attributes that + are not in the scope of astropy and fits, and therefore are not + covered by the FitsWcsParser class. Per PD 19/04/18, bounds and + range are not covered by WCS keywords.""" - :param chunk: The chunk to modify with the addition of observable - information. - :param index: The index in the blueprint for looking up plan - information. - """ - self.logger.debug('Begin augmentation with blueprint for ' - 'observable.') - chunk.observable_axis = _to_int( - self._get_from_list('Chunk.observableAxis', index)) - aug_axis = self._two_param_constructor( - 'Chunk.observable.dependent.axis.ctype', - 'Chunk.observable.dependent.axis.cunit', index, _to_str, Axis) - aug_bin = _to_int( - self._get_from_list('Chunk.observable.dependent.bin', index)) - if aug_axis is not None and aug_bin is not None: - chunk.observable = ObservableAxis(Slice(aug_axis, aug_bin)) - self.logger.debug('End augmentation with blueprint for polarization.') + for i in ['energy', 'time', 'polarization']: + axis_configed = getattr(self.blueprint, + f'_{i}_axis_configed') + if axis_configed: + wcs = getattr(chunk, i) + if wcs is not None and wcs.axis is not None: + if wcs.axis.range is None: + self._try_range(wcs, index, i) + self._try_position_range(chunk, index) - def _try_energy_with_blueprint(self, chunk, index): + def _try_time_with_blueprint(self, chunk, index): """ - A mechanism to augment the Energy WCS completely from the blueprint. + A mechanism to augment the Time WCS completely from the blueprint. Do nothing if the WCS information cannot be correctly created. - :param chunk: The chunk to modify with the addition of energy + :param chunk: The chunk to modify with the addition of time information. :param index: The index in the blueprint for looking up plan information. 
""" - self.logger.debug('Begin augmentation with blueprint for energy.') - aug_naxis = self._get_naxis('energy', index) + self.logger.debug('Begin augmentation with blueprint for temporal.') - specsys = _to_str(self._get_from_list('Chunk.energy.specsys', index)) - if aug_naxis is None: - self.logger.debug('No blueprint energy information.') - else: - if not chunk.energy: - chunk.energy = SpectralWCS(aug_naxis, specsys) + chunk.time_axis = self._get_from_list('Chunk.energyAxis', index) + + aug_naxis = self._get_naxis('time', index) + if aug_naxis is not None: + if chunk.time: + chunk.time.naxis = aug_naxis else: - chunk.energy.naxis = aug_naxis - chunk.energy.specsys = specsys + chunk.time = TemporalWCS(aug_naxis) + self.logger.debug('Creating TemporalWCS for {} from blueprint'. + format(self.uri)) + if chunk.time is not None: + chunk.time.exposure = _to_float( + self._get_from_list('Chunk.time.exposure', index)) + chunk.time.resolution = _to_float( + self._get_from_list('Chunk.time.resolution', index)) + chunk.time.timesys = _to_str( + self._get_from_list('Chunk.time.timesys', index)) + chunk.time.trefpos = self._get_from_list('Chunk.time.trefpos', + index) + chunk.time.mjdref = self._get_from_list('Chunk.time.mjdref', index) - if chunk.energy is not None: - chunk.energy.ssysobs = self._get_from_list( - 'Chunk.energy.ssysobs', index) - chunk.energy.restfrq = self._get_from_list( - 'Chunk.energy.restfrq', index) - chunk.energy.restwav = self._get_from_list( - 'Chunk.energy.restwav', index) - chunk.energy.velosys = self._get_from_list( - 'Chunk.energy.velosys', index) - chunk.energy.zsource = self._get_from_list( - 'Chunk.energy.zsource', index) - chunk.energy.ssyssrc = self._get_from_list( - 'Chunk.energy.ssyssrc', index) - chunk.energy.velang = self._get_from_list( - 'Chunk.energy.velang', index) - chunk.energy.bandpass_name = self._get_from_list( - 'Chunk.energy.bandpassName', index) - chunk.energy.transition = self._get_from_list( - 'Chunk.energy.transition', index) - chunk.energy.resolving_power = _to_float(self._get_from_list( - 'Chunk.energy.resolvingPower', index)) - self.logger.debug('End augmentation with blueprint for energy.') + self.logger.debug('End augmentation with blueprint for temporal.') def _two_param_constructor(self, lookup1, lookup2, index, to_type, ctor): """ @@ -2312,144 +2794,127 @@ def _two_param_constructor(self, lookup1, lookup2, index, to_type, ctor): new_object = ctor(param1, param2) return new_object - def _get_naxis(self, label, index): - """Helper function to construct a CoordAxis1D instance, with all - it's members, from the blueprint. + @staticmethod + def _add_keywords(keywords, current, to_set): + """ + Common code for adding keywords to a CAOM2 entity, capturing all + the weird metadata cases that happen at CADC. - :param label: axis name - must be one of 'energy', 'time', or - 'polarization', as it's used for the blueprint lookup. - :param index: which blueprint index to find a value in - :return an instance of CoordAxis1D + :param keywords: Keywords to add to a CAOM2 set. + :param current: Existing CAOM2 entity with a keywords attribute. + :param to_set: A CAOM2 entity with a keywords attribute. 
""" - self.logger.debug( - f'Begin {label} naxis construction from blueprint.') + if keywords: + if isinstance(keywords, set): + to_set.keywords.update(keywords) + else: + for k in keywords.split(): + to_set.keywords.add(k) + else: + if current is not None: + # preserve the original value + to_set.keywords.update(current.keywords) + if to_set.keywords is not None and None in to_set.keywords: + to_set.keywords.remove(None) + if to_set.keywords is not None and 'none' in to_set.keywords: + to_set.keywords.remove('none') - aug_axis_ctype = self._get_from_list( - f'Chunk.{label}.axis.axis.ctype', index) - aug_axis_cunit = self._get_from_list( - f'Chunk.{label}.axis.axis.cunit', index) - aug_axis = None - if aug_axis_ctype is not None: - aug_axis = Axis(aug_axis_ctype, aug_axis_cunit) - self.logger.debug( - 'Creating polarization Axis for {} from blueprint'. - format(self.uri)) - aug_error = self._two_param_constructor( - f'Chunk.{label}.axis.error.syser', - f'Chunk.{label}.axis.error.rnder', - index, _to_float, CoordError) - aug_ref_coord = self._two_param_constructor( - f'Chunk.{label}.axis.function.refCoord.pix', - f'Chunk.{label}.axis.function.refCoord.val', - index, _to_float, RefCoord) - aug_delta = _to_float( - self._get_from_list(f'Chunk.{label}.axis.function.delta', - index)) - aug_length = _to_int( - self._get_from_list(f'Chunk.{label}.axis.function.naxis', - index)) +class FitsParser(BlueprintParser): + """ + Parses a FITS file and extracts the CAOM2 related information which can + be used to augment an existing CAOM2 observation, plane or artifact. The + constructor takes either a FITS file as argument or a list of dictionaries + (FITS keyword=value) corresponding to each extension. - aug_function = None - if (aug_length is not None and aug_delta is not None and - aug_ref_coord is not None): - aug_function = \ - CoordFunction1D(aug_length, aug_delta, aug_ref_coord) - self.logger.debug( - 'Creating {} function for {} from blueprint'. - format(label, self.uri)) + The WCS-related keywords of the FITS file are consumed by the astropy.wcs + package which might display warnings with regards to compliance. - aug_naxis = None - if aug_axis is not None and aug_function is not None: - aug_naxis = CoordAxis1D(aug_axis, aug_error, None, None, - aug_function) - self.logger.debug( - 'Creating {} CoordAxis1D for {} from blueprint'. - format(label, self.uri)) - self.logger.debug( - f'End {label} naxis construction from blueprint.') - return aug_naxis + Example 1: + parser = FitsParser(input = '/staging/700000o.fits.gz') + ... + # customize parser.headers by deleting, changing or adding attributes - def augment_observation(self, observation, artifact_uri, product_id=None): - """ - Augments a given observation with available FITS information. - :param observation: existing CAOM2 observation to be augmented. 
- :param artifact_uri: the key for finding the artifact to augment - :param product_id: the key for finding for the plane to augment - """ - super().augment_observation(observation, artifact_uri, product_id) - self.logger.debug( - 'Begin observation augmentation for URI {}.'.format( - artifact_uri)) - members = self._get_members(observation) - if members: - if isinstance(members, TypedSet): - for m in members: - observation.members.add(m) - else: - for m in members.split(): - observation.members.add(ObservationURI(m)) - observation.algorithm = self._get_algorithm(observation) + obs = Observation(collection='TEST', observation_id='700000', + algorithm='exposure') + plane = Plane(plane_id='700000-1') + obs.plane.add(plane) - observation.sequence_number = _to_int(self._get_from_list( - 'Observation.sequenceNumber', index=0)) - observation.intent = self._get_from_list( - 'Observation.intent', 0, (ObservationIntentType.SCIENCE if - observation.intent is None else - observation.intent)) - observation.type = self._get_from_list('Observation.type', 0, - current=observation.type) - observation.meta_release = self._get_datetime( - self._get_from_list('Observation.metaRelease', 0, - current=observation.meta_release)) - observation.meta_read_groups = self._get_from_list( - 'Observation.metaReadGroups', 0) - observation.meta_producer = self._get_from_list( - 'Observation.metaProducer', 0, current=observation.meta_producer) - observation.requirements = self._get_requirements( - observation.requirements) - observation.instrument = self._get_instrument(observation.instrument) - observation.proposal = self._get_proposal(observation.proposal) - observation.target = self._get_target(observation.target) - observation.target_position = self._get_target_position( - observation.target_position) - observation.telescope = self._get_telescope(observation.telescope) - observation.environment = self._get_environment( - observation.environment) - self.logger.debug( - f'End observation augmentation for {artifact_uri}.') + artifact = Artifact(uri='ad:CFHT/700000o.fits.gz', product_type='science', + release_type='data') + plane.artifacts.add(artifact) - def augment_plane(self, plane, artifact_uri): + parser.augment_observation(obs) + + # further update obs + + + Example 2: + + headers = [] # list of dictionaries headers + # populate headers + parser = FitsParser(input=headers) + + parser.augment_observation(obs) + ... + + """ + + def __init__(self, src, obs_blueprint=None, uri=None): """ - Augments a given plane with available FITS information. - :param plane: existing CAOM2 plane to be augmented. - :param artifact_uri: + Ctor + :param src: List of headers (dictionary of FITS keywords:value) with + one header for each extension or a FITS input file. 
+ :param obs_blueprint: externally provided blueprint + :param uri: which artifact augmentation is based on """ - super().augment_plane(plane, artifact_uri) - self.logger.debug( - f'Begin plane augmentation for {artifact_uri}.') - - plane.meta_release = self._get_datetime(self._get_from_list( - 'Plane.metaRelease', index=0, current=plane.meta_release)) - plane.data_release = self._get_datetime(self._get_from_list( - 'Plane.dataRelease', index=0)) - plane.data_product_type = self._to_data_product_type( - self._get_from_list('Plane.dataProductType', index=0, - current=plane.data_product_type)) - plane.calibration_level = self._to_calibration_level(_to_int_32( - self._get_from_list('Plane.calibrationLevel', index=0, - current=plane.calibration_level))) - plane.meta_producer = self._get_from_list( - 'Plane.metaProducer', index=0, current=plane.meta_producer) - plane.observable = self._get_observable(current=plane.observable) - plane.provenance = self._get_provenance(plane.provenance) - plane.metrics = self._get_metrics(current=plane.metrics) - plane.quality = self._get_quality(current=plane.quality) + self.logger = logging.getLogger(__name__) + self._headers = [] + self.parts = 0 + self.file = '' + if isinstance(src, list): + # assume this is the list of headers + self._headers = src + else: + # assume file + self.file = src + self._headers = data_util.get_local_headers_from_fits(self.file) + if obs_blueprint: + self._blueprint = obs_blueprint + else: + self._blueprint = ObsBlueprint() + self._errors = [] + # for command-line parameter to module execution + self.uri = uri + self.apply_blueprint() - self.logger.debug( - f'End plane augmentation for {artifact_uri}.') + @property + def headers(self): + """ + List of headers where each header should allow dictionary like + access to the FITS attribute in that header + :return: + """ + return self._headers + + def ignore_chunks(self, artifact, i, ii): + # there is one Part per extension, the name is the extension number + if ( + FitsParser._has_data_array(self._headers[i]) + and self.blueprint.has_chunk(i) + ): + if ii not in artifact.parts.keys(): + # TODO use extension name? + artifact.parts.add(Part(ii)) + self.logger.debug(f'Part created for HDU {ii}.') + result = False + else: + artifact.parts.add(Part(ii)) + self.logger.debug(f'Create empty part for HDU {ii}') + result = True + return result - def apply_blueprint_to_fits(self): + def apply_blueprint(self): # pointers that are short to type exts = self.blueprint._extensions @@ -2535,337 +3000,49 @@ def apply_blueprint_to_fits(self): # apply a default if a value does not already # exist, and all possible values of # keywords have been checked - _set_by_type(header, keyword.strip(), value[1]) - logging.debug( - '{}: set default value of {} in HDU {}.'. - format(keyword, value[1], index)) - - # TODO wcs in astropy ignores cdelt attributes when it finds a cd - # attribute even if it's in a different axis - for header in self.headers: - cd_present = False - for i in range(1, 6): - if 'CD{0}_{0}'.format(i) in header: - cd_present = True - break - if cd_present: - for i in range(1, 6): - if f'CDELT{i}' in header and \ - 'CD{0}_{0}'.format(i) not in header: - header['CD{0}_{0}'.format(i)] = \ - header[f'CDELT{i}'] - - # TODO When a projection is specified, wcslib expects corresponding - # DP arguments with NAXES attributes. Normally, omitting the attribute - # signals no distortion which is the assumption in fits2caom2 for - # energy and polarization axes. 
Following is a workaround for - # SIP projections. - # For more details see: - # http://www.atnf.csiro.au/people/mcalabre/WCS/dcs_20040422.pdf - for header in self.headers: - sip = False - for i in range(1, 6): - if ((f'CTYPE{i}' in header) and - isinstance(header[f'CTYPE{i}'], str) and - ('-SIP' in header[f'CTYPE{i}'])): - sip = True - break - if sip: - for i in range(1, 6): - if (f'CTYPE{i}' in header) and \ - ('-SIP' not in header[f'CTYPE{i}']) and \ - (f'DP{i}' not in header): - header[f'DP{i}'] = 'NAXES: 1' - - return - - def _get_members(self, obs): - """ - Returns the members of a derived observation (if specified) - :param obs: observation to augment - :return: members value - """ - members = None - self.logger.debug('Begin Members augmentation.') - if (isinstance(obs, SimpleObservation) and - (self.blueprint._get('DerivedObservation.members') or - self.blueprint._get('CompositeObservation.members'))): - raise TypeError( - 'Cannot apply blueprint for DerivedObservation to a ' - 'simple observation') - elif isinstance(obs, DerivedObservation): - lookup = self.blueprint._get('DerivedObservation.members', - extension=1) - if ObsBlueprint.is_table(lookup) and len(self.headers) > 1: - member_list = self._get_from_table( - 'DerivedObservation.members', 1) - # ensure the members are good little ObservationURIs - if member_list.startswith('caom:'): - members = member_list - else: - members = ' '.join(['caom:{}/{}'.format( - obs.collection, i) if not i.startswith('caom') else i - for i in member_list.split()]) - else: - if obs.members is None: - members = self._get_from_list( - 'DerivedObservation.members', index=0) - else: - members = self._get_from_list( - 'DerivedObservation.members', index=0, - current=obs.members) - elif isinstance(obs, CompositeObservation): - lookup = self.blueprint._get('CompositeObservation.members', - extension=1) - if ObsBlueprint.is_table(lookup) and len(self.headers) > 1: - member_list = self._get_from_table( - 'CompositeObservation.members', 1) - # ensure the members are good little ObservationURIs - if member_list.startswith('caom:'): - members = member_list - else: - members = ' '.join(['caom:{}/{}'.format( - obs.collection, i) if not i.startswith('caom') else i - for i in member_list.split()]) - else: - if obs.members is None: - members = self._get_from_list( - 'CompositeObservation.members', index=0) - else: - members = self._get_from_list( - 'CompositeObservation.members', index=0, - current=obs.members) - self.logger.debug('End Members augmentation.') - return members - - def _get_algorithm(self, obs): - """ - Create an Algorithm instance populated with available FITS information. - :return: Algorithm - """ - self.logger.debug('Begin Algorithm augmentation.') - # TODO DEFAULT VALUE - name = self._get_from_list('Observation.algorithm.name', index=0, - current=obs.algorithm.name) - result = Algorithm(str(name)) if name else None - self.logger.debug('End Algorithm augmentation.') - return result - - def _get_energy_transition(self, current): - """ - Create an EnergyTransition instance populated with available FITS - information. 
- :return: EnergyTransition - """ - self.logger.debug('Begin EnergyTransition augmentation.') - species = self._get_from_list( - 'Chunk.energy.transition.species', index=0, - current=None if current is None else current.species) - transition = self._get_from_list( - 'Chunk.energy.transition.transition', index=0, - current=None if current is None else current.transition) - result = None - if species is not None and transition is not None: - result = EnergyTransition(species, transition) - self.logger.debug('End EnergyTransition augmentation.') - return result - - def _get_instrument(self, current): - """ - Create an Instrument instance populated with available FITS - information. - :return: Instrument - """ - self.logger.debug('Begin Instrument augmentation.') - name = self._get_from_list( - 'Observation.instrument.name', index=0, - current=None if current is None else current.name) - keywords = self._get_set_from_list( - 'Observation.instrument.keywords', index=0) - instr = None - if name: - instr = Instrument(str(name)) - FitsParser._add_keywords(keywords, current, instr) - self.logger.debug('End Instrument augmentation.') - return instr - - def _get_proposal(self, current): - """ - Create a Proposal instance populated with available FITS information. - :return: Proposal - """ - self.logger.debug('Begin Proposal augmentation.') - prop_id = self._get_from_list( - 'Observation.proposal.id', index=0, - current=None if current is None else current.id) - pi = self._get_from_list( - 'Observation.proposal.pi', index=0, - current=None if current is None else current.pi_name) - project = self._get_from_list( - 'Observation.proposal.project', index=0, - current=None if current is None else current.project) - title = self._get_from_list( - 'Observation.proposal.title', index=0, - current=None if current is None else current.title) - keywords = self._get_set_from_list( - 'Observation.proposal.keywords', index=0) - proposal = current - if prop_id: - proposal = Proposal(str(prop_id), pi, project, title) - FitsParser._add_keywords(keywords, current, proposal) - self.logger.debug(f'End Proposal augmentation {prop_id}.') - return proposal - - def _get_target(self, current): - """ - Create a Target instance populated with available FITS information. 
- :return: Target - """ - self.logger.debug('Begin Target augmentation.') - name = self._get_from_list( - 'Observation.target.name', index=0, - current=None if current is None else current.name) - target_type = self._get_from_list( - 'Observation.target.type', index=0, - current=None if current is None else current.target_type) - standard = self._cast_as_bool(self._get_from_list( - 'Observation.target.standard', index=0, - current=None if current is None else current.standard)) - redshift = self._get_from_list( - 'Observation.target.redshift', index=0, - current=None if current is None else current.redshift) - keywords = self._get_set_from_list( - 'Observation.target.keywords', index=0) - moving = self._cast_as_bool( - self._get_from_list( - 'Observation.target.moving', index=0, - current=None if current is None else current.moving)) - target_id = self._get_from_list( - 'Observation.target.targetID', index=0, - current=None if current is None else current.target_id) - target = None - if name: - target = Target(str(name), target_type, standard, redshift, - moving=moving, target_id=target_id) - FitsParser._add_keywords(keywords, current, target) - self.logger.debug('End Target augmentation.') - return target - - def _get_target_position(self, current): - """ - Create a Target Position instance populated with available FITS - information. - :return: Target Position - """ - self.logger.debug('Begin CAOM2 TargetPosition augmentation.') - x = self._get_from_list( - 'Observation.target_position.point.cval1', index=0, - current=None if current is None else current.coordinates.cval1) - y = self._get_from_list( - 'Observation.target_position.point.cval2', index=0, - current=None if current is None else current.coordinates.cval2) - coordsys = self._get_from_list( - 'Observation.target_position.coordsys', index=0, - current=None if current is None else current.coordsys) - equinox = self._get_from_list( - 'Observation.target_position.equinox', index=0, - current=None if current is None else current.equinox) - aug_target_position = None - if x and y: - aug_point = Point(x, y) - aug_target_position = TargetPosition(aug_point, coordsys) - aug_target_position.equinox = _to_float(equinox) - self.logger.debug('End CAOM2 TargetPosition augmentation.') - return aug_target_position + _set_by_type(header, keyword.strip(), value[1]) + logging.debug( + '{}: set default value of {} in HDU {}.'. + format(keyword, value[1], index)) - def _get_telescope(self, current): - """ - Create a Telescope instance populated with available FITS information. 
- :return: Telescope - """ - self.logger.debug('Begin Telescope augmentation.') - name = self._get_from_list( - 'Observation.telescope.name', index=0, - current=None if current is None else current.name) - geo_x = _to_float( - self._get_from_list( - 'Observation.telescope.geoLocationX', index=0, - current=None if current is None else current.geo_location_x)) - geo_y = _to_float( - self._get_from_list( - 'Observation.telescope.geoLocationY', index=0, - current=None if current is None else current.geo_location_y)) - geo_z = _to_float( - self._get_from_list( - 'Observation.telescope.geoLocationZ', index=0, - current=None if current is None else current.geo_location_z)) - keywords = self._get_set_from_list( - 'Observation.telescope.keywords', index=0) - aug_tel = None - if name: - aug_tel = Telescope(str(name), geo_x, geo_y, geo_z) - FitsParser._add_keywords(keywords, current, aug_tel) - self.logger.debug('End Telescope augmentation.') - return aug_tel + # TODO wcs in astropy ignores cdelt attributes when it finds a cd + # attribute even if it's in a different axis + for header in self.headers: + cd_present = False + for i in range(1, 6): + if 'CD{0}_{0}'.format(i) in header: + cd_present = True + break + if cd_present: + for i in range(1, 6): + if f'CDELT{i}' in header and \ + 'CD{0}_{0}'.format(i) not in header: + header['CD{0}_{0}'.format(i)] = \ + header[f'CDELT{i}'] - def _get_environment(self, current): - """ - Create an Environment instance populated with available FITS - information. - :current Environment instance, if one already exists in the - Observation - :return: Environment - """ - self.logger.debug('Begin Environment augmentation.') - seeing = self._get_from_list( - 'Observation.environment.seeing', index=0, - current=None if current is None else current.seeing) - humidity = _to_float( - self._get_from_list( - 'Observation.environment.humidity', index=0, - current=None if current is None else current.humidity)) - elevation = self._get_from_list( - 'Observation.environment.elevation', index=0, - current=None if current is None else current.elevation) - tau = self._get_from_list( - 'Observation.environment.tau', index=0, - current=None if current is None else current.tau) - wavelength_tau = self._get_from_list( - 'Observation.environment.wavelengthTau', index=0, - current=None if current is None else current.wavelength_tau) - ambient = _to_float( - self._get_from_list( - 'Observation.environment.ambientTemp', index=0, - current=None if current is None else current.ambient_temp)) - photometric = self._cast_as_bool(self._get_from_list( - 'Observation.environment.photometric', index=0, - current=None if current is None else current.photometric)) - enviro = None - if seeing or humidity or elevation or tau or wavelength_tau or ambient: - enviro = Environment() - enviro.seeing = seeing - enviro.humidity = humidity - enviro.elevation = elevation - enviro.tau = tau - enviro.wavelength_tau = wavelength_tau - enviro.ambient_temp = ambient - enviro.photometric = photometric - self.logger.debug('End Environment augmentation.') - return enviro + # TODO When a projection is specified, wcslib expects corresponding + # DP arguments with NAXES attributes. Normally, omitting the attribute + # signals no distortion which is the assumption in fits2caom2 for + # energy and polarization axes. Following is a workaround for + # SIP projections. 
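        # An illustrative case of the SIP workaround described above (the
        # header values here are hypothetical, not taken from any file in
        # this change):
        #
        #     header = fits.Header()
        #     header['CTYPE1'] = 'RA---TAN-SIP'
        #     header['CTYPE2'] = 'DEC--TAN-SIP'
        #     header['CTYPE3'] = 'WAVE'
        #     # after the loop below runs:
        #     #   header['DP3'] == 'NAXES: 1'   # energy axis: no distortion
        #     #   'DP1' not in header           # -SIP axes are left as-is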
+ # For more details see: + # http://www.atnf.csiro.au/people/mcalabre/WCS/dcs_20040422.pdf + for header in self.headers: + sip = False + for i in range(1, 6): + if ((f'CTYPE{i}' in header) and + isinstance(header[f'CTYPE{i}'], str) and + ('-SIP' in header[f'CTYPE{i}'])): + sip = True + break + if sip: + for i in range(1, 6): + if (f'CTYPE{i}' in header) and \ + ('-SIP' not in header[f'CTYPE{i}']) and \ + (f'DP{i}' not in header): + header[f'DP{i}'] = 'NAXES: 1' - def _get_requirements(self, current): - """ - Create a Requirements instance populated with available FITS - information. - :return: Requirements - """ - self.logger.debug('Begin Requirement augmentation.') - flag = self._get_from_list( - 'Observation.requirements.flag', index=0, - current=None if current is None else current.flag) - reqts = Requirements(flag) if flag else None - self.logger.debug('End Requirement augmentation.') - return reqts + return def _get_from_list(self, lookup, index, current=None): value = None @@ -2959,182 +3136,47 @@ def _get_from_table(self, lookup, extension): with fits.open(self.file) as fits_data: if fits_data[extension].header['XTENSION'] != 'BINTABLE': raise ValueError( - 'Got {} when looking for a BINTABLE ' - 'extension.'.format( - fits_data[extension].header['XTENSION'])) - for ii in keywords[1]: - for jj in fits_data[extension].data[keywords[2]][ii]: - value = f'{jj} {value}' - - self.logger.debug(f'{lookup}: value is {value}') - return value - - def _get_set_from_list(self, lookup, index): - value = None - keywords = None - try: - keywords = self.blueprint._get(lookup) - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - self.logger.debug( - 'Could not find \'{}\' in fits2caom2 configuration.'.format( - lookup)) - - if isinstance(keywords, tuple): - for ii in keywords[0]: - try: - value = self.headers[index].get(ii) - break - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - if keywords[1]: - value = keywords[1] - self.logger.debug( - '{}: assigned default value {}.'.format(lookup, - value)) - elif keywords: - value = keywords - self.logger.debug(f'{lookup}: assigned value {value}.') - - return value - - def _get_provenance(self, current): - """ - Create a Provenance instance populated with available FITS information. 
- :return: Provenance - """ - self.logger.debug('Begin Provenance augmentation.') - name = _to_str( - self._get_from_list( - 'Plane.provenance.name', index=0, - current=None if current is None else current.name)) - p_version = _to_str(self._get_from_list( - 'Plane.provenance.version', index=0, - current=None if current is None else current.version)) - project = _to_str( - self._get_from_list( - 'Plane.provenance.project', index=0, - current=None if current is None else current.project)) - producer = _to_str( - self._get_from_list( - 'Plane.provenance.producer', index=0, - current=None if current is None else current.producer)) - run_id = _to_str( - self._get_from_list( - 'Plane.provenance.runID', index=0, - current=None if current is None else current.run_id)) - reference = _to_str( - self._get_from_list( - 'Plane.provenance.reference', index=0, - current=None if current is None else current.reference)) - last_executed = self._get_datetime( - self._get_from_list( - 'Plane.provenance.lastExecuted', index=0, - current=None if current is None else current.last_executed)) - keywords = self._get_set_from_list( - 'Plane.provenance.keywords', index=0) - inputs = self._get_set_from_list('Plane.provenance.inputs', index=0) - prov = None - if name: - prov = Provenance(name, p_version, project, producer, run_id, - reference, last_executed) - FitsParser._add_keywords(keywords, current, prov) - if inputs: - if isinstance(inputs, TypedSet): - for i in inputs: - prov.inputs.add(i) - else: - for i in inputs.split(): - prov.inputs.add(PlaneURI(str(i))) - else: - if current is not None and len(current.inputs) > 0: - # preserve the original value - prov.inputs.update(current.inputs) - self.logger.debug('End Provenance augmentation.') - return prov - - def _get_metrics(self, current): - """ - Create a Metrics instance populated with available FITS information. 
- :return: Metrics - """ - self.logger.debug('Begin Metrics augmentation.') - source_number_density = self._get_from_list( - 'Plane.metrics.sourceNumberDensity', index=0, - current=None if current is None else current.source_number_density) - background = self._get_from_list( - 'Plane.metrics.background', index=0, - current=None if current is None else current.background) - background_stddev = self._get_from_list( - 'Plane.metrics.backgroundStddev', index=0, - current=None if current is None else current.background_std_dev) - flux_density_limit = self._get_from_list( - 'Plane.metrics.fluxDensityLimit', index=0, - current=None if current is None else current.flux_density_limit) - mag_limit = self._get_from_list( - 'Plane.metrics.magLimit', index=0, - current=None if current is None else current.mag_limit) - sample_snr = self._get_from_list( - 'Plane.metrics.sampleSNR', index=0, - current=None if current is None else current.sample_snr) - - metrics = None - if (source_number_density or background or background_stddev or - flux_density_limit or mag_limit or sample_snr): - metrics = Metrics() - metrics.source_number_density = source_number_density - metrics.background = background - metrics.background_std_dev = background_stddev - metrics.flux_density_limit = flux_density_limit - metrics.mag_limit = mag_limit - metrics.sample_snr = sample_snr - self.logger.debug('End Metrics augmentation.') - return metrics + 'Got {} when looking for a BINTABLE ' + 'extension.'.format( + fits_data[extension].header['XTENSION'])) + for ii in keywords[1]: + for jj in fits_data[extension].data[keywords[2]][ii]: + value = f'{jj} {value}' - def _get_quality(self, current): - """ - Create a Quality instance populated with available FITS information. - :return: Quality - """ - self.logger.debug('Begin Quality augmentation.') - flag = self._get_from_list( - 'Plane.dataQuality', index=0, - current=None if current is None else current.flag) - quality = DataQuality(flag) if flag else None - self.logger.debug('End Quality augmentation.') - return quality + self.logger.debug(f'{lookup}: value is {value}') + return value - def _get_observable(self, current): - """ - Create a Observable instance populated with available FITS information. - :return: Observable - """ - self.logger.debug('Begin Observable augmentation.') - ucd = self._get_from_list( - 'Plane.observable.ucd', index=0, - current=None if current is None else current.ucd) - observable = Observable(ucd) if ucd else None - self.logger.debug('End Observable augmentation.') - return observable + def _get_set_from_list(self, lookup, index): + value = None + keywords = None + try: + keywords = self.blueprint._get(lookup) + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + self.logger.debug( + 'Could not find \'{}\' in fits2caom2 configuration.'.format( + lookup)) - def _cast_as_bool(self, from_value): - """ - Make lower case Java booleans into capitalized python booleans. 
- :param from_value: Something that represents a boolean value - :return: a python boolean value - """ - if isinstance(from_value, bool): - return from_value - result = None - # so far, these are the only options that are coming in from the - # config files - may need to add more as more types are experienced - if from_value == 'false': - result = False - elif from_value == 'true': - result = True - return result + if isinstance(keywords, tuple): + for ii in keywords[0]: + try: + value = self.headers[index].get(ii) + break + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + if keywords[1]: + value = keywords[1] + self.logger.debug( + '{}: assigned default value {}.'.format(lookup, + value)) + elif keywords: + value = keywords + self.logger.debug(f'{lookup}: assigned value {value}.') + + return value - def _has_data_array(self, header): + @staticmethod + def _has_data_array(header): """ :param header: @@ -3167,47 +3209,13 @@ def _has_data_array(self, header): return False return True - @staticmethod - def _add_keywords(keywords, current, to_set): - """ - Common code for adding keywords to a CAOM2 entity, capturing all - the weird metadata cases that happen at CADC. - - :param keywords: Keywords to add to a CAOM2 set. - :param current: Existing CAOM2 entity with a keywords attribute. - :param to_set: A CAOM2 entity with a keywords attribute. - """ - if keywords: - if isinstance(keywords, set): - to_set.keywords.update(keywords) - else: - for k in keywords.split(): - to_set.keywords.add(k) - else: - if current is not None: - # preserve the original value - to_set.keywords.update(current.keywords) - if to_set.keywords is not None and None in to_set.keywords: - to_set.keywords.remove(None) - if to_set.keywords is not None and 'none' in to_set.keywords: - to_set.keywords.remove('none') - class WcsParser: - """ - Parser to augment chunks with positional, temporal, energy and polarization - information based on the WCS keywords in an extension of a FITS header. - - Note: Under the hood, this class uses the astropy.wcs package to parse the - header and any inconsistencies or missing keywords are reported back as - warnings. 
- """ - ENERGY_AXIS = 'energy' POLARIZATION_AXIS = 'polarization' TIME_AXIS = 'time' - def __init__(self, header, file, extension): + def __init__(self): """ :param header: FITS extension header @@ -3217,20 +3225,7 @@ def __init__(self, header, file, extension): """ # add the HDU extension to logging messages from this class - self.logger = logging.getLogger(__name__ + '.WcsParser') - self.log_filter = HDULoggingFilter() - self.log_filter.extension(extension) - self.logger.addFilter(self.log_filter) - logastro = logging.getLogger('astropy') - logastro.addFilter(self.log_filter) - logastro.propagate = False - header_string = header.tostring().rstrip() - header_string = header_string.replace('END' + ' ' * 77, '') - self.wcs = Wcsprm(header_string.encode('ascii')) - self.wcs.fix() - self.header = header - self.file = file - self.extension = extension + self.logger = logging.getLogger(self.__class__.__name__) def augment_custom(self, chunk): """ @@ -3456,6 +3451,89 @@ def augment_observable(self, chunk): Slice(self._get_axis(0, ctype, cunit), pix_bin)) self.logger.debug('End Observable WCS augmentation.') + def _get_cd(self, x_index, y_index): + """ returns cd info""" + + try: + if self.wcs.has_cd(): + cd11 = self.wcs.cd[x_index][x_index] + cd12 = self.wcs.cd[x_index][y_index] + cd21 = self.wcs.cd[y_index][x_index] + cd22 = self.wcs.cd[y_index][y_index] + else: + cd11 = self.wcs.cdelt[x_index] + cd12 = self.wcs.crota[x_index] + cd21 = self.wcs.crota[y_index] + cd22 = self.wcs.cdelt[y_index] + except AttributeError: + self.logger.debug( + f'Error searching for CD* values {sys.exc_info()[1]}') + cd11 = None + cd12 = None + cd21 = None + cd22 = None + + return cd11, cd12, cd21, cd22 + + def _get_coord_error(self, index): + aug_coord_error = None + aug_csyer = self._sanitize(self.wcs.csyer[index]) + aug_crder = self._sanitize(self.wcs.crder[index]) + if aug_csyer is not None and aug_crder is not None: + aug_coord_error = CoordError(aug_csyer, aug_crder) + return aug_coord_error + + def _get_dimension(self, xindex, yindex): + aug_dimension = None + aug_dim1 = _to_int(self._get_axis_length(xindex + 1)) + aug_dim2 = _to_int(self._get_axis_length(yindex + 1)) + if aug_dim1 and aug_dim2: + aug_dimension = Dimension2D(aug_dim1, aug_dim2) + self.logger.debug('End 2D dimension augmentation.') + return aug_dimension + + def _get_ref_coord(self, index): + aug_crpix = _to_float(self._sanitize(self.wcs.crpix[index])) + aug_crval = _to_float(self._sanitize(self.wcs.crval[index])) + aug_ref_coord = None + if aug_crpix is not None and aug_crval is not None: + aug_ref_coord = RefCoord(aug_crpix, aug_crval) + return aug_ref_coord + + +class FitsWcsParser(WcsParser): + """ + Parser to augment chunks with positional, temporal, energy and polarization + information based on the WCS keywords in an extension of a FITS header. + + Note: Under the hood, this class uses the astropy.wcs package to parse the + header and any inconsistencies or missing keywords are reported back as + warnings. + """ + + def __init__(self, header, file, extension): + """ + + :param header: FITS extension header + :param file: name of FITS file + :param extension: which HDU + WCS axes methods of this class. 
+ """ + super().__init__() + self.log_filter = HDULoggingFilter() + self.log_filter.extension(extension) + self.logger.addFilter(self.log_filter) + logastro = logging.getLogger('astropy') + logastro.addFilter(self.log_filter) + logastro.propagate = False + header_string = header.tostring().rstrip() + header_string = header_string.replace('END' + ' ' * 77, '') + self.wcs = Wcsprm(header_string.encode('ascii')) + self.wcs.fix() + self.header = header + self.file = file + self.extension = extension + def _get_axis_index(self, keywords): """ Return the index of a specific axis type or None of it doesn't exist @@ -3518,47 +3596,6 @@ def _get_spatial_axis(self, xindex, yindex): self.logger.debug('End CoordAxis2D augmentation.') return aug_axis - def _get_cd(self, x_index, y_index): - """ returns cd info""" - - try: - if self.wcs.has_cd(): - cd11 = self.wcs.cd[x_index][x_index] - cd12 = self.wcs.cd[x_index][y_index] - cd21 = self.wcs.cd[y_index][x_index] - cd22 = self.wcs.cd[y_index][y_index] - else: - cd11 = self.wcs.cdelt[x_index] - cd12 = self.wcs.crota[x_index] - cd21 = self.wcs.crota[y_index] - cd22 = self.wcs.cdelt[y_index] - except AttributeError: - self.logger.debug( - f'Error searching for CD* values {sys.exc_info()[1]}') - cd11 = None - cd12 = None - cd21 = None - cd22 = None - - return cd11, cd12, cd21, cd22 - - def _get_coord_error(self, index): - aug_coord_error = None - aug_csyer = self._sanitize(self.wcs.csyer[index]) - aug_crder = self._sanitize(self.wcs.crder[index]) - if aug_csyer is not None and aug_crder is not None: - aug_coord_error = CoordError(aug_csyer, aug_crder) - return aug_coord_error - - def _get_dimension(self, xindex, yindex): - aug_dimension = None - aug_dim1 = _to_int(self._get_axis_length(xindex + 1)) - aug_dim2 = _to_int(self._get_axis_length(yindex + 1)) - if aug_dim1 and aug_dim2: - aug_dimension = Dimension2D(aug_dim1, aug_dim2) - self.logger.debug('End 2D dimension augmentation.') - return aug_dimension - def _get_position_axis(self): # there are two celestial axes, get the applicable indices from # the axis_types @@ -3574,14 +3611,6 @@ def _get_position_axis(self): '{}'. format(xindex, yindex, self.file)) - def _get_ref_coord(self, index): - aug_crpix = _to_float(self._sanitize(self.wcs.crpix[index])) - aug_crval = _to_float(self._sanitize(self.wcs.crval[index])) - aug_ref_coord = None - if aug_crpix is not None and aug_crval is not None: - aug_ref_coord = RefCoord(aug_crpix, aug_crval) - return aug_ref_coord - def _get_axis_length(self, for_axis): # try ZNAXIS first in order to get the size of the original # image in case it was FITS compressed @@ -4098,7 +4127,7 @@ def _set_logging(verbose, debug, quiet): handler = logging.StreamHandler() handler.setFormatter(DispatchingFormatter({ - 'caom2utils.fits2caom2.WcsParser': logging.Formatter( + 'caom2utils.fits2caom2.FitsWcsParser': logging.Formatter( '%(asctime)s:%(levelname)s:%(name)-12s:HDU:%(hdu)-2s:' '%(lineno)d:%(message)s'), 'astropy': logging.Formatter( From 1dcc5fa9620d6500a3ad65509afe42a47a531284 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Tue, 22 Feb 2022 13:42:34 -0800 Subject: [PATCH 02/38] CADC-10809 - interim commit - add the classes WCSParser, FitsWcsParser, BlueprintParser to the inheritance hierarchy. 
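A minimal sketch of the renamed parser's call pattern, matching how the
updated tests below construct it (the file name 'sample.fits' and the use of
the primary HDU are illustrative assumptions, not part of this change):

    from astropy.io import fits
    from caom2 import Chunk
    from caom2utils import FitsWcsParser

    # parse the WCS keywords from the primary header of a local FITS file
    header = fits.open('sample.fits')[0].header
    parser = FitsWcsParser(header, 'sample.fits', 0)

    # augment an empty chunk with whatever WCS axes the header defines
    chunk = Chunk()
    parser.augment_position(chunk)
    parser.augment_energy(chunk)
    parser.augment_temporal(chunk)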
--- caom2utils/caom2utils/tests/test_data_util.py | 2 +- caom2utils/caom2utils/tests/test_fits2caom2.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/caom2utils/caom2utils/tests/test_data_util.py b/caom2utils/caom2utils/tests/test_data_util.py index 886fcbab..1c936285 100644 --- a/caom2utils/caom2utils/tests/test_data_util.py +++ b/caom2utils/caom2utils/tests/test_data_util.py @@ -369,7 +369,7 @@ def _check_put_result(client_mock): replace=True, file_type='application/fits', file_encoding=None, - md5_checksum='3c66ee2cb6e0c2cfb5cd6824d353dc11', + md5_checksum='md5:3c66ee2cb6e0c2cfb5cd6824d353dc11', ) diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index bd33d532..e3063b19 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -71,7 +71,7 @@ from astropy.wcs import WCS as awcs from cadcutils import net from cadcdata import FileInfo -from caom2utils import FitsParser, WcsParser, main_app, update_blueprint +from caom2utils import FitsParser, FitsWcsParser, main_app, update_blueprint from caom2utils import ObsBlueprint, GenericParser, gen_proc from caom2utils import get_gen_proc_arg_parser, augment from caom2utils.legacy import load_config @@ -402,7 +402,7 @@ def test_augment_artifact_time_from_blueprint(): def test_get_wcs_values(): w = get_test_wcs(sample_file_4axes) - test_parser = WcsParser(get_test_header(sample_file_4axes)[0].header, + test_parser = FitsWcsParser(get_test_header(sample_file_4axes)[0].header, sample_file_4axes, 0) result = test_parser._sanitize(w.wcs.equinox) assert result is None @@ -418,7 +418,7 @@ def test_get_wcs_values(): def test_wcs_parser_augment_failures(): - test_parser = WcsParser(get_test_header(sample_file_4axes)[0].header, + test_parser = FitsWcsParser(get_test_header(sample_file_4axes)[0].header, sample_file_4axes, 0) test_obs = SimpleObservation('collection', 'MA1_DRAO-ST', Algorithm('exposure')) From 76e21f4fcc098a0de9ce3da6f9d957254d16f921 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Tue, 22 Feb 2022 13:49:26 -0800 Subject: [PATCH 03/38] CADC-10809 - interim commit - rename the fits2caom2 module to caom2blueprint. --- caom2utils/caom2utils/__init__.py | 2 +- .../{fits2caom2.py => caom2blueprint.py} | 0 caom2utils/caom2utils/legacy.py | 22 +++++++++---------- .../caom2utils/tests/test_collections.py | 10 ++++----- .../caom2utils/tests/test_fits2caom2.py | 20 ++++++++--------- caom2utils/caom2utils/tests/test_si_uris.py | 4 ++-- 6 files changed, 29 insertions(+), 29 deletions(-) rename caom2utils/caom2utils/{fits2caom2.py => caom2blueprint.py} (100%) diff --git a/caom2utils/caom2utils/__init__.py b/caom2utils/caom2utils/__init__.py index 135259fd..e6fdda70 100755 --- a/caom2utils/caom2utils/__init__.py +++ b/caom2utils/caom2utils/__init__.py @@ -5,7 +5,7 @@ """ from .data_util import * # noqa -from .fits2caom2 import * # noqa +from .caom2blueprint import * # noqa from .legacy import * # noqa from .wcs_util import * # noqa from .wcsvalidator import * # noqa diff --git a/caom2utils/caom2utils/fits2caom2.py b/caom2utils/caom2utils/caom2blueprint.py similarity index 100% rename from caom2utils/caom2utils/fits2caom2.py rename to caom2utils/caom2utils/caom2blueprint.py diff --git a/caom2utils/caom2utils/legacy.py b/caom2utils/caom2utils/legacy.py index 3640761c..e347a04e 100755 --- a/caom2utils/caom2utils/legacy.py +++ b/caom2utils/caom2utils/legacy.py @@ -69,7 +69,7 @@ import logging import sys -from . 
import fits2caom2 +from . import caom2blueprint import traceback APP_NAME = 'fits2caom2' @@ -104,7 +104,7 @@ def __init__(self, blueprint, user_supplied_config): self._inverse_user_supplied_config[v] = [k] def get_caom2_elements(self, lookup): - if lookup in fits2caom2.ObsBlueprint._CAOM2_ELEMENTS: + if lookup in caom2blueprint.ObsBlueprint._CAOM2_ELEMENTS: return [lookup] elif lookup in self._inverse_user_supplied_config.keys(): return self._inverse_user_supplied_config[lookup] @@ -370,17 +370,17 @@ def _update_axis_info(parser, defaults, overrides, config): for key, value in i.items(): if (key.startswith('CTYPE')) and key[-1].isdigit(): value = value.split('-')[0] - if value in fits2caom2.ENERGY_CTYPES: + if value in caom2blueprint.ENERGY_CTYPES: energy_axis = key[-1] - elif value in fits2caom2.POLARIZATION_CTYPES: + elif value in caom2blueprint.POLARIZATION_CTYPES: polarization_axis = key[-1] - elif value in fits2caom2.TIME_KEYWORDS: + elif value in caom2blueprint.TIME_KEYWORDS: time_axis = key[-1] - elif value in fits2caom2.POSITION_CTYPES[0]: + elif value in caom2blueprint.POSITION_CTYPES[0]: ra_axis = key[-1] - elif value in fits2caom2.POSITION_CTYPES[1]: + elif value in caom2blueprint.POSITION_CTYPES[1]: dec_axis = key[-1] - elif value in fits2caom2.OBSERVABLE_CTYPES: + elif value in caom2blueprint.OBSERVABLE_CTYPES: obs_axis = key[-1] else: raise ValueError(f'Unrecognized CTYPE: {value}') @@ -508,7 +508,7 @@ def update_blueprint(obs_blueprint, artifact_uri=None, config=None, def main_app(): - parser = fits2caom2.get_arg_parser() + parser = caom2blueprint.get_arg_parser() # add legacy fits2caom2 arguments parser.add_argument('--config', required=False, @@ -545,7 +545,7 @@ def main_app(): obs_blueprint = {} for i, uri in enumerate(args.fileURI): - obs_blueprint[uri] = fits2caom2.ObsBlueprint() + obs_blueprint[uri] = caom2blueprint.ObsBlueprint() if config: result = update_blueprint(obs_blueprint[uri], uri, config, defaults, overrides) @@ -554,7 +554,7 @@ def main_app(): f'Errors parsing the config files: {result}') try: - fits2caom2.proc(args, obs_blueprint) + caom2blueprint.proc(args, obs_blueprint) except Exception as e: logging.error(e) tb = traceback.format_exc() diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index c89cea4c..24e6bc61 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -67,7 +67,7 @@ # from cadcdata import FileInfo -from caom2utils import legacy, fits2caom2, data_util +from caom2utils import legacy, caom2blueprint, data_util from caom2 import ObservationReader, ObservationWriter from caom2.diff import get_differences @@ -123,14 +123,14 @@ def test_differences(directory): else: inputs = blueprints application = '{} {} '.format('caom2gen', data_files_parameter) - app_cmd = fits2caom2.caom2gen + app_cmd = caom2blueprint.caom2gen else: defaults = _get_parameter('default', directory) assert defaults overrides = _get_parameter('override', directory) assert overrides inputs = f'{config} {defaults} {overrides}' - application = '{} {}'.format('fits2caom2', data_files_parameter) + application = '{} {}'.format('caom2blueprint', data_files_parameter) app_cmd = legacy.main_app temp = ' '.join(file_meta[0]) cardinality = f'{product_id} {temp}' @@ -140,8 +140,8 @@ def test_differences(directory): swc_si_mock,\ patch('cadcutils.net.ws.WsCapabilities.get_access_url', autospec=True) as cap_mock,\ - patch('caom2utils.fits2caom2.get_vos_headers') as 
gvh_mock, \ - patch('caom2utils.fits2caom2._get_vos_meta') as gvm_mock, \ + patch('caom2utils.caom2blueprint.get_vos_headers') as gvh_mock, \ + patch('caom2utils.caom2blueprint._get_vos_meta') as gvm_mock, \ patch('caom2utils.data_util.get_local_headers_from_fits') as \ header_mock: def info_mock(uri): diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index e3063b19..7df41f7f 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -75,8 +75,8 @@ from caom2utils import ObsBlueprint, GenericParser, gen_proc from caom2utils import get_gen_proc_arg_parser, augment from caom2utils.legacy import load_config -from caom2utils.fits2caom2 import _visit, _load_plugin -from caom2utils.fits2caom2 import _get_and_update_artifact_meta +from caom2utils.caom2blueprint import _visit, _load_plugin +from caom2utils.caom2blueprint import _get_and_update_artifact_meta from caom2 import ObservationWriter, SimpleObservation, Algorithm from caom2 import Artifact, ProductType, ReleaseType, ObservationIntentType @@ -1256,7 +1256,7 @@ def test_visit_generic_parser(): assert False, f'should not get here {e}' -@patch('caom2utils.fits2caom2.Client') +@patch('caom2utils.caom2blueprint.Client') def test_get_vos_headers(vos_mock): test_uri = 'vos://cadc.nrc.ca!vospace/CAOMworkshop/Examples/DAO/' \ 'dao_c122_2016_012725.fits' @@ -1274,7 +1274,7 @@ def test_get_vos_headers(vos_mock): caom2utils.data_util.get_local_file_headers = get_orig -@patch('caom2utils.fits2caom2.Client') +@patch('caom2utils.caom2blueprint.Client') def test_get_vos_meta(vos_mock): get_orig = caom2utils.get_vos_headers try: @@ -1318,7 +1318,7 @@ def test_get_external_headers(): with patch('requests.Session.get') as session_get_mock: session_get_mock.return_value.status_code = 200 session_get_mock.return_value.text = TEST_TEXT - test_headers = caom2utils.fits2caom2.get_external_headers(test_uri) + test_headers = caom2utils.caom2blueprint.get_external_headers(test_uri) assert test_headers is not None assert len(test_headers) == 2 assert test_headers[0]['SIMPLE'] is True, 'SIMPLE header not found' @@ -1326,18 +1326,18 @@ def test_get_external_headers(): assert session_get_mock.is_called_with(test_uri) -@patch('caom2utils.fits2caom2.get_external_headers') +@patch('caom2utils.caom2blueprint.get_external_headers') def test_get_external_headers_fails(get_external_mock): get_external_mock.return_value = None test_collection = 'TEST_COLLECTION' test_obs_id = 'TEST_OBS_ID' test_uri = f'gemini:{test_collection}/abc.fits' test_product_id = 'TEST_PRODUCT_ID' - test_blueprint = caom2utils.fits2caom2.ObsBlueprint() + test_blueprint = caom2utils.caom2blueprint.ObsBlueprint() test_observation = SimpleObservation(collection=test_collection, observation_id=test_obs_id, algorithm=Algorithm(name='exposure')) - test_result = caom2utils.fits2caom2._augment( + test_result = caom2utils.caom2blueprint._augment( obs=test_observation, product_id=test_product_id, uri=test_uri, @@ -1527,7 +1527,7 @@ def test_update_artifact_meta_errors(): @patch('caom2utils.data_util.StorageInventoryClient', autospec=True) @patch('cadcutils.net.ws.WsCapabilities.get_access_url', autospec=True) @patch('sys.stdout', new_callable=BytesIO) -@patch('caom2utils.fits2caom2._augment') +@patch('caom2utils.caom2blueprint._augment') def test_gen_proc_failure(augment_mock, stdout_mock, cap_mock, client_mock): """ Tests that gen_proc can return -1.""" @@ -1550,7 +1550,7 @@ def 
test_gen_proc_failure(augment_mock, stdout_mock, cap_mock, client_mock): @patch('sys.stdout', new_callable=io.StringIO) -@patch('caom2utils.fits2caom2.Client') +@patch('caom2utils.caom2blueprint.Client') def test_parser_construction(vos_mock, stdout_mock): vos_mock.get_node.side_effect = _get_node test_uri = 'vos:goliaths/abc.fits.gz' diff --git a/caom2utils/caom2utils/tests/test_si_uris.py b/caom2utils/caom2utils/tests/test_si_uris.py index 455a6c16..44e4ed72 100644 --- a/caom2utils/caom2utils/tests/test_si_uris.py +++ b/caom2utils/caom2utils/tests/test_si_uris.py @@ -70,7 +70,7 @@ import sys from cadcdata import FileInfo from caom2 import obs_reader_writer -from caom2utils import fits2caom2 +from caom2utils import caom2blueprint from unittest.mock import patch from . import test_collections as tc @@ -108,7 +108,7 @@ def _info_mock(uri): '--observation TEST_COLLECTION TEST_OBS_ID ' '--lineage test_product_id/cadc:TEST/test_file.fits ' '--blueprint {}'.format(out_fqn, bp_fqn)).split() - fits2caom2.caom2gen() + caom2blueprint.caom2gen() assert os.path.exists(out_fqn), 'expect output file' obs_reader = obs_reader_writer.ObservationReader() From 639a1f4455c022bd9383a45f43c773725bbdb2fc Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 24 Feb 2022 13:00:07 -0800 Subject: [PATCH 04/38] CADC-10809 - interim commit. --- caom2utils/caom2utils/caom2blueprint.py | 1031 ++++++++++++----- caom2utils/caom2utils/legacy.py | 5 +- .../20220201T200117/taos.blueprint | 9 + .../caom2utils/tests/test_collections.py | 9 +- .../caom2utils/tests/test_fits2caom2.py | 21 +- 5 files changed, 778 insertions(+), 297 deletions(-) create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 8ad4ef4c..2f7e04bc 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -71,7 +71,7 @@ from logging.handlers import TimedRotatingFileHandler import math -from astropy.wcs import Wcsprm +from astropy.wcs import Wcsprm, WCS from astropy.io import fits from astropy.time import Time from cadcutils import version @@ -1445,13 +1445,18 @@ def is_fits(value): def is_table(value): """Hide the blueprint structure from clients - they shouldn't need to know that a value of type tuple requires special processing.""" - return ObsBlueprint.is_fits(value) and value[0] == 'BINTABLE' + return ObsBlueprint.needs_lookup(value) and value[0] == 'BINTABLE' @staticmethod def is_function(value): - return (not ObsBlueprint.is_fits(value) and isinstance(value, str) + return (not ObsBlueprint.needs_lookup(value) and isinstance(value, str) and isinstance(value, str) and '(' in value and ')' in value) + @staticmethod + def has_default_value(value): + """""" + return isinstance(value, tuple) and value[1] + @staticmethod def has_no_value(value): """If functions return None, try not to update the WCS with this @@ -1459,6 +1464,12 @@ def has_no_value(value): return value is None or ( isinstance(value, str) and 'None' in value.strip()) + @staticmethod + def needs_lookup(value): + """Hide the blueprint structure from clients - they shouldn't need + to know that a value of type tuple requires special processing.""" + return isinstance(value, tuple) + def get_configed_axes_count(self): """:return how many axes have been configured to read from WCS""" configed_axes = 0 @@ -1485,6 +1496,58 @@ def update(self, value): self._update = value +class Hdf5ObsBlueprint(ObsBlueprint): + def 
__init__(self, position_axes=None, energy_axis=None, + polarization_axis=None, time_axis=None, + obs_axis=None, custom_axis=None, module=None, + update=True, instantiated_class=None): + super().__init__( + position_axes, + energy_axis, + polarization_axis, + time_axis, + obs_axis, + custom_axis, + module, + update, + instantiated_class, + ) + tmp = {'Observation.metaRelease': ([], None), + 'Observation.instrument.name': ([], None), + 'Observation.type': ([], None), + 'Observation.environment.ambientTemp': ([], + None), + # set the default for SimpleObservation construction + 'Observation.algorithm.name': ([], 'exposure'), + 'Observation.instrument.keywords': ([], None), + 'Observation.proposal.id': ([], None), + 'Observation.target.name': ([], None), + 'Observation.telescope.name': ([], None), + 'Observation.telescope.geoLocationX': ([], + None), + 'Observation.telescope.geoLocationY': ([], + None), + 'Observation.telescope.geoLocationZ': ([], + None), + 'Observation.observationID': ([], None), + 'Plane.calibrationLevel': ([], CalibrationLevel.RAW_STANDARD), + 'Plane.dataProductType': ([], DataProductType.IMAGE), + 'Plane.metaRelease': ([], None), + 'Plane.dataRelease': ([], None), + 'Plane.productID': ([], None), + 'Plane.provenance.name': ([], None), + 'Plane.provenance.project': ([], None), + 'Plane.provenance.producer': ([], None), + 'Plane.provenance.reference': ([], None), + 'Plane.provenance.lastExecuted': ([], None), + 'Artifact.releaseType': ([], ReleaseType.DATA), + 'Chunk': 'include' + } + # using the tmp to make sure that the keywords are valid + for key in tmp: + self.set(key, tmp[key]) + + class GenericParser: """ Extract CAOM2 metadata from files with no WCS information. @@ -1524,7 +1587,7 @@ def apply_blueprint(self): # apply defaults for key, value in plan.items(): - if ObsBlueprint.is_fits(value) and value[1]: + if ObsBlueprint.needs_lookup(value) and value[1]: # there is a default value set if key in plan: plan[key] = value[1] @@ -1543,6 +1606,12 @@ def augment_observation(self, observation, artifact_uri, product_id=None): raise ValueError( f'Observation type mis-match for {observation}.') + temp = self._get_from_list( + 'Observation.metaRelease', index=0, + current=observation.meta_release + ) + logging.error(f'{temp}!!!!!!!!!!!!!') + logging.error(f'{self._get_from_list}!!!!!!!!!!!!!') observation.meta_release = self._get_datetime(self._get_from_list( 'Observation.metaRelease', index=0, current=observation.meta_release)) @@ -1607,12 +1676,12 @@ def augment_plane(self, plane, artifact_uri): self._to_release_type(self._get_from_list( 'Artifact.releaseType', index=0))) plane.artifacts[artifact_uri] = artifact - self.augment_artifact(artifact) + self.augment_artifact(artifact, 0) self.logger.debug( 'End generic CAOM2 plane augmentation for {}.'.format( artifact_uri)) - def augment_artifact(self, artifact): + def augment_artifact(self, artifact, index): """ Augments a given CAOM2 artifact with available FITS information :param artifact: existing CAOM2 artifact to be augmented @@ -1653,14 +1722,14 @@ def _get_from_list(self, lookup, index, current=None): except KeyError: self.add_error(lookup, sys.exc_info()[1]) self.logger.debug( - 'Could not find {!r} in fits2caom2 configuration.'.format( - lookup)) + f'Could not find {lookup} in configuration.') if current: self.logger.debug( f'{lookup}: using current value of {current!r}.') value = current + logging.error(f'other value {value}') return value - if (keywords and not ObsBlueprint.is_fits(keywords) + if (keywords and not 
ObsBlueprint.needs_lookup(keywords) and not ObsBlueprint.is_function(keywords)): value = keywords elif self._blueprint.update: @@ -1677,6 +1746,7 @@ def _get_from_list(self, lookup, index, current=None): if isinstance(value, bool) or current is not None: value = current + logging.error(f'value {value}') self.logger.debug(f'{lookup}: value is {value}') return value @@ -1699,8 +1769,8 @@ def _to_release_type(self, value): def _to_enum_type(self, value, to_enum_type): if value is None: raise ValueError( - 'Must set a value of {} for {}.'.format(to_enum_type.__name__, - self.logging_name)) + f'Must set a value of {to_enum_type.__name__} for ' + f'{self._uri}.') elif isinstance(value, to_enum_type): return value else: @@ -1836,103 +1906,92 @@ class BlueprintParser(GenericParser): def __init__(self, obs_blueprint=None, uri=None): super().__init__(obs_blueprint, uri) + self._wcs_parser = WcsParser() + + def _get_chunk_naxis(self, chunk, index): + chunk.naxis = self._get_from_list( + 'Chunk.naxis', index, self._wcs_parser.wcs.wcs.naxis) - def augment_artifact(self, artifact): + def augment_artifact(self, artifact, index): """ Augments a given CAOM2 artifact with available FITS information :param artifact: existing CAOM2 artifact to be augmented """ - super().augment_artifact(artifact) + super().augment_artifact(artifact, index) - self.logger.debug( - 'Begin artifact augmentation for {} with {} HDUs.'.format( - artifact.uri, len(self.headers))) + self.logger.debug(f'Begin artifact augmentation for {artifact.uri}') if self.blueprint.get_configed_axes_count() == 0: raise TypeError( - 'No WCS Data. End artifact augmentation for {}.'.format( - artifact.uri)) + f'No WCS Data. End artifact augmentation for {artifact.uri}.') - for i, header in enumerate(self.headers): - ii = str(i) + if self.ignore_chunks(artifact, index): + return - if self.ignore_chunks(artifact, i, ii): - continue - # # there is one Part per extension, the name is the extension number - # if self._has_data_array(header) and self.blueprint.has_chunk(i): - # if ii not in artifact.parts.keys(): - # # TODO use extension name? - # artifact.parts.add(Part(ii)) - # self.logger.debug(f'Part created for HDU {ii}.') - # else: - # artifact.parts.add(Part(ii)) - # self.logger.debug(f'Create empty part for HDU {ii}') - # continue - - part = artifact.parts[ii] - part.product_type = self._get_from_list('Part.productType', i) - part.meta_producer = self._get_from_list( - 'Part.metaProducer', index=0, current=part.meta_producer) - - # each Part has one Chunk, if it's not an empty part as determined - # just previously - if not part.chunks: - part.chunks.append(Chunk()) - chunk = part.chunks[0] - chunk.meta_producer = self._get_from_list( - 'Chunk.metaProducer', index=0, current=chunk.meta_producer) - - wcs_parser = FitsWcsParser(header, self.file, ii) - # NOTE: astropy.wcs does not distinguished between WCS axes and - # data array axes. naxis in astropy.wcs represents in fact the - # number of WCS axes, whereas chunk.axis represents the naxis - # of the data array. 
Solution is to determine it directly from - # the header - if 'ZNAXIS' in header: - chunk.naxis = _to_int(header['ZNAXIS']) - elif 'NAXIS' in header: - chunk.naxis = _to_int(header['NAXIS']) - else: - chunk.naxis = self._get_from_list('Chunk.naxis', 0, - wcs_parser.wcs.wcs.naxis) - if self.blueprint._pos_axes_configed: - wcs_parser.augment_position(chunk) - logging.error(chunk.position) - if chunk.position is None: - self._try_position_with_blueprint(chunk, i) - if chunk.position: - chunk.position.resolution = self._get_from_list( - 'Chunk.position.resolution', index=i) + part = artifact.parts[str(index)] + part.product_type = self._get_from_list('Part.productType', index) + part.meta_producer = self._get_from_list( + 'Part.metaProducer', index=0, current=part.meta_producer) + + # each Part has one Chunk, if it's not an empty part as determined + # just previously + if not part.chunks: + part.chunks.append(Chunk()) + chunk = part.chunks[0] + chunk.meta_producer = self._get_from_list( + 'Chunk.metaProducer', index=0, current=chunk.meta_producer) + + # NOTE: astropy.wcs does not distinguished between WCS axes and + # data array axes. naxis in astropy.wcs represents in fact the + # number of WCS axes, whereas chunk.axis represents the naxis + # of the data array. Solution is to determine it directly from + # the header + # if 'ZNAXIS' in header: + # chunk.naxis = _to_int(header['ZNAXIS']) + # elif 'NAXIS' in header: + # chunk.naxis = _to_int(header['NAXIS']) + # else: + # chunk.naxis = self._get_from_list('Chunk.naxis', 0, + # wcs_parser.wcs.wcs.naxis) + self._get_chunk_naxis(chunk, index) + if self.blueprint._pos_axes_configed: + self._wcs_parser.augment_position(chunk) + logging.error(chunk.position) + if chunk.position is None: + self._try_position_with_blueprint(chunk, index) + if chunk.position: + chunk.position.resolution = self._get_from_list( + 'Chunk.position.resolution', index=index) + if self.blueprint._energy_axis_configed: + self._wcs_parser.augment_energy(chunk) + if chunk.energy: + chunk.energy.bandpass_name = self._get_from_list( + 'Chunk.energy.bandpassName', index=index) + chunk.energy.transition = self._get_energy_transition( + chunk.energy.transition) + chunk.energy.resolving_power = _to_float(self._get_from_list( + 'Chunk.energy.resolvingPower', index=index)) + else: if self.blueprint._energy_axis_configed: - wcs_parser.augment_energy(chunk) - if chunk.energy: - chunk.energy.bandpass_name = self._get_from_list( - 'Chunk.energy.bandpassName', index=i) - chunk.energy.transition = self._get_energy_transition( - chunk.energy.transition) - chunk.energy.resolving_power = _to_float(self._get_from_list( - 'Chunk.energy.resolvingPower', index=i)) - else: - if self.blueprint._energy_axis_configed: - self._try_energy_with_blueprint(chunk, i) - if self.blueprint._time_axis_configed: - wcs_parser.augment_temporal(chunk) - if chunk.time is None: - self._try_time_with_blueprint(chunk, i) - if self.blueprint._polarization_axis_configed: - wcs_parser.augment_polarization(chunk) - if chunk.polarization is None: - self._try_polarization_with_blueprint(chunk, i) - if self.blueprint._obs_axis_configed: - wcs_parser.augment_observable(chunk) - if chunk.observable is None and chunk.observable_axis is None: - self._try_observable_with_blueprint(chunk, i) - if self.blueprint._custom_axis_configed: - wcs_parser.augment_custom(chunk) - - # try to set smaller bits of the chunk WCS elements from the - # blueprint - self._try_range_with_blueprint(chunk, i) + self._try_energy_with_blueprint(chunk, 
index) + if self.blueprint._time_axis_configed: + self._wcs_parser.augment_temporal(chunk) + if chunk.time is None: + self._try_time_with_blueprint(chunk, index) + if self.blueprint._polarization_axis_configed: + self._wcs_parser.augment_polarization(chunk) + if chunk.polarization is None: + self._try_polarization_with_blueprint(chunk, index) + if self.blueprint._obs_axis_configed: + self._wcs_parser.augment_observable(chunk) + if chunk.observable is None and chunk.observable_axis is None: + self._try_observable_with_blueprint(chunk, index) + if self.blueprint._custom_axis_configed: + self._wcs_parser.augment_custom(chunk) + + # try to set smaller bits of the chunk WCS elements from the + # blueprint + self._try_range_with_blueprint(chunk, index) self.logger.debug( f'End artifact augmentation for {artifact.uri}.') @@ -2016,6 +2075,9 @@ def augment_plane(self, plane, artifact_uri): self.logger.debug( f'End plane augmentation for {artifact_uri}.') + def _content_lookup(self, key, extension=None): + raise NotImplementedError + def _get_algorithm(self, obs): """ Create an Algorithm instance populated with available FITS information. @@ -2093,6 +2155,74 @@ def _get_environment(self, current): self.logger.debug('End Environment augmentation.') return enviro + def _get_from_list(self, lookup, index, current=None): + value = None + try: + keys = self.blueprint._get(lookup) + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + self.logger.debug( + f'Could not find {lookup!r} in fits2caom2 configuration.') + if current: + self.logger.debug( + f'{lookup}: using current value of {current!r}.') + value = current + return value + + # logging.error(f'here 1??? {isinstance(keys, tuple)} {type(keys)}') + + if ObsBlueprint.needs_lookup(keys): + for ii in keys[0]: + try: + value = self._content_lookup(ii, index) + logging.error(value) + if value: + self.logger.debug( + f'{lookup}: assigned value {value} based on ' + f'keyword {ii}.') + break + except (KeyError, IndexError): + if keys[0].index(ii) == len(keys[0]) - 1: + self.add_error(lookup, sys.exc_info()[1]) + # assign a default value, if one exists + if keys[1]: + if current is None: + value = keys[1] + self.logger.debug( + f'{lookup}: assigned default value {value}.') + else: + value = current + if value is None: + # checking current does not work in the general case, + # because current might legitimately be 'None' + if self._blueprint.update: + if ( + current is not None + or (current is None and isinstance(value, bool)) + ): + value = current + self.logger.debug( + f'{lookup}: used current value {value}.') + else: + # assign a default value, if one exists + if keys[1]: + if current is None: + value = keys[1] + self.logger.debug( + f'{lookup}: assigned default value {value}.') + else: + value = current + + elif (keys is not None) and (keys != ''): + value = keys + logging.error(f'here 2 {value}???') + elif current: + value = current + logging.error(f'here 3 {value}???') + + self.logger.debug(f'{lookup}: value is {value}') + return value + def _get_instrument(self, current): """ Create an Instrument instance populated with available FITS @@ -2389,6 +2519,36 @@ def _get_requirements(self, current): self.logger.debug('End Requirement augmentation.') return reqts + def _get_set_from_list(self, lookup, index): + value = None + keywords = None + try: + keywords = self.blueprint._get(lookup) + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + self.logger.debug( + 'Could not find \'{}\' in fits2caom2 configuration.'.format( + lookup)) + 
+ if isinstance(keywords, tuple): + for ii in keywords[0]: + try: + # value = self.headers[index].get(ii) + value = self._content_lookup(ii, index) + break + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + if keywords[1]: + value = keywords[1] + self.logger.debug( + '{}: assigned default value {}.'.format(lookup, + value)) + elif keywords: + value = keywords + self.logger.debug(f'{lookup}: assigned value {value}.') + + return value + def _get_target(self, current): """ Create a Target instance populated with available FITS information. @@ -2897,20 +3057,20 @@ def headers(self): """ return self._headers - def ignore_chunks(self, artifact, i, ii): + def ignore_chunks(self, artifact, index): # there is one Part per extension, the name is the extension number if ( - FitsParser._has_data_array(self._headers[i]) - and self.blueprint.has_chunk(i) + FitsParser._has_data_array(self._headers[index]) + and self.blueprint.has_chunk(index) ): - if ii not in artifact.parts.keys(): + if str(index) not in artifact.parts.keys(): # TODO use extension name? - artifact.parts.add(Part(ii)) - self.logger.debug(f'Part created for HDU {ii}.') + artifact.parts.add(Part(str(index))) + self.logger.debug(f'Part created for HDU {index}.') result = False else: - artifact.parts.add(Part(ii)) - self.logger.debug(f'Create empty part for HDU {ii}') + artifact.parts.add(Part(str(index))) + self.logger.debug(f'Create empty part for HDU {index}') result = True return result @@ -2945,7 +3105,7 @@ def apply_blueprint(self): # apply overrides from blueprint to all extensions for key, value in plan.items(): if key in wcs_std: - if ObsBlueprint.is_fits(value): + if ObsBlueprint.needs_lookup(value): # alternative attributes provided for standard wcs attrib. for header in self.headers: for v in value[0]: @@ -2961,7 +3121,7 @@ def apply_blueprint(self): continue else: # value provided for standard wcs attribute - if ObsBlueprint.is_fits(wcs_std[key]): + if ObsBlueprint.needs_lookup(wcs_std[key]): keywords = wcs_std[key][0] elif ObsBlueprint.is_function(wcs_std[key]): continue @@ -2989,7 +3149,7 @@ def apply_blueprint(self): extension)) # apply defaults to all extensions for key, value in plan.items(): - if ObsBlueprint.is_fits(value) and value[1]: + if ObsBlueprint.needs_lookup(value) and value[1]: # there is a default value set for index, header in enumerate(self.headers): for keywords in value[0]: @@ -3044,69 +3204,106 @@ def apply_blueprint(self): return - def _get_from_list(self, lookup, index, current=None): - value = None - try: - keywords = self.blueprint._get(lookup) - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - self.logger.debug( - 'Could not find {!r} in fits2caom2 configuration.'.format( - lookup)) - if current: - self.logger.debug( - f'{lookup}: using current value of {current!r}.') - value = current - return value + def augment_artifact(self, artifact, index=0): + """ + Augments a given CAOM2 artifact with available FITS information + :param artifact: existing CAOM2 artifact to be augmented + """ + self.logger.debug( + 'Begin artifact augmentation for {} with {} HDUs.'.format( + artifact.uri, len(self.headers))) - if isinstance(keywords, tuple): - for ii in keywords[0]: - try: - value = self.headers[index].get(ii) - if value: - self.logger.debug( - '{}: assigned value {} based on keyword {}.'. 
- format(lookup, value, ii)) - break - except (KeyError, IndexError): - if keywords[0].index(ii) == len(keywords[0]) - 1: - self.add_error(lookup, sys.exc_info()[1]) - # assign a default value, if one exists - if keywords[1]: - if current is None: - value = keywords[1] - self.logger.debug( - '{}: assigned default value {}.'.format(lookup, - value)) - else: - value = current - if value is None: - # checking current does not work in the general case, - # because current might legitimately be 'None' - if self._blueprint.update: - if (current is not None or - (current is None and isinstance(value, bool))): - value = current - self.logger.debug('{}: used current value ' - '{!r}.'.format(lookup, value)) - else: - # assign a default value, if one exists - if keywords[1]: - if current is None: - value = keywords[1] - self.logger.debug( - '{}: assigned default value {}.'.format(lookup, - value)) - else: - value = current + if self.blueprint.get_configed_axes_count() == 0: + raise TypeError( + 'No WCS Data. End artifact augmentation for {}.'.format( + artifact.uri)) - elif (keywords is not None) and (keywords != ''): - value = keywords - elif current: - value = current + for i, header in enumerate(self.headers): + if self.ignore_chunks(artifact, i): + continue + self._wcs_parser = FitsWcsParser(header, self.file, str(i)) + super().augment_artifact(artifact, i) + # part.product_type = self._get_from_list('Part.productType', i) + # part.meta_producer = self._get_from_list( + # 'Part.metaProducer', index=0, current=part.meta_producer) + # + # # each Part has one Chunk, if it's not an empty part as determined + # # just previously + # if not part.chunks: + # part.chunks.append(Chunk()) + # chunk = part.chunks[0] + # chunk.meta_producer = self._get_from_list( + # 'Chunk.metaProducer', index=0, current=chunk.meta_producer) + # + # NOTE: astropy.wcs does not distinguished between WCS axes and + # data array axes. naxis in astropy.wcs represents in fact the + # number of WCS axes, whereas chunk.axis represents the naxis + # of the data array. 
Solution is to determine it directly from + # the header + # if 'ZNAXIS' in header: + # chunk.naxis = _to_int(header['ZNAXIS']) + # elif 'NAXIS' in header: + # chunk.naxis = _to_int(header['NAXIS']) + # else: + # chunk.naxis = self._get_from_list('Chunk.naxis', 0, + # wcs_parser.wcs.wcs.naxis) + # if self.blueprint._pos_axes_configed: + # wcs_parser.augment_position(chunk) + # logging.error(chunk.position) + # if chunk.position is None: + # self._try_position_with_blueprint(chunk, i) + # if chunk.position: + # chunk.position.resolution = self._get_from_list( + # 'Chunk.position.resolution', index=i) + # if self.blueprint._energy_axis_configed: + # wcs_parser.augment_energy(chunk) + # if chunk.energy: + # chunk.energy.bandpass_name = self._get_from_list( + # 'Chunk.energy.bandpassName', index=i) + # chunk.energy.transition = self._get_energy_transition( + # chunk.energy.transition) + # chunk.energy.resolving_power = _to_float(self._get_from_list( + # 'Chunk.energy.resolvingPower', index=i)) + # else: + # if self.blueprint._energy_axis_configed: + # self._try_energy_with_blueprint(chunk, i) + # if self.blueprint._time_axis_configed: + # wcs_parser.augment_temporal(chunk) + # if chunk.time is None: + # self._try_time_with_blueprint(chunk, i) + # if self.blueprint._polarization_axis_configed: + # wcs_parser.augment_polarization(chunk) + # if chunk.polarization is None: + # self._try_polarization_with_blueprint(chunk, i) + # if self.blueprint._obs_axis_configed: + # wcs_parser.augment_observable(chunk) + # if chunk.observable is None and chunk.observable_axis is None: + # self._try_observable_with_blueprint(chunk, i) + # if self.blueprint._custom_axis_configed: + # wcs_parser.augment_custom(chunk) + # + # # try to set smaller bits of the chunk WCS elements from the + # # blueprint + # self._try_range_with_blueprint(chunk, i) - self.logger.debug(f'{lookup}: value is {value}') - return value + self.logger.debug( + f'End artifact augmentation for {artifact.uri}.') + + def _content_lookup(self, key, extension=None): + return self.headers[extension].get(key) + + def _get_chunk_naxis(self, chunk, index=None): + # NOTE: astropy.wcs does not distinguished between WCS axes and + # data array axes. naxis in astropy.wcs represents in fact the + # number of WCS axes, whereas chunk.axis represents the naxis + # of the data array. 
Solution is to determine it directly from + # the header + if 'ZNAXIS' in self._headers[index]: + chunk.naxis = _to_int(self._headers[index]['ZNAXIS']) + elif 'NAXIS' in self._headers[index]: + chunk.naxis = _to_int(self._headers[index]['NAXIS']) + else: + super()._get_chunk_naxis(chunk) def _get_from_table(self, lookup, extension): """ @@ -3146,35 +3343,6 @@ def _get_from_table(self, lookup, extension): self.logger.debug(f'{lookup}: value is {value}') return value - def _get_set_from_list(self, lookup, index): - value = None - keywords = None - try: - keywords = self.blueprint._get(lookup) - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - self.logger.debug( - 'Could not find \'{}\' in fits2caom2 configuration.'.format( - lookup)) - - if isinstance(keywords, tuple): - for ii in keywords[0]: - try: - value = self.headers[index].get(ii) - break - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - if keywords[1]: - value = keywords[1] - self.logger.debug( - '{}: assigned default value {}.'.format(lookup, - value)) - elif keywords: - value = keywords - self.logger.debug(f'{lookup}: assigned value {value}.') - - return value - @staticmethod def _has_data_array(header): """ @@ -3210,22 +3378,81 @@ def _has_data_array(header): return True +class HDF5Parser(BlueprintParser): + + def __init__( + self, obs_blueprint, uri, local_f_name, find_roots_here='/sitedata' + ): + super().__init__(obs_blueprint, uri) + import h5py + self._file = h5py.File(local_f_name, 'r') + self._wcs_parser = None + self._roots = [] + self.apply_blueprint() + self._set_roots(find_roots_here, self._file) + + def _set_roots(self, root_name, root): + bits = root_name.split('/') + if len(bits) == 2: + logging.error(root[root_name].keys()) + for key in root[root_name].keys(): + self._roots.append(root[root_name][key]) + else: + x = f'/{"/".join(ii for ii in bits[2:])}' + self._set_roots(x, root) + + def augment_artifact(self, artifact, index=0): + for root in self._roots: + self._wcs_parser = Hdf5WcsParser(root, self.blueprint) + super().augment_artifact(artifact, index) + + def _content_lookup(self, key, extension=None): + bits = key.split('/') + if isinstance(extension, int): + extension = self._roots[extension] + + logging.error(f'key {key} root.name {extension.name} bits {bits}') + + if isinstance(key, list): + return None + if len(bits) == 2: + logging.error('path 1') + if '(' in bits[1]: + logging.error('path 2') + x = bits[1].split('(') + index = int(x[1].split(')')[0]) + logging.error(f'x {x} index {index}') + return extension[x[0]][index] + else: + logging.error('path 3') + return extension[bits[1]] + else: + # the 2 is because there's always a leading slash, so the + # first bit is an empty string + temp = f'/{"/".join(ii for ii in bits[2:])}' + logging.error(f'path 4 {temp} {bits[2:]}') + return self._content_lookup(temp, extension[bits[1]]) + + def _get_chunk_naxis(self, chunk, index): + chunk.naxis = self._get_from_list('Chunk.naxis', index, chunk.naxis) + + def ignore_chunks(self, artifact, index=0): + artifact.parts.add(Part(str(index))) + return False + + class WcsParser: + """ + WCS axes methods. + """ + ENERGY_AXIS = 'energy' POLARIZATION_AXIS = 'polarization' TIME_AXIS = 'time' def __init__(self): - """ - - :param header: FITS extension header - :param file: name of FITS file - :param extension: which HDU - WCS axes methods of this class. 
- """ - - # add the HDU extension to logging messages from this class self.logger = logging.getLogger(self.__class__.__name__) + self.wcs = None def augment_custom(self, chunk): """ @@ -3451,6 +3678,37 @@ def augment_observable(self, chunk): Slice(self._get_axis(0, ctype, cunit), pix_bin)) self.logger.debug('End Observable WCS augmentation.') + def _get_axis(self, index, over_ctype=None, over_cunit=None): + """ Assemble a generic axis """ + aug_ctype = str(self.wcs.ctype[index]) if over_ctype is None \ + else over_ctype + aug_cunit = str(self.wcs.cunit[index]) if over_cunit is None \ + else over_cunit + if aug_cunit is not None and len(aug_cunit) == 0: + aug_cunit = None + aug_axis = Axis(aug_ctype, aug_cunit) + return aug_axis + + def _get_axis_index(self, keywords): + """ + Return the index of a specific axis type or None of it doesn't exist + :param keywords: + :return: + """ + axis = None + for i, elem in enumerate(self.wcs.ctype): + elem = elem.split('-')[0] + logging.error(elem) + if elem in keywords: + axis = i + break + elif len(elem) == 0: + check = self.wcs.ctype[i] + if check in keywords: + axis = i + break + return axis + def _get_cd(self, x_index, y_index): """ returns cd info""" @@ -3492,6 +3750,21 @@ def _get_dimension(self, xindex, yindex): self.logger.debug('End 2D dimension augmentation.') return aug_dimension + def _get_position_axis(self): + # there are two celestial axes, get the applicable indices from + # the axis_types + xindex = self._get_axis_index(POSITION_CTYPES[0]) + yindex = self._get_axis_index(POSITION_CTYPES[1]) + + if (xindex is not None) and (yindex is not None): + return xindex + 1, yindex + 1 + elif (xindex is None) and (yindex is None): + return None + else: + raise ValueError('Found only one position axis ra/dec: {}/{} in ' + '{}'. 
+ format(xindex, yindex, self.file)) + def _get_ref_coord(self, index): aug_crpix = _to_float(self._sanitize(self.wcs.crpix[index])) aug_crval = _to_float(self._sanitize(self.wcs.crval[index])) @@ -3500,6 +3773,52 @@ def _get_ref_coord(self, index): aug_ref_coord = RefCoord(aug_crpix, aug_crval) return aug_ref_coord + def _get_spatial_axis(self, xindex, yindex): + """Assemble the bits to make the axis parameter needed for + SpatialWCS construction.""" + logging.error(f'xindex {xindex} yindex {yindex}') + aug_dimension = self._get_dimension(xindex, yindex) + + aug_ref_coord = Coord2D(self._get_ref_coord(xindex), + self._get_ref_coord(yindex)) + + aug_cd11, aug_cd12, aug_cd21, aug_cd22 = \ + self._get_cd(xindex, yindex) + + if aug_dimension is not None and \ + aug_ref_coord is not None and \ + aug_cd11 is not None and \ + aug_cd12 is not None and \ + aug_cd21 is not None and \ + aug_cd22 is not None: + aug_function = CoordFunction2D(aug_dimension, aug_ref_coord, + aug_cd11, aug_cd12, + aug_cd21, aug_cd22) + self.logger.debug('End CoordFunction2D augmentation.') + else: + aug_function = None + + aug_axis = CoordAxis2D(self._get_axis(xindex), + self._get_axis(yindex), + self._get_coord_error(xindex), + self._get_coord_error(yindex), + None, None, aug_function) + self.logger.debug('End CoordAxis2D augmentation.') + return aug_axis + + def _sanitize(self, value): + """ + Sanitizes values from FITS to caom2 + :param value: + :return: + """ + if isinstance(value, float) and math.isnan(value): + return None + elif not str(value): + return None # empty string + else: + return value + class FitsWcsParser(WcsParser): """ @@ -3534,83 +3853,6 @@ def __init__(self, header, file, extension): self.file = file self.extension = extension - def _get_axis_index(self, keywords): - """ - Return the index of a specific axis type or None of it doesn't exist - :param keywords: - :return: - """ - axis = None - for i, elem in enumerate(self.wcs.ctype): - elem = elem.split('-')[0] - if elem in keywords: - axis = i - break - elif len(elem) == 0: - check = self.wcs.ctype[i] - if check in keywords: - axis = i - break - return axis - - def _get_axis(self, index, over_ctype=None, over_cunit=None): - """ Assemble a generic axis """ - aug_ctype = str(self.wcs.ctype[index]) if over_ctype is None \ - else over_ctype - aug_cunit = str(self.wcs.cunit[index]) if over_cunit is None \ - else over_cunit - if aug_cunit is not None and len(aug_cunit) == 0: - aug_cunit = None - aug_axis = Axis(aug_ctype, aug_cunit) - return aug_axis - - def _get_spatial_axis(self, xindex, yindex): - """Assemble the bits to make the axis parameter needed for - SpatialWCS construction.""" - aug_dimension = self._get_dimension(xindex, yindex) - - aug_ref_coord = Coord2D(self._get_ref_coord(xindex), - self._get_ref_coord(yindex)) - - aug_cd11, aug_cd12, aug_cd21, aug_cd22 = \ - self._get_cd(xindex, yindex) - - if aug_dimension is not None and \ - aug_ref_coord is not None and \ - aug_cd11 is not None and \ - aug_cd12 is not None and \ - aug_cd21 is not None and \ - aug_cd22 is not None: - aug_function = CoordFunction2D(aug_dimension, aug_ref_coord, - aug_cd11, aug_cd12, - aug_cd21, aug_cd22) - self.logger.debug('End CoordFunction2D augmentation.') - else: - aug_function = None - - aug_axis = CoordAxis2D(self._get_axis(xindex), - self._get_axis(yindex), - self._get_coord_error(xindex), - self._get_coord_error(yindex), - None, None, aug_function) - self.logger.debug('End CoordAxis2D augmentation.') - return aug_axis - - def _get_position_axis(self): - # 
there are two celestial axes, get the applicable indices from - # the axis_types - xindex = self._get_axis_index(POSITION_CTYPES[0]) - yindex = self._get_axis_index(POSITION_CTYPES[1]) - - if (xindex is not None) and (yindex is not None): - return xindex + 1, yindex + 1 - elif (xindex is None) and (yindex is None): - return None - else: - raise ValueError('Found only one position axis ra/dec: {}/{} in ' - '{}'. - format(xindex, yindex, self.file)) - def _get_axis_length(self, for_axis): # try ZNAXIS first in order to get the size of the original # image in case it was FITS compressed @@ -3624,18 +3866,211 @@ def _get_axis_length(self, for_axis): raise ValueError(msg) return result - def _sanitize(self, value): + +class Hdf5WcsParser(WcsParser): + + def __init__(self, root, blueprint): """ - Sanitizes values from FITS to caom2 - :param value: - :return: + :param root: h5py.h5p.Dataset or h5py.h5p.Group + :param blueprint: ObsBlueprint """ - if isinstance(value, float) and math.isnan(value): - return None - elif not str(value): - return None # empty string + super().__init__() + self._wcs = None + self._axes = { + 'ra': [0, False], + 'dec': [0, False], + 'time': [0, False], + 'energy': [0, False], + 'polarization': [0, False], + 'obs': [0, False], + 'custom': [0, False], + } + self._set_wcs(root, blueprint) + + @property + def wcs(self): + return self._wcs.wcs + + @wcs.setter + def wcs(self, value): + self._wcs = value + + def _get_axis_index(self, keywords): + result = self._axes['custom'][0] + if 'RA' in keywords: + result = self._axes['ra'][0] + elif 'DEC' in keywords: + result = self._axes['dec'][0] + elif 'TIME' in keywords: + result = self._axes['time'][0] + elif 'FREQ' in keywords: + result = self._axes['energy'][0] + elif 'STOKES' in keywords: + result = self._axes['polarization'][0] + elif 'FLUX' in keywords: + result = self._axes['obs'][0] + return result + + def _get_axis_length(self, for_axis): + logging.error(f'{for_axis} {self._wcs.array_shape}') + # this is fucking broken, just trying to figure out what the indicees + # should really be right now + # index = 1 + # if for_axis == 2: + # index = 0 + # return self._wcs.array_shape[index] + return self._wcs.array_shape[for_axis-1] + + def _set_wcs(self, root, blueprint): + count = 0 + if blueprint._pos_axes_configed: + self._axes['ra'][1] = True + self._axes['ra'][0] = count + self._axes['dec'][1] = True + self._axes['dec'][0] = count + 1 + count += 2 + if blueprint._time_axis_configed: + self._axes['time'][1] = True + self._axes['time'][0] = count + count += 1 + if blueprint._energy_axis_configed: + self._axes['energy'][1] = True + self._axes['energy'][0] = count + count += 1 + if blueprint._polarization_axis_configed: + self._axes['polarization'][1] = True + self._axes['polarization'][0] = count + count += 1 + if blueprint._obs_axis_configed: + self._axes['obs'][1] = True + self._axes['obs'][0] = count + count += 1 + if blueprint._custom_axis_configed: + self._axes['custom'][1] = True + self._axes['custom'][0] = count + count += 1 + + self._wcs = WCS(naxis=count) + z = [ + _to_str(self._xx_lookup( + blueprint._get('Chunk.position.axis.axis1.ctype'), root + )) if self._axes['ra'][1] else None, + _to_str(self._xx_lookup( + blueprint._get('Chunk.position.axis.axis2.ctype'), root + )) if self._axes['dec'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.time.axis.axis.ctype'), root + ) if self._axes['time'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.energy.axis.axis.ctype'), root + ) if self._axes['energy'][1] 
else None, + self._xx_lookup( + blueprint._get('Chunk.polarization.axis.axis.ctype'), root + ) if self._axes['polarization'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.observable.axis.axis.ctype'), root + ) if self._axes['obs'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.custom.axis.axis.ctype'), root + ) if self._axes['custom'][1] else None, + ] + + self._wcs.wcs.ctype = z[:count] + z = [ + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.dimension.naxis1'), root + ) if self._axes['ra'][1] else 0, + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.dimension.naxis2'), root + ) if self._axes['dec'][1] else 0, + self._xx_lookup( + blueprint._get('Chunk.time.axis.function.naxis'), root + ) if self._axes['time'][1] else 0, + self._xx_lookup( + blueprint._get('Chunk.energy.axis.function.naxis'), root + ) if self._axes['energy'][1] else 0, + self._xx_lookup( + blueprint._get('Chunk.polarization.axis.function.naxis'), root + ) if self._axes['polarization'][1] else 0, + self._xx_lookup( + blueprint._get('Chunk.observable.dependent.bin'), root + ) if self._axes['obs'][1] else 0, + self._xx_lookup( + blueprint._get('Chunk.custom.axis.function.naxis'), root + ) if self._axes['custom'][1] else 0, + ] + self._wcs.array_shape = z[:count] + + z = [ + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord1.pix'), root + ) if self._axes['ra'][1] else None, + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord2.pix'), root + ) if self._axes['dec'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.time.axis.function.refCoord.pix'), root + ) if self._axes['time'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.energy.axis.function.refCoord.pix'), root + ) if self._axes['energy'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.polarization.axis.function.refCoord.pix'), root + ) if self._axes['polarization'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.observable.axis.function.refCoord.pix'), root + ) if self._axes['obs'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.custom.axis.function.refCoord.pix'), root + ) if self._axes['custom'][1] else None, + ] + self._wcs.wcs.crpix = z[:count] + + def _xx_lookup(self, key, root): + logging.error(f'key {key} root name {root.name}') + if key is None: + raise NotImplementedError + if isinstance(key, tuple): + logging.error('path 10') + result = None + for ii in key[0]: + result = self._xx_lookup(ii, root) + if result is None and key[1] is not None: + # apply the default value + result = key[1] + return result else: - return value + if key.startswith('/'): + logging.error('path 15') + bits = key.split('/') + logging.error(f'key {key} root.name {root.name} bits {bits}') + if len(bits) == 2: + logging.error('path 11') + if '(' in bits[1]: + logging.error('path 12') + x = bits[1].split('(') + index = int(x[1].split(')')[0]) + logging.error(f'x {x[0]} index {index}') + y = root[x[0]][index] + logging.error(y) + return y + else: + logging.error('path 13') + return str(root[bits[1]]) + else: + logging.error('path 14') + # the 2 is because there's always a leading slash, so the + # first bit is an empty string + temp = f'/{"/".join(ii for ii in bits[2:])}' + return self._xx_lookup(temp, root[bits[1]]) + else: + # a value has been set + logging.error('path 17') + return key def _to_str(value): @@ -3924,6 +4359,10 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, logging.debug( f'Using a 
FitsParser for local file {local}') parser = FitsParser(local, blueprint, uri=uri) + elif '.h5' in local: + logging.debug( + f'Using an HDF5Parser for local file {local}') + parser = HDF5Parser(blueprint, uri, local) else: # explicitly ignore headers for txt and image files logging.debug(f'Using a GenericParser for {local}') @@ -4031,7 +4470,10 @@ def caom2gen(): blueprints = {} if len(args.blueprint) == 1: # one blueprint to rule them all - blueprint = ObsBlueprint(module=module) + if '.h5' in args.lineage: + blueprint = Hdf5ObsBlueprint(module=module) + else: + blueprint = ObsBlueprint(module=module) blueprint.load_from_file(args.blueprint[0]) for i, cardinality in enumerate(args.lineage): product_id, uri = _extract_ids(cardinality) @@ -4052,7 +4494,10 @@ def caom2gen(): product_id, uri = _extract_ids(cardinality) logging.debug('Loading blueprint for {} from {}'.format( uri, args.blueprint[i])) - blueprint = ObsBlueprint(module=module) + if '.h5' in uri: + blueprint = Hdf5ObsBlueprint(module=module) + else: + blueprint = ObsBlueprint(module=module) blueprint.load_from_file(args.blueprint[i]) blueprints[uri] = blueprint diff --git a/caom2utils/caom2utils/legacy.py b/caom2utils/caom2utils/legacy.py index e347a04e..d676200f 100755 --- a/caom2utils/caom2utils/legacy.py +++ b/caom2utils/caom2utils/legacy.py @@ -545,7 +545,10 @@ def main_app(): obs_blueprint = {} for i, uri in enumerate(args.fileURI): - obs_blueprint[uri] = caom2blueprint.ObsBlueprint() + if '.h5' in uri: + obs_blueprint[uri] = caom2blueprint.Hdf5ObsBlueprint() + else: + obs_blueprint[uri] = caom2blueprint.ObsBlueprint() if config: result = update_blueprint(obs_blueprint[uri], uri, config, defaults, overrides) diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint new file mode 100644 index 00000000..a61ac1bf --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint @@ -0,0 +1,9 @@ +Observation.type = OBJECT +Plane.dataProductType = image +Plane.calibrationLevel = 2 +Chunk.position.axis.function.dimension.naxis1 = 1920 +Chunk.position.axis.function.dimension.naxis2 = 4608 +Chunk.position.axis.function.refCoord.coord1.pix = /header/wcs/crpix(0) +Chunk.position.axis.function.refCoord.coord2.pix = /header/wcs/crpix(1) +Chunk.position.axis.axis1.ctype = /header/wcs/ctype(0) +Chunk.position.axis.axis2.ctype = /header/wcs/ctype(1) diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index 24e6bc61..aca8babf 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -104,7 +104,8 @@ def test_differences(directory): prod_id = [p.product_id for p in expected.planes.values()][0] product_id = f'--productID {prod_id}' collection_id = expected.collection - data_files = _get_files(['header', 'png', 'gif', 'cat', 'fits'], directory) + data_files = _get_files( + ['header', 'png', 'gif', 'cat', 'fits', 'h5'], directory) assert data_files file_meta = _get_uris(collection_id, data_files, expected) @@ -230,6 +231,9 @@ def _get_cardinality(directory): elif 'apass/catalog' in directory: return '--lineage catalog/vos://cadc.nrc.ca!vospace/CAOMworkshop/' \ 'Examples/DAO/dao_c122_2016_012725.fits' + elif 'taos_' in directory: + return '--lineage star04239531/' \ + 'cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5' else: return '' @@ -341,6 +345,7 @@ def _compare_observations(expected, 
actual, output_dir): msg = 'Differences found observation {} in {}\n{}'.\ format(expected.observation_id, output_dir, '\n'.join([r for r in result])) + _write_observation(actual) raise AssertionError(msg) else: logging.info('Observation {} in {} match'.format( @@ -355,5 +360,5 @@ def _read_observation(fname): def _write_observation(obs): writer = ObservationWriter(True, False, 'caom2', - 'http://www.opencadc.org/caom2/xml/v2.3') + 'http://www.opencadc.org/caom2/xml/v2.4') writer.write(obs, './x.xml') diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index 7df41f7f..d174a01a 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -665,7 +665,7 @@ def test_augment_value_errors(): test_parser.augment_plane(test_obs, 'cadc:TEST/abc.fits.gz') with pytest.raises(ValueError): - test_parser.augment_artifact(test_obs) + test_parser.augment_artifact(test_obs, 0) def test_get_from_list(): @@ -1583,6 +1583,25 @@ def test_parser_construction(vos_mock, stdout_mock): os.unlink(test_out_fqn) +def test_content_lookup_hdf5(): + from caom2utils import caom2blueprint + test_key = 'Chunk.position.axis.axis1.ctype' + test_path = '/header/wcs/ctype(0)' + test_blueprint = ObsBlueprint(position_axes=(1, 2)) + test_blueprint.set(test_key, test_path) + test_f_name = 'taos2_20220201T201317Z_star04239531.h5' + test_uri = f'cadc:TAOSII/{test_f_name}' + test_local_fqn = f'{TESTDATA_DIR}/taos_h5file/20220201T200117/{test_f_name}' + test_subject = caom2blueprint.HDF5Parser( + test_blueprint, test_uri, test_local_fqn, '/sitedata' + ) + assert len(test_subject._roots) == 3, 'wrong number of roots' + test_result = test_subject._content_lookup( + test_path, test_subject._roots[0] + ) + assert test_result == b'RA---TAN-SIP' + + def _get_local_headers(file_name): return _get_headers(file_name, None) From c91e642edcb95b85545bf1e8a7da27d0606977fa Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Mon, 28 Feb 2022 16:08:41 -0800 Subject: [PATCH 05/38] CADC-10808 - interim commit. 
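
This change roots blueprint lookups that start with '//' at the top of the
HDF5 file and hands the open h5py file to user-supplied blueprint functions
as {'base': <h5py.File>} (see the new taosii.py later in this patch). A
minimal standalone sketch of such a function, with made-up group and dataset
names, looks like this:

    import h5py

    def get_object_epoch(base):
        # 'base' is the dict the HDF5 parser passes to blueprint functions
        f = base.get('base')
        value = f['header']['object']['epoch'][()]
        # h5py returns bytes for string-valued datasets, so decode before use
        return value.decode('utf-8') if isinstance(value, bytes) else value

    # exercise the sketch against a throw-away in-memory HDF5 file
    with h5py.File('demo.h5', 'w', driver='core', backing_store=False) as f:
        f.create_group('header/object').create_dataset('epoch', data=b'2000.0')
        print(get_object_epoch({'base': f}))   # -> 2000.0

The real functions in taosii.py use the same 'base' handle to build a
SkyCoord for the target position.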
--- caom2utils/caom2utils/caom2blueprint.py | 272 +++++++++++++----- .../20220201T200117/taos.blueprint | 12 + .../taos_h5file/20220201T200117/taosii.py | 34 +++ .../caom2utils/tests/test_collections.py | 6 +- 4 files changed, 255 insertions(+), 69 deletions(-) create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 2f7e04bc..81c2b083 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -1450,7 +1450,8 @@ def is_table(value): @staticmethod def is_function(value): return (not ObsBlueprint.needs_lookup(value) and isinstance(value, str) - and isinstance(value, str) and '(' in value and ')' in value) + and isinstance(value, str) and '()' in value) + # and isinstance(value, str) and '(' in value and ')' in value) @staticmethod def has_default_value(value): @@ -1547,6 +1548,13 @@ def __init__(self, position_axes=None, energy_axis=None, for key in tmp: self.set(key, tmp[key]) + # rules so far: + # - lookup value starting with // means rooted at base of the hdf5 file + # - lookup value starting with / means rooted at the base of the + # "find_roots_here" parameter for HDF5Parser + # - (integer) means return only the value with the index of "integer" + # from a list + class GenericParser: """ @@ -1787,7 +1795,6 @@ def _execute_external(self, value, key, extension): """ # determine which of the possible values for parameter the user # is hoping for - parameter = None if 'uri' in value: parameter = self.uri elif 'header' in value and isinstance(self, FitsParser): @@ -1796,8 +1803,9 @@ def _execute_external(self, value, key, extension): parameter = {'uri': self.uri, 'header': self._headers[extension]} else: - parameter = {'uri': self.uri, - 'header': None} + # parameter = {'uri': self.uri, + # 'header': None} + parameter = {'base': self._file} result = '' execute = None @@ -1941,22 +1949,10 @@ def augment_artifact(self, artifact, index): chunk.meta_producer = self._get_from_list( 'Chunk.metaProducer', index=0, current=chunk.meta_producer) - # NOTE: astropy.wcs does not distinguished between WCS axes and - # data array axes. naxis in astropy.wcs represents in fact the - # number of WCS axes, whereas chunk.axis represents the naxis - # of the data array. 
Solution is to determine it directly from - # the header - # if 'ZNAXIS' in header: - # chunk.naxis = _to_int(header['ZNAXIS']) - # elif 'NAXIS' in header: - # chunk.naxis = _to_int(header['NAXIS']) - # else: - # chunk.naxis = self._get_from_list('Chunk.naxis', 0, - # wcs_parser.wcs.wcs.naxis) self._get_chunk_naxis(chunk, index) if self.blueprint._pos_axes_configed: self._wcs_parser.augment_position(chunk) - logging.error(chunk.position) + # logging.error(chunk.position) if chunk.position is None: self._try_position_with_blueprint(chunk, index) if chunk.position: @@ -2215,10 +2211,10 @@ def _get_from_list(self, lookup, index, current=None): elif (keys is not None) and (keys != ''): value = keys - logging.error(f'here 2 {value}???') + # logging.error(f'here 2 {value}???') elif current: value = current - logging.error(f'here 3 {value}???') + # logging.error(f'here 3 {value}???') self.logger.debug(f'{lookup}: value is {value}') return value @@ -3383,9 +3379,10 @@ class HDF5Parser(BlueprintParser): def __init__( self, obs_blueprint, uri, local_f_name, find_roots_here='/sitedata' ): - super().__init__(obs_blueprint, uri) import h5py self._file = h5py.File(local_f_name, 'r') + logging.error(type(self._file)) + super().__init__(obs_blueprint, uri) self._wcs_parser = None self._roots = [] self.apply_blueprint() @@ -3402,29 +3399,38 @@ def _set_roots(self, root_name, root): self._set_roots(x, root) def augment_artifact(self, artifact, index=0): - for root in self._roots: - self._wcs_parser = Hdf5WcsParser(root, self.blueprint) - super().augment_artifact(artifact, index) + for i, root in enumerate(self._roots): + logging.error(f'root {root.name}') + self._wcs_parser = Hdf5WcsParser(root, self.blueprint, self._file) + super().augment_artifact(artifact, i) def _content_lookup(self, key, extension=None): bits = key.split('/') if isinstance(extension, int): extension = self._roots[extension] - logging.error(f'key {key} root.name {extension.name} bits {bits}') + # logging.error(f'key {key} root.name {extension.name} bits {bits}') if isinstance(key, list): return None if len(bits) == 2: - logging.error('path 1') + # logging.error('path 1') if '(' in bits[1]: - logging.error('path 2') + # logging.error('path 2') x = bits[1].split('(') - index = int(x[1].split(')')[0]) - logging.error(f'x {x} index {index}') - return extension[x[0]][index] + if ',' in x[1]: + logging.error('path 100') + a = x[1].split(')')[0].split(',') + if len(a) > 2: + raise NotImplementedError + y = extension[x[0]][int(a[0])][int(a[1])] + return y + else: + index = int(x[1].split(')')[0]) + # logging.error(f'x {x} index {index}') + return extension[x[0]][index] else: - logging.error('path 3') + # logging.error('path 3') return extension[bits[1]] else: # the 2 is because there's always a leading slash, so the @@ -3776,7 +3782,7 @@ def _get_ref_coord(self, index): def _get_spatial_axis(self, xindex, yindex): """Assemble the bits to make the axis parameter needed for SpatialWCS construction.""" - logging.error(f'xindex {xindex} yindex {yindex}') + # logging.error(f'xindex {xindex} yindex {yindex}') aug_dimension = self._get_dimension(xindex, yindex) aug_ref_coord = Coord2D(self._get_ref_coord(xindex), @@ -3812,10 +3818,13 @@ def _sanitize(self, value): :param value: :return: """ + import numpy if isinstance(value, float) and math.isnan(value): return None elif not str(value): return None # empty string + elif isinstance(value, numpy.bytes_): + return value.decode('utf-8') else: return value @@ -3869,7 +3878,7 @@ def _get_axis_length(self, 
for_axis): class Hdf5WcsParser(WcsParser): - def __init__(self, root, blueprint): + def __init__(self, root, blueprint, base): """ :param root: h5py.h5p.Dataset or h5py.h5p.Group :param blueprint: ObsBlueprint @@ -3882,9 +3891,10 @@ def __init__(self, root, blueprint): 'time': [0, False], 'energy': [0, False], 'polarization': [0, False], - 'obs': [0, False], + 'observable': [0, False], 'custom': [0, False], } + self._base = base self._set_wcs(root, blueprint) @property @@ -3908,17 +3918,11 @@ def _get_axis_index(self, keywords): elif 'STOKES' in keywords: result = self._axes['polarization'][0] elif 'FLUX' in keywords: - result = self._axes['obs'][0] + result = self._axes['observable'][0] return result def _get_axis_length(self, for_axis): - logging.error(f'{for_axis} {self._wcs.array_shape}') - # this is fucking broken, just trying to figure out what the indicees - # should really be right now - # index = 1 - # if for_axis == 2: - # index = 0 - # return self._wcs.array_shape[index] + # logging.error(f'{for_axis} {self._wcs.array_shape}') return self._wcs.array_shape[for_axis-1] def _set_wcs(self, root, blueprint): @@ -3942,8 +3946,8 @@ def _set_wcs(self, root, blueprint): self._axes['polarization'][0] = count count += 1 if blueprint._obs_axis_configed: - self._axes['obs'][1] = True - self._axes['obs'][0] = count + self._axes['observable'][1] = True + self._axes['observable'][0] = count count += 1 if blueprint._custom_axis_configed: self._axes['custom'][1] = True @@ -3952,10 +3956,10 @@ def _set_wcs(self, root, blueprint): self._wcs = WCS(naxis=count) z = [ - _to_str(self._xx_lookup( + self._sanitize(self._xx_lookup( blueprint._get('Chunk.position.axis.axis1.ctype'), root )) if self._axes['ra'][1] else None, - _to_str(self._xx_lookup( + self._sanitize(self._xx_lookup( blueprint._get('Chunk.position.axis.axis2.ctype'), root )) if self._axes['dec'][1] else None, self._xx_lookup( @@ -3969,13 +3973,37 @@ def _set_wcs(self, root, blueprint): ) if self._axes['polarization'][1] else None, self._xx_lookup( blueprint._get('Chunk.observable.axis.axis.ctype'), root - ) if self._axes['obs'][1] else None, + ) if self._axes['observable'][1] else None, self._xx_lookup( blueprint._get('Chunk.custom.axis.axis.ctype'), root ) if self._axes['custom'][1] else None, ] - self._wcs.wcs.ctype = z[:count] + z = [ + self._sanitize(self._xx_lookup( + blueprint._get('Chunk.position.axis.axis1.cunit'), root + )) if self._axes['ra'][1] else None, + self._sanitize(self._xx_lookup( + blueprint._get('Chunk.position.axis.axis2.cunit'), root + )) if self._axes['dec'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.time.axis.axis.cunit'), root + ) if self._axes['time'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.energy.axis.axis.cunit'), root + ) if self._axes['energy'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.polarization.axis.axis.cunit'), root + ) if self._axes['polarization'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.observable.axis.axis.cunit'), root + ) if self._axes['observable'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.custom.axis.axis.cunit'), root + ) if self._axes['custom'][1] else None, + ] + self._wcs.wcs.cunit = z[:count] + z = [ self._xx_lookup( blueprint._get( @@ -3996,7 +4024,7 @@ def _set_wcs(self, root, blueprint): ) if self._axes['polarization'][1] else 0, self._xx_lookup( blueprint._get('Chunk.observable.dependent.bin'), root - ) if self._axes['obs'][1] else 0, + ) if self._axes['observable'][1] else 0, self._xx_lookup( 
blueprint._get('Chunk.custom.axis.function.naxis'), root ) if self._axes['custom'][1] else 0, @@ -4023,19 +4051,93 @@ def _set_wcs(self, root, blueprint): ) if self._axes['polarization'][1] else None, self._xx_lookup( blueprint._get('Chunk.observable.axis.function.refCoord.pix'), root - ) if self._axes['obs'][1] else None, + ) if self._axes['observable'][1] else None, self._xx_lookup( blueprint._get('Chunk.custom.axis.function.refCoord.pix'), root ) if self._axes['custom'][1] else None, ] self._wcs.wcs.crpix = z[:count] + z = [ + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord1.val'), root + ) if self._axes['ra'][1] else None, + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord2.val'), root + ) if self._axes['dec'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.time.axis.function.refCoord.val'), root + ) if self._axes['time'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.energy.axis.function.refCoord.val'), root + ) if self._axes['energy'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.polarization.axis.function.refCoord.val'), root + ) if self._axes['polarization'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.observable.axis.function.refCoord.val'), root + ) if self._axes['observable'][1] else None, + self._xx_lookup( + blueprint._get('Chunk.custom.axis.function.refCoord.val'), root + ) if self._axes['custom'][1] else None, + ] + self._wcs.wcs.crval = z[:count] + + # row first + z = [ + [ + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.cd11'), root + ) if self._axes['ra'][1] else 0.0, + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.cd12'), root + ) if self._axes['dec'][1] else 0.0, + ], + [ + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.cd21'), root + ) if self._axes['ra'][1] else 0.0, + self._xx_lookup( + blueprint._get( + 'Chunk.position.axis.function.cd22'), root + ) if self._axes['dec'][1] else 0.0, + ], + ] + # self._xx_lookup( + # blueprint._get('Chunk.time.axis.function.cd33'), root + # ) if self._axes['time'][1] else None, + # self._xx_lookup( + # blueprint._get('Chunk.energy.axis.function.cd44'), root + # ) if self._axes['energy'][1] else None, + # self._xx_lookup( + # blueprint._get('Chunk.polarization.axis.function.cd55'), root + # ) if self._axes['polarization'][1] else None, + # self._xx_lookup( + # blueprint._get('Chunk.observable.axis.function.cd66'), root + # ) if self._axes['observable'][1] else None, + # self._xx_lookup( + # blueprint._get('Chunk.custom.axis.function.cd77'), root + # ) if self._axes['custom'][1] else None, + # ] + self._wcs.wcs.cd = z[:count][:count] + self._wcs.wcs.equinox = self._xx_lookup( + blueprint._get('Chunk.position.equinox'), self._base + ) if self._axes['ra'][1] else None + def _xx_lookup(self, key, root): - logging.error(f'key {key} root name {root.name}') + # logging.error(f'key {key} root name {root.name}') if key is None: raise NotImplementedError + if key.startswith('//'): + key = key.replace('//', '/') + root = self._base if isinstance(key, tuple): - logging.error('path 10') + # logging.error('path 10') result = None for ii in key[0]: result = self._xx_lookup(ii, root) @@ -4045,33 +4147,68 @@ def _xx_lookup(self, key, root): return result else: if key.startswith('/'): - logging.error('path 15') + # logging.error('path 15') bits = key.split('/') - logging.error(f'key {key} root.name {root.name} bits {bits}') + # logging.error(f'key {key} root.name {root.name} bits 
{bits}') if len(bits) == 2: - logging.error('path 11') + # logging.error('path 11') if '(' in bits[1]: - logging.error('path 12') + # logging.error('path 12') x = bits[1].split('(') - index = int(x[1].split(')')[0]) - logging.error(f'x {x[0]} index {index}') - y = root[x[0]][index] - logging.error(y) - return y + if ',' in x[1]: + # logging.error('path 19') + a = x[1].split(')')[0].split(',') + if len(a) > 2: + raise NotImplementedError + y = root[x[0]][int(a[0])][int(a[1])] + return y + else: + # logging.error(f'path 20 {x}') + index = int(x[1].split(')')[0]) + # logging.error(f'x {x[0]} index {index}') + y = root[x[0]][index] + # logging.error(y) + return y else: - logging.error('path 13') - return str(root[bits[1]]) + # logging.error('path 13') + return root[bits[1]] else: - logging.error('path 14') # the 2 is because there's always a leading slash, so the # first bit is an empty string temp = f'/{"/".join(ii for ii in bits[2:])}' + # logging.error(f'path 14 y{temp}y x{bits}x') return self._xx_lookup(temp, root[bits[1]]) else: # a value has been set - logging.error('path 17') + # logging.error(f'path 17 {key}') return key + def _get_z(self, blueprint, root, default): + z = [ + self._xx_lookup( + blueprint._get('Chunk.position.axis.axis1.cunit'), root + ) if self._axes['ra'][1] else default, + self._xx_lookup( + blueprint._get('Chunk.position.axis.axis2.cunit'), root + ) if self._axes['dec'][1] else default, + self._xx_lookup( + blueprint._get('Chunk.time.axis.axis.cunit'), root + ) if self._axes['time'][1] else default, + self._xx_lookup( + blueprint._get('Chunk.energy.axis.axis.cunit'), root + ) if self._axes['energy'][1] else default, + self._xx_lookup( + blueprint._get('Chunk.polarization.axis.axis.cunit'), root + ) if self._axes['polarization'][1] else default, + self._xx_lookup( + blueprint._get('Chunk.observable.axis.axis.cunit'), root + ) if self._axes['observable'][1] else default, + self._xx_lookup( + blueprint._get('Chunk.custom.axis.axis.cunit'), root + ) if self._axes['custom'][1] else default, + ] + return z + def _to_str(value): return str(value).strip() if value is not None else None @@ -4470,7 +4607,10 @@ def caom2gen(): blueprints = {} if len(args.blueprint) == 1: # one blueprint to rule them all - if '.h5' in args.lineage: + # logging.error(f'one blueprint {args.lineage} {type(args.lineage)}') + temp = ' '.join(ii for ii in args.lineage) + if '.h5' in temp: + # logging.error('picking the correct one') blueprint = Hdf5ObsBlueprint(module=module) else: blueprint = ObsBlueprint(module=module) @@ -4492,7 +4632,7 @@ def caom2gen(): for i, cardinality in enumerate(args.lineage): product_id, uri = _extract_ids(cardinality) - logging.debug('Loading blueprint for {} from {}'.format( + logging.error('Loading blueprint for {} from {}'.format( uri, args.blueprint[i])) if '.h5' in uri: blueprint = Hdf5ObsBlueprint(module=module) diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint index a61ac1bf..72a887db 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint @@ -1,9 +1,21 @@ Observation.type = OBJECT +Observation.target_position.point.cval1 = get_target_position_cval1() +Observation.target_position.point.cval2 = get_target_position_cval2() +Observation.target_position.equinox = //header/object/epoch Plane.dataProductType = image Plane.calibrationLevel = 2 
Chunk.position.axis.function.dimension.naxis1 = 1920 Chunk.position.axis.function.dimension.naxis2 = 4608 Chunk.position.axis.function.refCoord.coord1.pix = /header/wcs/crpix(0) +Chunk.position.axis.function.refCoord.coord1.val = /header/wcs/crval(0) Chunk.position.axis.function.refCoord.coord2.pix = /header/wcs/crpix(1) +Chunk.position.axis.function.refCoord.coord2.val = /header/wcs/crval(1) Chunk.position.axis.axis1.ctype = /header/wcs/ctype(0) +Chunk.position.axis.axis1.cunit = /header/wcs/cunit(0) Chunk.position.axis.axis2.ctype = /header/wcs/ctype(1) +Chunk.position.axis.axis2.cunit = /header/wcs/cunit(1) +Chunk.position.axis.function.cd11 = /header/wcs/cd(0,0) +Chunk.position.axis.function.cd12 = /header/wcs/cd(0,1) +Chunk.position.axis.function.cd21 = /header/wcs/cd(1,0) +Chunk.position.axis.function.cd22 = /header/wcs/cd(1,1) +Chunk.position.equinox = //header/object/epoch diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py new file mode 100644 index 00000000..4fbf9c86 --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py @@ -0,0 +1,34 @@ +from astropy import units +from astropy.coordinates import SkyCoord + + +def get_target_position_cval1(base): + ra, dec_ignore = _get_target_position(base) + return ra + + +def get_target_position_cval2(base): + ra_ignore, dec = _get_target_position(base) + return dec + + +def _get_target_position(base): + import logging + b = base.get('base') + try: + ra = b['header']['object']['obj_ra'] + dec = b['header']['object']['obj_dec'] + logging.error(f'{ra} {dec}') + result = SkyCoord( + ra.decode('utf-8'), + dec.decode('utf-8'), + frame='icrs', + unit=(units.hourangle, units.deg), + ) + return result.ra.degree, result.dec.degree + except Exception as e: + import logging + import traceback + logging.error(e) + logging.error(traceback.format_exc()) + raise e diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index aca8babf..59964286 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -342,9 +342,9 @@ def _compare_observations(expected, actual, output_dir): result = get_differences(expected, actual, 'Observation') if result: - msg = 'Differences found observation {} in {}\n{}'.\ - format(expected.observation_id, - output_dir, '\n'.join([r for r in result])) + tmp = '\n'.join([r for r in result]) + msg = f'Differences found observation {expected.observation_id} in ' \ + f'{output_dir}\n{tmp}' _write_observation(actual) raise AssertionError(msg) else: From 63ba5ebb185d9b52127407a70f01bbfa1681c461 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Tue, 1 Mar 2022 17:23:38 -0800 Subject: [PATCH 06/38] CADC-10809 - interim commit. 
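
The two-dimensional lookup suffix in blueprint values changes from
'(row,col)' to '(row:col)', and the HDF5 paths in taos.blueprint are now
wrapped in '[...]'. A rough standalone sketch of how such a suffix is
resolved against the file contents (plain dicts and numpy arrays stand in
for h5py groups and datasets here; the parser's own logic lives in
_xx_lookup and _content_lookup):

    import numpy

    def resolve(root, path):
        # split '/header/wcs/cd(1:0)' into the dataset path and index suffix
        name, _, index = path.partition('(')
        value = root
        for bit in name.strip('/').split('/'):
            value = value[bit]
        if index:
            for i in index.rstrip(')').split(':'):
                value = value[int(i)]
        return value

    demo = {'header': {'wcs': {'cd': numpy.array([[1.0, 0.0], [0.0, 1.0]]),
                               'crpix': numpy.array([100.0, 200.0])}}}
    print(resolve(demo, '/header/wcs/cd(1:0)'))   # -> 0.0
    print(resolve(demo, '/header/wcs/crpix(0)'))  # -> 100.0

In taos.blueprint the four CD-matrix entries are read this way from
/header/wcs/cd.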
--- caom2utils/caom2utils/caom2blueprint.py | 45 +++++++++++++------ .../20220201T200117/taos.blueprint | 40 ++++++++++------- .../taos_h5file/20220201T200117/taosii.py | 8 +++- 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 81c2b083..6193c0e9 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -1554,6 +1554,9 @@ def __init__(self, position_axes=None, energy_axis=None, # "find_roots_here" parameter for HDF5Parser # - (integer) means return only the value with the index of "integer" # from a list + # - (integer:integer) means return only the value with the index of + # "integer" from a list, followed by "integer" from the list in the + # list class GenericParser: @@ -1618,8 +1621,6 @@ def augment_observation(self, observation, artifact_uri, product_id=None): 'Observation.metaRelease', index=0, current=observation.meta_release ) - logging.error(f'{temp}!!!!!!!!!!!!!') - logging.error(f'{self._get_from_list}!!!!!!!!!!!!!') observation.meta_release = self._get_datetime(self._get_from_list( 'Observation.metaRelease', index=0, current=observation.meta_release)) @@ -2155,6 +2156,8 @@ def _get_from_list(self, lookup, index, current=None): value = None try: keys = self.blueprint._get(lookup) + if 'equinox' in lookup: + logging.error(f'path 1007 {keys}') except KeyError: self.add_error(lookup, sys.exc_info()[1]) self.logger.debug( @@ -3381,12 +3384,13 @@ def __init__( ): import h5py self._file = h5py.File(local_f_name, 'r') - logging.error(type(self._file)) + # logging.error(type(self._file)) super().__init__(obs_blueprint, uri) self._wcs_parser = None self._roots = [] self.apply_blueprint() self._set_roots(find_roots_here, self._file) + # logging.error(self._blueprint) def _set_roots(self, root_name, root): bits = root_name.split('/') @@ -3400,12 +3404,11 @@ def _set_roots(self, root_name, root): def augment_artifact(self, artifact, index=0): for i, root in enumerate(self._roots): - logging.error(f'root {root.name}') + # logging.error(f'root {root.name}') self._wcs_parser = Hdf5WcsParser(root, self.blueprint, self._file) super().augment_artifact(artifact, i) def _content_lookup(self, key, extension=None): - bits = key.split('/') if isinstance(extension, int): extension = self._roots[extension] @@ -3413,14 +3416,18 @@ def _content_lookup(self, key, extension=None): if isinstance(key, list): return None + if key.startswith('//'): + key = key.replace('//', '/') + extension = self._file + bits = key.split('/') if len(bits) == 2: # logging.error('path 1') if '(' in bits[1]: # logging.error('path 2') x = bits[1].split('(') - if ',' in x[1]: - logging.error('path 100') - a = x[1].split(')')[0].split(',') + if ':' in x[1]: + # logging.error('path 100') + a = x[1].split(')')[0].split(':') if len(a) > 2: raise NotImplementedError y = extension[x[0]][int(a[0])][int(a[1])] @@ -3436,7 +3443,7 @@ def _content_lookup(self, key, extension=None): # the 2 is because there's always a leading slash, so the # first bit is an empty string temp = f'/{"/".join(ii for ii in bits[2:])}' - logging.error(f'path 4 {temp} {bits[2:]}') + # logging.error(f'path 4 {temp} {bits[2:]}') return self._content_lookup(temp, extension[bits[1]]) def _get_chunk_naxis(self, chunk, index): @@ -4129,13 +4136,20 @@ def _set_wcs(self, root, blueprint): blueprint._get('Chunk.position.equinox'), self._base ) if self._axes['ra'][1] else None + if blueprint._time_axis_configed: + 
self._wcs.wcs.mjdstart = self._xx_lookup( + blueprint._get('Chunk.time.axis.range.start.val') + ) + self._wcs.wcs.mjdend = self._xx_lookup( + blueprint._get('Chunk.time.axis.range.end.val') + ) + + # logging.error(f'{self._wcs.wcs}') + def _xx_lookup(self, key, root): # logging.error(f'key {key} root name {root.name}') if key is None: raise NotImplementedError - if key.startswith('//'): - key = key.replace('//', '/') - root = self._base if isinstance(key, tuple): # logging.error('path 10') result = None @@ -4146,6 +4160,9 @@ def _xx_lookup(self, key, root): result = key[1] return result else: + if key.startswith('//'): + key = key.replace('//', '/') + root = self._base if key.startswith('/'): # logging.error('path 15') bits = key.split('/') @@ -4155,9 +4172,9 @@ def _xx_lookup(self, key, root): if '(' in bits[1]: # logging.error('path 12') x = bits[1].split('(') - if ',' in x[1]: + if ':' in x[1]: # logging.error('path 19') - a = x[1].split(')')[0].split(',') + a = x[1].split(')')[0].split(':') if len(a) > 2: raise NotImplementedError y = root[x[0]][int(a[0])][int(a[1])] diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint index 72a887db..f07370fc 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint @@ -1,21 +1,31 @@ +Observation.sequenceNumber = [//header/run/run_seq] Observation.type = OBJECT +Observation.target.name = [//header/object/obj_id] Observation.target_position.point.cval1 = get_target_position_cval1() Observation.target_position.point.cval2 = get_target_position_cval2() -Observation.target_position.equinox = //header/object/epoch -Plane.dataProductType = image +Observation.target_position.coordsys = FK5 +Observation.target_position.equinox = [//header/object/epoch] +Plane.dataProductType = timeseries Plane.calibrationLevel = 2 Chunk.position.axis.function.dimension.naxis1 = 1920 Chunk.position.axis.function.dimension.naxis2 = 4608 -Chunk.position.axis.function.refCoord.coord1.pix = /header/wcs/crpix(0) -Chunk.position.axis.function.refCoord.coord1.val = /header/wcs/crval(0) -Chunk.position.axis.function.refCoord.coord2.pix = /header/wcs/crpix(1) -Chunk.position.axis.function.refCoord.coord2.val = /header/wcs/crval(1) -Chunk.position.axis.axis1.ctype = /header/wcs/ctype(0) -Chunk.position.axis.axis1.cunit = /header/wcs/cunit(0) -Chunk.position.axis.axis2.ctype = /header/wcs/ctype(1) -Chunk.position.axis.axis2.cunit = /header/wcs/cunit(1) -Chunk.position.axis.function.cd11 = /header/wcs/cd(0,0) -Chunk.position.axis.function.cd12 = /header/wcs/cd(0,1) -Chunk.position.axis.function.cd21 = /header/wcs/cd(1,0) -Chunk.position.axis.function.cd22 = /header/wcs/cd(1,1) -Chunk.position.equinox = //header/object/epoch +Chunk.position.axis.function.refCoord.coord1.pix = [/header/wcs/crpix(0)] +Chunk.position.axis.function.refCoord.coord1.val = [/header/wcs/crval(0)] +Chunk.position.axis.function.refCoord.coord2.pix = [/header/wcs/crpix(1)] +Chunk.position.axis.function.refCoord.coord2.val = [/header/wcs/crval(1)] +Chunk.position.axis.axis1.ctype = [/header/wcs/ctype(0)] +Chunk.position.axis.axis1.cunit = [/header/wcs/cunit(0)] +Chunk.position.axis.axis2.ctype = [/header/wcs/ctype(1)] +Chunk.position.axis.axis2.cunit = [/header/wcs/cunit(1)] +Chunk.position.axis.function.cd11 = [/header/wcs/cd(0:0)] +Chunk.position.axis.function.cd12 = [/header/wcs/cd(0:1)] 
+Chunk.position.axis.function.cd21 = [/header/wcs/cd(1:0)] +Chunk.position.axis.function.cd22 = [/header/wcs/cd(1:1)] +Chunk.position.equinox = [//header/object/epoch] + +Chunk.time.axis.axis.ctype = TIME +Chunk.time.axis.axis.cunit = s +Chunk.time.axis.range.start.pix = 0 +Chunk.time.axis.range.start.val = [//header/timeseries/mjdrunstart] +Chunk.time.axis.range.end.pix = get_time_axis_range_end() +Chunk.time.axis.range.end.val = [//header/timeseries/mjdrunend] diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py index 4fbf9c86..678870c2 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py @@ -18,7 +18,7 @@ def _get_target_position(base): try: ra = b['header']['object']['obj_ra'] dec = b['header']['object']['obj_dec'] - logging.error(f'{ra} {dec}') + # logging.error(f'{ra} {dec}') result = SkyCoord( ra.decode('utf-8'), dec.decode('utf-8'), @@ -32,3 +32,9 @@ def _get_target_position(base): logging.error(e) logging.error(traceback.format_exc()) raise e + + +def get_time_axis_range_end(base): + b = base.get('base') + x = b['header']['timeseries']['numepochs'] + return x - 1 From b7c0b7b8e8cad7c791c825cf4c96f2f394a40c66 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 10 Mar 2022 16:31:41 -0800 Subject: [PATCH 07/38] CADC-10809 - interim commit - have one CAOM2 record for an HDF5 file. --- caom2utils/caom2utils/caom2blueprint.py | 984 ++++++++++++------ .../20220201T200117/taos.blueprint | 14 + .../taos_h5file/20220201T200117/taosii.py | 10 + .../caom2utils/tests/test_fits2caom2.py | 41 +- 4 files changed, 721 insertions(+), 328 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 6193c0e9..06849e36 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -71,6 +71,7 @@ from logging.handlers import TimedRotatingFileHandler import math +import numpy from astropy.wcs import Wcsprm, WCS from astropy.io import fits from astropy.time import Time @@ -112,11 +113,12 @@ APP_NAME = 'caom2gen' -__all__ = ['FitsParser', 'FitsWcsParser', 'DispatchingFormatter', - 'ObsBlueprint', 'get_arg_parser', 'proc', +__all__ = ['BlueprintParser', 'FitsParser', 'FitsWcsParser', + 'DispatchingFormatter', 'ObsBlueprint', 'get_arg_parser', 'proc', 'POLARIZATION_CTYPES', 'gen_proc', 'get_gen_proc_arg_parser', 'GenericParser', 'augment', 'get_vos_headers', - 'get_external_headers', 'update_artifact_meta'] + 'get_external_headers', 'HDF5Parser', 'Hdf5ObsBlueprint', + 'Hdf5WcsParser', 'update_artifact_meta'] CUSTOM_CTYPES = [ 'RM', @@ -1450,8 +1452,9 @@ def is_table(value): @staticmethod def is_function(value): return (not ObsBlueprint.needs_lookup(value) and isinstance(value, str) - and isinstance(value, str) and '()' in value) - # and isinstance(value, str) and '(' in value and ')' in value) + # and isinstance(value, str) and '()' in value) + and isinstance(value, str) and '(' in value and ')' in value + and '/' not in value) @staticmethod def has_default_value(value): @@ -1558,6 +1561,271 @@ def __init__(self, position_axes=None, energy_axis=None, # "integer" from a list, followed by "integer" from the list in the # list + def configure_custom_axis(self, axis, override=True): + """ + Set the expected FITS custom keywords by index in the blueprint + and the wcs_std lookup. 
+ + :param axis: The index expected for the custom axis. + :param override: Set to False when reading from a file. + :return: + """ + if self._custom_axis_configed: + self.logger.debug( + 'Attempt to configure already-configured custom axis.') + return + + if override: + self.set('Chunk.custom.axis.axis.ctype', ([], None)) + self.set('Chunk.custom.axis.axis.cunit', ([], None)) + self.set('Chunk.custom.axis.function.naxis', ([], None)) + self.set('Chunk.custom.axis.function.delta', ([], None)) + self.set('Chunk.custom.axis.function.refCoord.pix', ([], None)) + self.set('Chunk.custom.axis.function.refCoord.val', ([], None)) + + # TODO - what goes here? + self._wcs_std['Chunk.custom.axis.axis.ctype'] = '' + self._wcs_std['Chunk.custom.axis.axis.cunit'] = '' + self._wcs_std['Chunk.custom.axis.function.naxis'] = '' + self._wcs_std['Chunk.custom.axis.function.delta'] = '' + self._wcs_std['Chunk.custom.axis.function.refCoord.pix'] = '' + self._wcs_std['Chunk.custom.axis.function.refCoord.val'] = '' + self._custom_axis_configed = True + + def configure_position_axes(self, axes, override=True): + """ + Set the expected FITS spatial keywords by indices in the blueprint and + the wcs_std lookup. + + :param axes: The index expected for the position axes. + :return: + """ + logging.error('called??????') + if self._pos_axes_configed: + self.logger.error( + 'Attempt to configure already-configured position axes.') + return + + if override: + logging.error('yes, no maybe?') + self.set('Chunk.position.coordsys', ([], None)) + self.set('Chunk.position.equinox', ([], None)) + self.set('Chunk.position.axis.axis1.ctype', ([], '')) + self.set('Chunk.position.axis.axis1.cunit', ([], None)) + self.set('Chunk.position.axis.axis2.ctype', ([], None)) + self.set('Chunk.position.axis.axis2.cunit', ([], None)) + self.set('Chunk.position.axis.error1.syser', ([], None)) + self.set('Chunk.position.axis.error1.rnder', ([], None)) + self.set('Chunk.position.axis.error2.syser', ([], None)) + self.set('Chunk.position.axis.error2.rnder', ([], None)) + self.set('Chunk.position.axis.function.cd11', ([], None)) + self.set('Chunk.position.axis.function.cd12', ([], None)) + self.set('Chunk.position.axis.function.cd21', ([], None)) + self.set('Chunk.position.axis.function.cd22', ([], None)) + self.set('Chunk.position.axis.function.dimension.naxis1', + ([], None)) + self.set('Chunk.position.axis.function.dimension.naxis2', + ([], None)) + self.set('Chunk.position.axis.function.refCoord.coord1.pix', + ([], None)) + self.set('Chunk.position.axis.function.refCoord.coord1.val', + ([], None)) + self.set('Chunk.position.axis.function.refCoord.coord2.pix', + ([], None)) + self.set('Chunk.position.axis.function.refCoord.coord2.val', + ([], None)) + + self._wcs_std['Chunk.position.coordsys'] = '' + self._wcs_std['Chunk.position.equinox'] = '' + + self._wcs_std['Chunk.position.axis.axis1.ctype'] = '' + self._wcs_std['Chunk.position.axis.axis1.cunit'] = '' + self._wcs_std['Chunk.position.axis.axis2.ctype'] = '' + self._wcs_std['Chunk.position.axis.axis2.cunit'] = '' + self._wcs_std['Chunk.position.axis.error1.syser'] = '' + self._wcs_std['Chunk.position.axis.error1.rnder'] = '' + self._wcs_std['Chunk.position.axis.error2.syser'] = '' + self._wcs_std['Chunk.position.axis.error2.rnder'] = '' + self._wcs_std['Chunk.position.axis.function.cd11'] = '' + self._wcs_std['Chunk.position.axis.function.cd12'] = '' + self._wcs_std['Chunk.position.axis.function.cd21'] = '' + self._wcs_std['Chunk.position.axis.function.cd22'] = '' + 
self._wcs_std['Chunk.position.axis.function.dimension.naxis1'] = '' + self._wcs_std['Chunk.position.axis.function.dimension.naxis2'] = '' + self._wcs_std['Chunk.position.axis.function.refCoord.coord1.pix'] = '' + self._wcs_std['Chunk.position.axis.function.refCoord.coord1.val'] = '' + self._wcs_std['Chunk.position.axis.function.refCoord.coord2.pix'] = '' + self._wcs_std['Chunk.position.axis.function.refCoord.coord2.val'] = '' + + self._pos_axes_configed = True + + def configure_energy_axis(self, axis, override=True): + """ + :param axis: The index expected for the energy axis. + :param override: Set to False when reading from a file. + :return: + """ + if self._energy_axis_configed: + self.logger.debug( + 'Attempt to configure already-configured energy axis.') + return + + if override: + self.set('Chunk.energy.specsys', ([], None)) + self.set('Chunk.energy.ssysobs', ([], None)) + self.set('Chunk.energy.restfrq', ([], None)) + self.set('Chunk.energy.restwav', ([], None)) + self.set('Chunk.energy.velosys', ([], None)) + self.set('Chunk.energy.zsource', ([], None)) + self.set('Chunk.energy.ssyssrc', ([], None)) + self.set('Chunk.energy.velang', ([], None)) + + self.set('Chunk.energy.bandpassName', ([], None)) + self.set('Chunk.energy.resolvingPower', ([], None)) + + self.set('Chunk.energy.axis.axis.ctype', ([], None)) + self.set('Chunk.energy.axis.axis.cunit', ([], None)) + self.set('Chunk.energy.axis.error.syser', ([], None)) + self.set('Chunk.energy.axis.error.rnder', ([], None)) + self.set('Chunk.energy.axis.function.naxis', ([], None)) + self.set('Chunk.energy.axis.function.delta', ([], None)) + self.set('Chunk.energy.axis.function.refCoord.pix', ([], None)) + self.set('Chunk.energy.axis.function.refCoord.val', ([], None)) + + self._wcs_std['Chunk.energy.specsys'] = '' + self._wcs_std['Chunk.energy.ssysobs'] = '' + self._wcs_std['Chunk.energy.restfrq'] = '' + self._wcs_std['Chunk.energy.restwav'] = '' + self._wcs_std['Chunk.energy.velosys'] = '' + self._wcs_std['Chunk.energy.zsource'] = '' + self._wcs_std['Chunk.energy.ssyssrc'] = '' + self._wcs_std['Chunk.energy.velang'] = '' + + self._wcs_std['Chunk.energy.axis.axis.ctype'] = '' + self._wcs_std['Chunk.energy.axis.axis.cunit'] = '' + self._wcs_std['Chunk.energy.axis.error.syser'] = '' + self._wcs_std['Chunk.energy.axis.error.rnder'] = '' + self._wcs_std['Chunk.energy.axis.function.naxis'] = '' + self._wcs_std['Chunk.energy.axis.function.delta'] = '' + self._wcs_std['Chunk.energy.axis.function.refCoord.pix'] = '' + self._wcs_std['Chunk.energy.axis.function.refCoord.val'] = '' + self._energy_axis_configed = True + + def configure_polarization_axis(self, axis, override=True): + """ + Set the expected FITS polarization keywords by index in the blueprint + and the wcs_std lookup. + + :param axis: The index expected for the polarization axis. + :param override: Set to False when reading from a file. + :return: + """ + if self._polarization_axis_configed: + self.logger.debug( + 'Attempt to configure already-configured polarization axis.') + return + + if override: + # STOKES is the only value allowed for PolarizationWCS ctype. 
+ self.set('Chunk.polarization.axis.axis.ctype', ([], 'STOKES')) + self.set('Chunk.polarization.axis.axis.cunit', ([], None)) + self.set('Chunk.polarization.axis.function.naxis', ([], None)) + self.set('Chunk.polarization.axis.function.delta', ([], None)) + self.set('Chunk.polarization.axis.function.refCoord.pix', + ([], None)) + self.set('Chunk.polarization.axis.function.refCoord.val', + ([], None)) + + self._wcs_std['Chunk.polarization.axis.axis.ctype'] = '' + self._wcs_std['Chunk.polarization.axis.axis.cunit'] = '' + self._wcs_std['Chunk.polarization.axis.function.naxis'] = '' + self._wcs_std['Chunk.polarization.axis.function.delta'] = '' + self._wcs_std['Chunk.polarization.axis.function.refCoord.pix'] = '' + self._wcs_std['Chunk.polarization.axis.function.refCoord.val'] = '' + + self._polarization_axis_configed = True + + def configure_observable_axis(self, axis, override=True): + """ + Set the expected FITS observable keywords by index in the blueprint + and the wcs_std lookup. + Note: observable axis is not a standard WCS and it's not used by + astropy.wcs so, arguably, it can be removed. It is here for now for + consistency purposes. + :param axis: The index expected for the observable axis. + :param override: Set to False when reading from a file. + :return: + """ + if self._obs_axis_configed: + self.logger.debug( + 'Attempt to configure already-configured observable axis.') + return + + if override: + self.set('Chunk.observable.axis.axis.ctype', ([], None)) + self.set('Chunk.observable.axis.axis.cunit', ([], None)) + self.set('Chunk.observable.axis.function.refCoord.pix', ([], None)) + + self._wcs_std['Chunk.observable.axis.axis.ctype'] = '' + self._wcs_std['Chunk.observable.axis.axis.cunit'] = '' + self._wcs_std['Chunk.observable.axis.function.refCoord.pix'] = '' + + self._obs_axis_configed = True + + def configure_time_axis(self, axis, override=True): + """ + Set the expected FITS time keywords by index in the blueprint and + the wcs_std lookup. + + :param axis: The index expected for the time axis. + :param override: Set to False when reading from a file. 
+ :return: + """ + if self._time_axis_configed: + self.logger.debug( + 'Attempt to configure already-configured time axis.') + return + + if override: + self.set('Chunk.time.exposure', ([], None)) + self.set('Chunk.time.timesys', ([], None)) + self.set('Chunk.time.trefpos', ([], None)) + self.set('Chunk.time.mjdref', ([], None)) + self.set('Chunk.time.resolution', ([], None)) + self.set('Chunk.time.axis.axis.ctype', ([], None)) + self.set('Chunk.time.axis.axis.cunit', ([], None)) + self.set('Chunk.time.axis.error.syser', ([], None)) + self.set('Chunk.time.axis.error.rnder', ([], None)) + self.set('Chunk.time.axis.function.naxis', ([], None)) + self.set('Chunk.time.axis.function.delta', ([], None)) + self.set('Chunk.time.axis.function.refCoord.pix', ([], None)) + self.set('Chunk.time.axis.function.refCoord.val', ([], None)) + + self._wcs_std['Chunk.time.exposure'] = '' + self._wcs_std['Chunk.time.resolution'] = '' + self._wcs_std['Chunk.time.timesys'] = '' + self._wcs_std['Chunk.time.trefpos'] = '' + self._wcs_std['Chunk.time.mjdref'] = '' + + self._wcs_std['Chunk.time.axis.axis.ctype'] = \ + f'CTYPE{axis}' + self._wcs_std['Chunk.time.axis.axis.cunit'] = \ + f'CUNIT{axis}' + self._wcs_std['Chunk.time.axis.error.syser'] = \ + f'CSYER{axis}' + self._wcs_std['Chunk.time.axis.error.rnder'] = \ + f'CRDER{axis}' + self._wcs_std['Chunk.time.axis.function.naxis'] = \ + f'NAXIS{axis}' + self._wcs_std['Chunk.time.axis.function.delta'] = \ + f'CDELT{axis}' + self._wcs_std['Chunk.time.axis.function.refCoord.pix'] = \ + f'CRPIX{axis}' + self._wcs_std['Chunk.time.axis.function.refCoord.val'] = \ + f'CRVAL{axis}' + + self._time_axis_configed = True + class GenericParser: """ @@ -1804,9 +2072,11 @@ def _execute_external(self, value, key, extension): parameter = {'uri': self.uri, 'header': self._headers[extension]} else: - # parameter = {'uri': self.uri, - # 'header': None} - parameter = {'base': self._file} + if hasattr(self, '_file'): + parameter = {'base': self._file} + else: + parameter = {'uri': self.uri, + 'header': None} result = '' execute = None @@ -1823,8 +2093,8 @@ def _execute_external(self, value, key, extension): try: result = execute(parameter) logging.debug( - 'Key {} calculated value of {} using {}'.format( - key, result, value)) + f'Key {key} calculated value of {result} using {value} type ' + f'{type(result)}') except Exception as e: msg = 'Failed to execute {} for {} in {}'.format( execute.__name__, key, self.uri) @@ -2156,8 +2426,8 @@ def _get_from_list(self, lookup, index, current=None): value = None try: keys = self.blueprint._get(lookup) - if 'equinox' in lookup: - logging.error(f'path 1007 {keys}') + # if 'equinox' in lookup: + # logging.error(f'path 1007 {keys}') except KeyError: self.add_error(lookup, sys.exc_info()[1]) self.logger.debug( @@ -2174,7 +2444,7 @@ def _get_from_list(self, lookup, index, current=None): for ii in keys[0]: try: value = self._content_lookup(ii, index) - logging.error(value) + # logging.error(value) if value: self.logger.debug( f'{lookup}: assigned value {value} based on ' @@ -2213,7 +2483,10 @@ def _get_from_list(self, lookup, index, current=None): value = current elif (keys is not None) and (keys != ''): - value = keys + if keys == 'None': + value = None + else: + value = keys # logging.error(f'here 2 {value}???') elif current: value = current @@ -2358,8 +2631,7 @@ def _get_naxis(self, label, index): if aug_axis_ctype is not None: aug_axis = Axis(aug_axis_ctype, aug_axis_cunit) self.logger.debug( - 'Creating polarization Axis for {} from blueprint'. 
- format(self.uri)) + f'Creating {label} Axis for {self.uri} from blueprint') aug_error = self._two_param_constructor( f'Chunk.{label}.axis.error.syser', @@ -2382,16 +2654,24 @@ def _get_naxis(self, label, index): aug_function = \ CoordFunction1D(aug_length, aug_delta, aug_ref_coord) self.logger.debug( - 'Creating {} function for {} from blueprint'. - format(label, self.uri)) + f'Creating {label} function for {self.uri} from blueprint') aug_naxis = None - if aug_axis is not None and aug_function is not None: - aug_naxis = CoordAxis1D(aug_axis, aug_error, None, None, - aug_function) - self.logger.debug( - 'Creating {} CoordAxis1D for {} from blueprint'. - format(label, self.uri)) + if aug_function is None: + aug_range = self._try_range_return(index, label) + if aug_axis is not None and aug_range is not None: + aug_naxis = CoordAxis1D( + axis=aug_axis, error=aug_error, range=aug_range) + self.logger.debug( + f'Creating range {label} CoordAxis1D for {self.uri} from ' + f'blueprint') + else: + if aug_axis is not None and aug_function is not None: + aug_naxis = CoordAxis1D(aug_axis, aug_error, None, None, + aug_function) + self.logger.debug( + f'Creating function {label} CoordAxis1D for {self.uri} ' + f'from blueprint') self.logger.debug( f'End {label} naxis construction from blueprint.') return aug_naxis @@ -2879,6 +3159,22 @@ def _try_range(self, wcs, index, lookup): wcs.axis.range = CoordRange1D(aug_range_start, aug_range_end) self.logger.debug(f'Completed setting range for {lookup}') + def _try_range_return(self, index, lookup): + self.logger.debug(f'Try to set the range for {lookup}') + range = None + aug_range_start = self._two_param_constructor( + f'Chunk.{lookup}.axis.range.start.pix', + f'Chunk.{lookup}.axis.range.start.val', + index, _to_float, RefCoord) + aug_range_end = self._two_param_constructor( + f'Chunk.{lookup}.axis.range.end.pix', + f'Chunk.{lookup}.axis.range.end.val', + index, _to_float, RefCoord) + if aug_range_start and aug_range_end: + range = CoordRange1D(aug_range_start, aug_range_end) + self.logger.debug(f'Completed setting range for {lookup}') + return range + def _try_range_with_blueprint(self, chunk, index): """Use the blueprint to set elements and attributes that are not in the scope of astropy and fits, and therefore are not @@ -2907,8 +3203,7 @@ def _try_time_with_blueprint(self, chunk, index): """ self.logger.debug('Begin augmentation with blueprint for temporal.') - chunk.time_axis = self._get_from_list('Chunk.energyAxis', index) - + chunk.time_axis = _to_int(self._get_from_list('Chunk.timeAxis', index)) aug_naxis = self._get_naxis('time', index) if aug_naxis is not None: if chunk.time: @@ -3222,68 +3517,6 @@ def augment_artifact(self, artifact, index=0): continue self._wcs_parser = FitsWcsParser(header, self.file, str(i)) super().augment_artifact(artifact, i) - # part.product_type = self._get_from_list('Part.productType', i) - # part.meta_producer = self._get_from_list( - # 'Part.metaProducer', index=0, current=part.meta_producer) - # - # # each Part has one Chunk, if it's not an empty part as determined - # # just previously - # if not part.chunks: - # part.chunks.append(Chunk()) - # chunk = part.chunks[0] - # chunk.meta_producer = self._get_from_list( - # 'Chunk.metaProducer', index=0, current=chunk.meta_producer) - # - # NOTE: astropy.wcs does not distinguished between WCS axes and - # data array axes. naxis in astropy.wcs represents in fact the - # number of WCS axes, whereas chunk.axis represents the naxis - # of the data array. 
Solution is to determine it directly from - # the header - # if 'ZNAXIS' in header: - # chunk.naxis = _to_int(header['ZNAXIS']) - # elif 'NAXIS' in header: - # chunk.naxis = _to_int(header['NAXIS']) - # else: - # chunk.naxis = self._get_from_list('Chunk.naxis', 0, - # wcs_parser.wcs.wcs.naxis) - # if self.blueprint._pos_axes_configed: - # wcs_parser.augment_position(chunk) - # logging.error(chunk.position) - # if chunk.position is None: - # self._try_position_with_blueprint(chunk, i) - # if chunk.position: - # chunk.position.resolution = self._get_from_list( - # 'Chunk.position.resolution', index=i) - # if self.blueprint._energy_axis_configed: - # wcs_parser.augment_energy(chunk) - # if chunk.energy: - # chunk.energy.bandpass_name = self._get_from_list( - # 'Chunk.energy.bandpassName', index=i) - # chunk.energy.transition = self._get_energy_transition( - # chunk.energy.transition) - # chunk.energy.resolving_power = _to_float(self._get_from_list( - # 'Chunk.energy.resolvingPower', index=i)) - # else: - # if self.blueprint._energy_axis_configed: - # self._try_energy_with_blueprint(chunk, i) - # if self.blueprint._time_axis_configed: - # wcs_parser.augment_temporal(chunk) - # if chunk.time is None: - # self._try_time_with_blueprint(chunk, i) - # if self.blueprint._polarization_axis_configed: - # wcs_parser.augment_polarization(chunk) - # if chunk.polarization is None: - # self._try_polarization_with_blueprint(chunk, i) - # if self.blueprint._obs_axis_configed: - # wcs_parser.augment_observable(chunk) - # if chunk.observable is None and chunk.observable_axis is None: - # self._try_observable_with_blueprint(chunk, i) - # if self.blueprint._custom_axis_configed: - # wcs_parser.augment_custom(chunk) - # - # # try to set smaller bits of the chunk WCS elements from the - # # blueprint - # self._try_range_with_blueprint(chunk, i) self.logger.debug( f'End artifact augmentation for {artifact.uri}.') @@ -3395,7 +3628,7 @@ def __init__( def _set_roots(self, root_name, root): bits = root_name.split('/') if len(bits) == 2: - logging.error(root[root_name].keys()) + # logging.error(root[root_name].keys()) for key in root[root_name].keys(): self._roots.append(root[root_name][key]) else: @@ -3415,6 +3648,7 @@ def _content_lookup(self, key, extension=None): # logging.error(f'key {key} root.name {extension.name} bits {bits}') if isinstance(key, list): + # TODO - document why this is the case return None if key.startswith('//'): key = key.replace('//', '/') @@ -3615,21 +3849,23 @@ def augment_temporal(self, chunk): delta = self.wcs.cd[time_axis_index][time_axis_index] else: delta = self.wcs.cdelt[time_axis_index] - aug_function = CoordFunction1D( - self._get_axis_length(time_axis_index + 1), - delta, aug_ref_coord) - naxis = CoordAxis1D(aug_naxis, aug_error, None, None, aug_function) - if not chunk.time: - chunk.time = TemporalWCS(naxis) - else: - chunk.time.axis = naxis - - chunk.time.exposure = _to_float(self.header.get('EXPTIME')) - chunk.time.resolution = _to_float(self.header.get('TIMEDEL')) - chunk.time.timesys = str(self.header.get('TIMESYS', 'UTC')) - chunk.time.trefpos = self.header.get('TREFPOS', None) - chunk.time.mjdref = self.header.get('MJDREF', - self.header.get('MJDDATE')) + if aug_ref_coord is not None: + aug_function = CoordFunction1D( + self._get_axis_length(time_axis_index + 1), + delta, aug_ref_coord) + naxis = CoordAxis1D(aug_naxis, aug_error, None, None, aug_function) + if not chunk.time: + chunk.time = TemporalWCS(naxis) + else: + chunk.time.axis = naxis + + self._finish_chunk_time(chunk) 
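For reference, a minimal standalone sketch of the caom2 objects that augment_temporal assembles just above (Axis from CTYPEn/CUNITn, RefCoord from CRPIXn/CRVALn, CoordFunction1D from NAXISn and CDELTn or the diagonal CD element). The numeric values are invented for illustration and are not taken from any file in this patch:

    from caom2 import Axis, CoordAxis1D, CoordFunction1D, RefCoord, TemporalWCS

    axis = Axis('TIME', 'd')                         # CTYPEn / CUNITn
    ref_coord = RefCoord(0.5, 59611.834)             # CRPIXn / CRVALn (MJD)
    function = CoordFunction1D(1440, 2.0e-5, ref_coord)  # NAXISn / delta
    time_wcs = TemporalWCS(CoordAxis1D(axis, function=function))
    time_wcs.exposure = 30.0                         # filled in by _finish_chunk_time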
+ # chunk.time.exposure = _to_float(self.header.get('EXPTIME')) + # chunk.time.resolution = _to_float(self.header.get('TIMEDEL')) + # chunk.time.timesys = str(self.header.get('TIMESYS', 'UTC')) + # chunk.time.trefpos = self.header.get('TREFPOS', None) + # chunk.time.mjdref = self.header.get('MJDREF', + # self.header.get('MJDDATE')) self.logger.debug('End TemporalWCS augmentation.') def augment_polarization(self, chunk): @@ -3659,6 +3895,7 @@ def augment_polarization(self, chunk): self._get_axis_length(polarization_axis_index + 1), delta, self._get_ref_coord(polarization_axis_index)) + logging.error(naxis) if not chunk.polarization: chunk.polarization = PolarizationWCS(naxis) else: @@ -3683,14 +3920,12 @@ def augment_observable(self, chunk): return chunk.observable_axis = observable_axis_index + 1 - ctype = self.header.get(f'CTYPE{chunk.observable_axis}') - cunit = self.header.get(f'CUNIT{chunk.observable_axis}') - pix_bin = self.header.get(f'CRPIX{chunk.observable_axis}') - if ctype is not None and cunit is not None and pix_bin is not None: - chunk.observable = ObservableAxis( - Slice(self._get_axis(0, ctype, cunit), pix_bin)) + self._finish_chunk_observable(chunk) self.logger.debug('End Observable WCS augmentation.') + def _finish_chunk_time(self, chunk): + raise NotImplementedError + def _get_axis(self, index, over_ctype=None, over_cunit=None): """ Assemble a generic axis """ aug_ctype = str(self.wcs.ctype[index]) if over_ctype is None \ @@ -3825,8 +4060,10 @@ def _sanitize(self, value): :param value: :return: """ - import numpy - if isinstance(value, float) and math.isnan(value): + logging.error(type(value)) + if value is None: + return None + elif isinstance(value, float) and math.isnan(value): return None elif not str(value): return None # empty string @@ -3869,6 +4106,22 @@ def __init__(self, header, file, extension): self.file = file self.extension = extension + def _finish_chunk_observable(self, chunk): + ctype = self.header.get(f'CTYPE{chunk.observable_axis}') + cunit = self.header.get(f'CUNIT{chunk.observable_axis}') + pix_bin = self.header.get(f'CRPIX{chunk.observable_axis}') + if ctype is not None and cunit is not None and pix_bin is not None: + chunk.observable = ObservableAxis( + Slice(self._get_axis(0, ctype, cunit), pix_bin)) + + def _finish_chunk_time(self, chunk): + chunk.time.exposure = _to_float(self.header.get('EXPTIME')) + chunk.time.resolution = _to_float(self.header.get('TIMEDEL')) + chunk.time.timesys = str(self.header.get('TIMESYS', 'UTC')) + chunk.time.trefpos = self.header.get('TREFPOS', None) + chunk.time.mjdref = self.header.get('MJDREF', + self.header.get('MJDDATE')) + def _get_axis_length(self, for_axis): # try ZNAXIS first in order to get the size of the original # image in case it was FITS compressed @@ -3930,236 +4183,250 @@ def _get_axis_index(self, keywords): def _get_axis_length(self, for_axis): # logging.error(f'{for_axis} {self._wcs.array_shape}') - return self._wcs.array_shape[for_axis-1] + if self._wcs.array_shape is None: + # TODO I think this is wrong + return 1 + else: + if len(self._wcs.array_shape) == 1: + result = self._wcs.array_shape + else: + result = self._wcs.array_shape[for_axis-1] + return _to_int(result) def _set_wcs(self, root, blueprint): + self._wcs = WCS(naxis=blueprint.get_configed_axes_count()) + array_shape = [0] * blueprint.get_configed_axes_count() count = 0 if blueprint._pos_axes_configed: self._axes['ra'][1] = True - self._axes['ra'][0] = count self._axes['dec'][1] = True + self._axes['ra'][0] = count self._axes['dec'][0] = 
count + 1 + temp = [0] * blueprint.get_configed_axes_count() + cd = [temp.copy() + for ii in range(blueprint.get_configed_axes_count())] + logging.error(self._attribute_lookup( + blueprint._get('Chunk.position.axis.axis1.ctype'), root + )) + self._wcs.wcs.ctype[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.axis1.ctype'), root + )) + self._wcs.wcs.ctype[count + 1] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.axis2.ctype'), root + )) + self._wcs.wcs.cunit[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.axis1.cunit'), root + )) + self._wcs.wcs.cunit[count + 1] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.axis2.cunit'), root + )) + array_shape[count] = self._attribute_lookup( + blueprint._get( + 'Chunk.position.axis.function.dimension.naxis1'), root + ) + array_shape[count + 1] = self._attribute_lookup( + blueprint._get( + 'Chunk.position.axis.function.dimension.naxis2'), root + ) + self._wcs.wcs.crpix[count] = self._attribute_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord1.pix'), root + ) + self._wcs.wcs.crpix[count + 1] = self._attribute_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord2.pix'), root + ) + self._wcs.wcs.crval[count] = self._attribute_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord1.val'), root + ) + self._wcs.wcs.crval[count + 1] = self._attribute_lookup( + blueprint._get( + 'Chunk.position.axis.function.refCoord.coord2.val'), root + ) + cd[count][0] = self._attribute_lookup( + blueprint._get('Chunk.position.axis.function.cd11'), root + ) + cd[count][1] = self._attribute_lookup( + blueprint._get('Chunk.position.axis.function.cd12'), root + ) + cd[count + 1][0] = self._attribute_lookup( + blueprint._get('Chunk.position.axis.function.cd21'), root + ) + cd[count + 1][1] = self._attribute_lookup( + blueprint._get('Chunk.position.axis.function.cd22'), root + ) + self._wcs.wcs.crder[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.error1.rnder'), root + )) + self._wcs.wcs.crder[count + 1] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.error2.rnder'), root + )) + self._wcs.wcs.csyer[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.error1.syser'), root + )) + self._wcs.wcs.csyer[count + 1] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.position.axis.error2.syser'), root + )) + self._finish_position(blueprint) + self._wcs.wcs.cd = cd count += 2 if blueprint._time_axis_configed: self._axes['time'][1] = True self._axes['time'][0] = count + self._wcs.wcs.ctype[count] = self._attribute_lookup( + blueprint._get('Chunk.time.axis.axis.ctype'), root + ) + self._wcs.wcs.cunit[count] = self._attribute_lookup( + blueprint._get('Chunk.time.axis.axis.cunit'), root + ) + array_shape[count] = self._attribute_lookup( + blueprint._get('Chunk.time.axis.function.naxis'), root + ) + self._wcs.wcs.crpix[count] = self._attribute_lookup( + blueprint._get('Chunk.time.axis.function.refCoord.pix'), root + ) + self._wcs.wcs.crval[count] = self._attribute_lookup( + blueprint._get('Chunk.time.axis.function.refCoord.val'), root + ) + self._wcs.wcs.crder[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.time.axis.error.rnder'), root + )) + self._wcs.wcs.csyer[count] = self._sanitize( + self._attribute_lookup( + 
blueprint._get('Chunk.time.axis.error.syser'), root + )) + self._finish_time(blueprint) count += 1 if blueprint._energy_axis_configed: self._axes['energy'][1] = True self._axes['energy'][0] = count + x = self._attribute_lookup( + blueprint._get('Chunk.energy.axis.axis.ctype'), root + ) + self._wcs.wcs.ctype[count] = x + self._wcs.wcs.cunit[count] = self._attribute_lookup( + blueprint._get('Chunk.energy.axis.axis.cunit'), root + ) + array_shape[count] = self._attribute_lookup( + blueprint._get('Chunk.energy.axis.function.naxis'), root + ) + self._wcs.wcs.crpix[count] = self._attribute_lookup( + blueprint._get('Chunk.energy.axis.function.refCoord.pix'), root + ) + self._wcs.wcs.crval[count] = self._attribute_lookup( + blueprint._get('Chunk.energy.axis.function.refCoord.val'), root + ) + self._wcs.wcs.crder[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.energy.axis.error.rnder'), root + )) + self._wcs.wcs.csyer[count] = self._sanitize( + self._attribute_lookup( + blueprint._get('Chunk.energy.axis.error.syser'), root + )) + self._finish_energy(blueprint) count += 1 if blueprint._polarization_axis_configed: self._axes['polarization'][1] = True self._axes['polarization'][0] = count + self._wcs.wcs.ctype[count] = self._attribute_lookup( + blueprint._get('Chunk.polarization.axis.axis.ctype'), root + ) + self._wcs.wcs.cunit[count] = self._attribute_lookup( + blueprint._get('Chunk.polarization.axis.axis.cunit'), root + ) + array_shape[count] = self._attribute_lookup( + blueprint._get('Chunk.polarization.axis.function.naxis'), root + ) + self._wcs.wcs.crpix[count] = self._attribute_lookup( + blueprint._get('Chunk.polarization.axis.function.refCoord.pix'), + root + ) + self._wcs.wcs.crval[count] = self._attribute_lookup( + blueprint._get('Chunk.polarization.axis.function.refCoord.val'), + root + ) count += 1 + # TODO - where's the delta? 
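The blueprint-driven assignments above amount to building an astropy WCS by hand. A self-contained sketch of that technique, using invented example values (two position axes plus a time axis) rather than anything read from the TAOS-II file:

    from astropy.wcs import WCS

    w = WCS(naxis=3)
    w.wcs.ctype = ['RA---TAN', 'DEC--TAN', 'TIME']
    w.wcs.cunit = ['deg', 'deg', 'd']
    w.wcs.crpix = [2048.0, 2048.0, 0.5]
    w.wcs.crval = [210.5, 54.3, 59611.834]
    w.wcs.cd = [[-5.0e-5, 0.0, 0.0],
                [0.0, 5.0e-5, 0.0],
                [0.0, 0.0, 2.0e-5]]
    w.array_shape = [4096, 4096, 1440]   # read back later by _get_axis_length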
if blueprint._obs_axis_configed: self._axes['observable'][1] = True self._axes['observable'][0] = count count += 1 + self._wcs.wcs.ctype[count] = self._attribute_lookup( + blueprint._get('Chunk.observable.axis.axis.ctype'), root + ) + self._wcs.wcs.cunit[count] = self._attribute_lookup( + blueprint._get('Chunk.observable.axis.axis.cunit'), root + ) + array_shape[count] = 1.0 + self._wcs.wcs.crpix[count] = self._attribute_lookup( + blueprint._get('Chunk.observable.axis.function.refCoord.pix'), + root + ) + self._wcs.wcs.crval[count] = 0.0 if blueprint._custom_axis_configed: self._axes['custom'][1] = True self._axes['custom'][0] = count count += 1 - - self._wcs = WCS(naxis=count) - z = [ - self._sanitize(self._xx_lookup( - blueprint._get('Chunk.position.axis.axis1.ctype'), root - )) if self._axes['ra'][1] else None, - self._sanitize(self._xx_lookup( - blueprint._get('Chunk.position.axis.axis2.ctype'), root - )) if self._axes['dec'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.time.axis.axis.ctype'), root - ) if self._axes['time'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.energy.axis.axis.ctype'), root - ) if self._axes['energy'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.polarization.axis.axis.ctype'), root - ) if self._axes['polarization'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.observable.axis.axis.ctype'), root - ) if self._axes['observable'][1] else None, - self._xx_lookup( + self._wcs.wcs.ctype[count] = self._attribute_lookup( blueprint._get('Chunk.custom.axis.axis.ctype'), root - ) if self._axes['custom'][1] else None, - ] - self._wcs.wcs.ctype = z[:count] - z = [ - self._sanitize(self._xx_lookup( - blueprint._get('Chunk.position.axis.axis1.cunit'), root - )) if self._axes['ra'][1] else None, - self._sanitize(self._xx_lookup( - blueprint._get('Chunk.position.axis.axis2.cunit'), root - )) if self._axes['dec'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.time.axis.axis.cunit'), root - ) if self._axes['time'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.energy.axis.axis.cunit'), root - ) if self._axes['energy'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.polarization.axis.axis.cunit'), root - ) if self._axes['polarization'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.observable.axis.axis.cunit'), root - ) if self._axes['observable'][1] else None, - self._xx_lookup( + ) + self._wcs.wcs.cunit[count] = self._attribute_lookup( blueprint._get('Chunk.custom.axis.axis.cunit'), root - ) if self._axes['custom'][1] else None, - ] - self._wcs.wcs.cunit = z[:count] - - z = [ - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.dimension.naxis1'), root - ) if self._axes['ra'][1] else 0, - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.dimension.naxis2'), root - ) if self._axes['dec'][1] else 0, - self._xx_lookup( - blueprint._get('Chunk.time.axis.function.naxis'), root - ) if self._axes['time'][1] else 0, - self._xx_lookup( - blueprint._get('Chunk.energy.axis.function.naxis'), root - ) if self._axes['energy'][1] else 0, - self._xx_lookup( - blueprint._get('Chunk.polarization.axis.function.naxis'), root - ) if self._axes['polarization'][1] else 0, - self._xx_lookup( - blueprint._get('Chunk.observable.dependent.bin'), root - ) if self._axes['observable'][1] else 0, - self._xx_lookup( + ) + array_shape[count] = self._attribute_lookup( blueprint._get('Chunk.custom.axis.function.naxis'), root - ) if self._axes['custom'][1] else 0, - ] - self._wcs.array_shape = 
z[:count] - - z = [ - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord1.pix'), root - ) if self._axes['ra'][1] else None, - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord2.pix'), root - ) if self._axes['dec'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.time.axis.function.refCoord.pix'), root - ) if self._axes['time'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.energy.axis.function.refCoord.pix'), root - ) if self._axes['energy'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.polarization.axis.function.refCoord.pix'), root - ) if self._axes['polarization'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.observable.axis.function.refCoord.pix'), root - ) if self._axes['observable'][1] else None, - self._xx_lookup( + ) + # TODO delta + self._wcs.wcs.crpix[count] = self._attribute_lookup( blueprint._get('Chunk.custom.axis.function.refCoord.pix'), root - ) if self._axes['custom'][1] else None, - ] - self._wcs.wcs.crpix = z[:count] - - z = [ - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord1.val'), root - ) if self._axes['ra'][1] else None, - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord2.val'), root - ) if self._axes['dec'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.time.axis.function.refCoord.val'), root - ) if self._axes['time'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.energy.axis.function.refCoord.val'), root - ) if self._axes['energy'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.polarization.axis.function.refCoord.val'), root - ) if self._axes['polarization'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.observable.axis.function.refCoord.val'), root - ) if self._axes['observable'][1] else None, - self._xx_lookup( - blueprint._get('Chunk.custom.axis.function.refCoord.val'), root - ) if self._axes['custom'][1] else None, - ] - self._wcs.wcs.crval = z[:count] - - # row first - z = [ - [ - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.cd11'), root - ) if self._axes['ra'][1] else 0.0, - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.cd12'), root - ) if self._axes['dec'][1] else 0.0, - ], - [ - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.cd21'), root - ) if self._axes['ra'][1] else 0.0, - self._xx_lookup( - blueprint._get( - 'Chunk.position.axis.function.cd22'), root - ) if self._axes['dec'][1] else 0.0, - ], - ] - # self._xx_lookup( - # blueprint._get('Chunk.time.axis.function.cd33'), root - # ) if self._axes['time'][1] else None, - # self._xx_lookup( - # blueprint._get('Chunk.energy.axis.function.cd44'), root - # ) if self._axes['energy'][1] else None, - # self._xx_lookup( - # blueprint._get('Chunk.polarization.axis.function.cd55'), root - # ) if self._axes['polarization'][1] else None, - # self._xx_lookup( - # blueprint._get('Chunk.observable.axis.function.cd66'), root - # ) if self._axes['observable'][1] else None, - # self._xx_lookup( - # blueprint._get('Chunk.custom.axis.function.cd77'), root - # ) if self._axes['custom'][1] else None, - # ] - self._wcs.wcs.cd = z[:count][:count] - self._wcs.wcs.equinox = self._xx_lookup( - blueprint._get('Chunk.position.equinox'), self._base - ) if self._axes['ra'][1] else None - - if blueprint._time_axis_configed: - self._wcs.wcs.mjdstart = self._xx_lookup( - blueprint._get('Chunk.time.axis.range.start.val') ) - self._wcs.wcs.mjdend = self._xx_lookup( - 
blueprint._get('Chunk.time.axis.range.end.val') + self._wcs.wcs.crval[count] = self._attribute_lookup( + blueprint._get('Chunk.custom.axis.function.refCoord.val'), root ) - # logging.error(f'{self._wcs.wcs}') - - def _xx_lookup(self, key, root): + # logging.error(f'count is {count}') + self._wcs.array_shape = array_shape + # logging.error(f'{self._wcs.array_shape}') + # logging.error(f'{self._wcs.wcs.radesys}') + + def _append_cd_value(self, cd, cd_value, count): + prefix = [] + suffix = [] + for ii in range(0, len(cd)): + prefix.append(0.0) + for ii in range(len(cd), count): + suffix.append(0.0) + cd.append(prefix + [cd_value] + suffix) + + def _attribute_lookup(self, key, root): # logging.error(f'key {key} root name {root.name}') if key is None: - raise NotImplementedError + # raise NotImplementedError + # why might this be the wrong decision?, because it means the + # blueprint lookup returned None, which is a valid value + # assignment, isn't it? + return None if isinstance(key, tuple): # logging.error('path 10') result = None for ii in key[0]: - result = self._xx_lookup(ii, root) + result = self._attribute_lookup(ii, root) if result is None and key[1] is not None: # apply the default value result = key[1] return result - else: + elif isinstance(key, str): if key.startswith('//'): key = key.replace('//', '/') root = self._base @@ -4194,37 +4461,108 @@ def _xx_lookup(self, key, root): # first bit is an empty string temp = f'/{"/".join(ii for ii in bits[2:])}' # logging.error(f'path 14 y{temp}y x{bits}x') - return self._xx_lookup(temp, root[bits[1]]) + return self._attribute_lookup(temp, root[bits[1]]) else: # a value has been set # logging.error(f'path 17 {key}') + if key == 'None': + return None return key + else: + # logging.error('path 30') + return key - def _get_z(self, blueprint, root, default): - z = [ - self._xx_lookup( - blueprint._get('Chunk.position.axis.axis1.cunit'), root - ) if self._axes['ra'][1] else default, - self._xx_lookup( - blueprint._get('Chunk.position.axis.axis2.cunit'), root - ) if self._axes['dec'][1] else default, - self._xx_lookup( - blueprint._get('Chunk.time.axis.axis.cunit'), root - ) if self._axes['time'][1] else default, - self._xx_lookup( - blueprint._get('Chunk.energy.axis.axis.cunit'), root - ) if self._axes['energy'][1] else default, - self._xx_lookup( - blueprint._get('Chunk.polarization.axis.axis.cunit'), root - ) if self._axes['polarization'][1] else default, - self._xx_lookup( - blueprint._get('Chunk.observable.axis.axis.cunit'), root - ) if self._axes['observable'][1] else default, - self._xx_lookup( - blueprint._get('Chunk.custom.axis.axis.cunit'), root - ) if self._axes['custom'][1] else default, - ] - return z + def _finish_chunk_observable(self, chunk): + ctype = self._wcs.wcs.ctype[chunk.observable_axis-1] + cunit = self._wcs.wcs.ctype[chunk.observable_axis-1] + pix_bin = _to_int(self._wcs.wcs.crpix[chunk.observable_axis-1]) + if ctype is not None and cunit is not None and pix_bin is not None: + chunk.observable = ObservableAxis( + Slice(self._get_axis(0, ctype, cunit), pix_bin)) + + def _finish_chunk_time(self, chunk): + if not math.isnan(self._wcs.wcs.xposure): + chunk.time.exposure = self._wcs.wcs.xposure + chunk.time.timesys = self._wcs.wcs.timesys + chunk.time.trefpos = self._wcs.wcs.trefpos + # convert from the numpy array length 2 of self._wcs.wcs.mjdref + # to a single value + # TODO chunk.time.mjdref = self._wcs.to_header().get('MJDREF') + + def _finish_energy(self, blueprint): + x = self._attribute_lookup( + 
blueprint._get('Chunk.energy.specsys'), self._base) + if x: + self._wcs.wcs.specsys = x + x = self._attribute_lookup( + blueprint._get('Chunk.energy.ssysobs'), self._base) + if x: + self._wcs.wcs.ssysobs = x + x = _to_float( + self._attribute_lookup( + blueprint._get('Chunk.energy.restfrq'), self._base) + ) + if x: + self._wcs.wcs.restfrq = x + x = self._attribute_lookup( + blueprint._get('Chunk.energy.restwav'), self._base) + if x: + self._wcs.wcs.restwav = x + x = self._attribute_lookup( + blueprint._get('Chunk.energy.velosys'), self._base) + if x: + self._wcs.wcs.velosys = x + x = self._attribute_lookup( + blueprint._get('Chunk.energy.zsource'), self._base) + if x: + self._wcs.wcs.zsource = x + x = self._attribute_lookup( + blueprint._get('Chunk.energy.ssyssrc'), self._base) + if x: + self._wcs.wcs.ssyssrc = x + x = self._attribute_lookup( + blueprint._get('Chunk.energy.velang'), self._base) + if x: + self._wcs.wcs.velangl = x + return + + def _finish_position(self, blueprint): + x = self._attribute_lookup( + blueprint._get('Chunk.position.coordsys'), self._base + ) + if x: + self._wcs.wcs.radesys = x + x = _to_float( + self._attribute_lookup( + blueprint._get('Chunk.position.equinox'), self._base + ) + ) + if x: + self._wcs.wcs.equinox = x + + def _finish_time(self, blueprint): + x = self._attribute_lookup( + blueprint._get('Chunk.time.exposure'), self._base + ) + if x: + self._wcs.wcs.xposure = x + x = self._attribute_lookup( + blueprint._get('Chunk.time.timesys'), self._base + ) + if x: + self._wcs.wcs.timesys = x + x = self._attribute_lookup( + blueprint._get('Chunk.time.trefpos'), self._base + ) + if x: + self._wcs.wcs.trefpos = x + x = self._attribute_lookup( + blueprint._get('Chunk.time.mjdref'), self._base + ) + if x: + logging.error(f'xx{x}xx') + logging.error(type(x)) + self._wcs.wcs.mjdref = x def _to_str(value): diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint index f07370fc..ac11cd8e 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint @@ -1,3 +1,4 @@ +Observation.metaRelease = 2018-05-21T02:07:22.0 Observation.sequenceNumber = [//header/run/run_seq] Observation.type = OBJECT Observation.target.name = [//header/object/obj_id] @@ -22,10 +23,23 @@ Chunk.position.axis.function.cd12 = [/header/wcs/cd(0:1)] Chunk.position.axis.function.cd21 = [/header/wcs/cd(1:0)] Chunk.position.axis.function.cd22 = [/header/wcs/cd(1:1)] Chunk.position.equinox = [//header/object/epoch] +Chunk.position.axis.error1.syser = None +Chunk.position.axis.error1.rnder= None +Chunk.position.axis.error2.syser = None +Chunk.position.axis.error2.rnder = None +Chunk.position.coordsys = None +Chunk.timeAxis = 3 Chunk.time.axis.axis.ctype = TIME Chunk.time.axis.axis.cunit = s Chunk.time.axis.range.start.pix = 0 Chunk.time.axis.range.start.val = [//header/timeseries/mjdrunstart] Chunk.time.axis.range.end.pix = get_time_axis_range_end() Chunk.time.axis.range.end.val = [//header/timeseries/mjdrunend] +Chunk.time.axis.function.naxis = [//header/timeseries/numepochs] +Chunk.time.axis.error.rnder = None +Chunk.time.axis.error.syser = None +Chunk.time.exposure = get_exposure() +Chunk.time.timesys = None +Chunk.time.trefpos = None +Chunk.time.mjdref = None diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py 
b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py index 678870c2..dd394a80 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py @@ -2,6 +2,16 @@ from astropy.coordinates import SkyCoord +def get_exposure(base): + b = base.get('base') + mjdrunstart = b['header']['timeseries']['mjdrunstart'] + mjdrunend = b['header']['timeseries']['mjdrunend'] + result = 0.0 + if mjdrunstart is not None and mjdrunend is not None: + result = mjdrunend - mjdrunstart + return result + + def get_target_position_cval1(base): ra, dec_ignore = _get_target_position(base) return ra diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index d174a01a..e7b1ad31 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -72,6 +72,8 @@ from cadcutils import net from cadcdata import FileInfo from caom2utils import FitsParser, FitsWcsParser, main_app, update_blueprint +from caom2utils import HDF5Parser, Hdf5WcsParser, BlueprintParser +from caom2utils import Hdf5ObsBlueprint from caom2utils import ObsBlueprint, GenericParser, gen_proc from caom2utils import get_gen_proc_arg_parser, augment from caom2utils.legacy import load_config @@ -158,6 +160,35 @@ def test_augment_energy(): assert result is None, repr(energy) +def test_hdf5_wcs_parser_set_wcs(): + test_position_bp = Hdf5ObsBlueprint(position_axes=(1, 2)) + test_energy_bp = Hdf5ObsBlueprint(energy_axis=1) + test_time_bp = Hdf5ObsBlueprint(time_axis=1) + test_polarization_bp = Hdf5ObsBlueprint(polarization_axis=1) + test_observable_bp = Hdf5ObsBlueprint(obs_axis=1) + test_custom_bp = Hdf5ObsBlueprint(custom_axis=1) + test_f_name = 'taos2_20220201T201317Z_star04239531.h5' + test_uri = f'cadc:TEST/{test_f_name}' + test_fqn = f'{TESTDATA_DIR}/taos_h5file/20220201T200117/{test_f_name}' + test_artifact = Artifact(test_uri, ProductType.SCIENCE, ReleaseType.DATA) + + for bp in [ + test_position_bp, + test_energy_bp, + test_time_bp, + test_polarization_bp, + test_observable_bp, + test_custom_bp, + ]: + test_subject = HDF5Parser(bp, test_uri, test_fqn) + assert test_subject is not None, 'expect a result' + test_subject.augment_artifact(test_artifact) + if bp == test_position_bp: + assert test_subject._wcs_parser._wcs.naxis == 2, 'wrong pos axis' + else: + assert test_subject._wcs_parser._wcs.naxis == 1, 'wrong axis count' + + def test_augment_failure(): bp = ObsBlueprint() test_fitsparser = FitsParser(sample_file_4axes, bp) @@ -1470,11 +1501,11 @@ def test_apply_blueprint_execute_external(): test_generic_blueprint.set( 'Observation.type', '_get_test_obs_type(parameters)') - # generic parser - test_generic_parser = GenericParser(test_generic_blueprint) - assert test_generic_parser is not None, \ - 'expect generic construction to complete' - assert test_generic_parser._get_from_list('Observation.type', index=0) \ + # generic parser - function execution should have occurred, the return + # value is dependent on the parameters to the call + test_gp = GenericParser(test_generic_blueprint) + assert test_gp is not None, 'expect generic construction to complete' + assert test_gp._get_from_list('Observation.type', index=0) \ == 'generic_parser_value', 'wrong generic plan value' # fits parser From 7c1d27aeb21bf55198f30849258a024438b40ed7 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Mon, 14 Mar 2022 08:47:11 -0700 Subject: [PATCH 08/38] 
CADC-10808 - working example. --- caom2utils/caom2utils/caom2blueprint.py | 302 ++++++------------ .../caom2utils/tests/data/cfhtsg/mp9801/y.xml | 1 - .../20220201T200117/taos.blueprint | 1 - 3 files changed, 101 insertions(+), 203 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 06849e36..5538914c 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -1561,6 +1561,9 @@ def __init__(self, position_axes=None, energy_axis=None, # "integer" from a list, followed by "integer" from the list in the # list + # there are no sensible/known HDF5 defaults, so just try to make sure + # the blueprint executes with a lot of None values + def configure_custom_axis(self, axis, override=True): """ Set the expected FITS custom keywords by index in the blueprint @@ -1578,7 +1581,7 @@ def configure_custom_axis(self, axis, override=True): if override: self.set('Chunk.custom.axis.axis.ctype', ([], None)) self.set('Chunk.custom.axis.axis.cunit', ([], None)) - self.set('Chunk.custom.axis.function.naxis', ([], None)) + self.set('Chunk.custom.axis.function.naxis', ([], 1)) self.set('Chunk.custom.axis.function.delta', ([], None)) self.set('Chunk.custom.axis.function.refCoord.pix', ([], None)) self.set('Chunk.custom.axis.function.refCoord.val', ([], None)) @@ -1600,17 +1603,15 @@ def configure_position_axes(self, axes, override=True): :param axes: The index expected for the position axes. :return: """ - logging.error('called??????') if self._pos_axes_configed: self.logger.error( 'Attempt to configure already-configured position axes.') return if override: - logging.error('yes, no maybe?') self.set('Chunk.position.coordsys', ([], None)) self.set('Chunk.position.equinox', ([], None)) - self.set('Chunk.position.axis.axis1.ctype', ([], '')) + self.set('Chunk.position.axis.axis1.ctype', ([], None)) self.set('Chunk.position.axis.axis1.cunit', ([], None)) self.set('Chunk.position.axis.axis2.ctype', ([], None)) self.set('Chunk.position.axis.axis2.cunit', ([], None)) @@ -1687,7 +1688,7 @@ def configure_energy_axis(self, axis, override=True): self.set('Chunk.energy.axis.axis.cunit', ([], None)) self.set('Chunk.energy.axis.error.syser', ([], None)) self.set('Chunk.energy.axis.error.rnder', ([], None)) - self.set('Chunk.energy.axis.function.naxis', ([], None)) + self.set('Chunk.energy.axis.function.naxis', ([], 1)) self.set('Chunk.energy.axis.function.delta', ([], None)) self.set('Chunk.energy.axis.function.refCoord.pix', ([], None)) self.set('Chunk.energy.axis.function.refCoord.val', ([], None)) @@ -1729,7 +1730,7 @@ def configure_polarization_axis(self, axis, override=True): # STOKES is the only value allowed for PolarizationWCS ctype. 
self.set('Chunk.polarization.axis.axis.ctype', ([], 'STOKES')) self.set('Chunk.polarization.axis.axis.cunit', ([], None)) - self.set('Chunk.polarization.axis.function.naxis', ([], None)) + self.set('Chunk.polarization.axis.function.naxis', ([], 1)) self.set('Chunk.polarization.axis.function.delta', ([], None)) self.set('Chunk.polarization.axis.function.refCoord.pix', ([], None)) @@ -1796,7 +1797,7 @@ def configure_time_axis(self, axis, override=True): self.set('Chunk.time.axis.axis.cunit', ([], None)) self.set('Chunk.time.axis.error.syser', ([], None)) self.set('Chunk.time.axis.error.rnder', ([], None)) - self.set('Chunk.time.axis.function.naxis', ([], None)) + self.set('Chunk.time.axis.function.naxis', ([], 1)) self.set('Chunk.time.axis.function.delta', ([], None)) self.set('Chunk.time.axis.function.refCoord.pix', ([], None)) self.set('Chunk.time.axis.function.refCoord.val', ([], None)) @@ -2004,7 +2005,6 @@ def _get_from_list(self, lookup, index, current=None): self.logger.debug( f'{lookup}: using current value of {current!r}.') value = current - logging.error(f'other value {value}') return value if (keywords and not ObsBlueprint.needs_lookup(keywords) and not ObsBlueprint.is_function(keywords)): @@ -2023,7 +2023,6 @@ def _get_from_list(self, lookup, index, current=None): if isinstance(value, bool) or current is not None: value = current - logging.error(f'value {value}') self.logger.debug(f'{lookup}: value is {value}') return value @@ -2223,7 +2222,6 @@ def augment_artifact(self, artifact, index): self._get_chunk_naxis(chunk, index) if self.blueprint._pos_axes_configed: self._wcs_parser.augment_position(chunk) - # logging.error(chunk.position) if chunk.position is None: self._try_position_with_blueprint(chunk, index) if chunk.position: @@ -2426,8 +2424,6 @@ def _get_from_list(self, lookup, index, current=None): value = None try: keys = self.blueprint._get(lookup) - # if 'equinox' in lookup: - # logging.error(f'path 1007 {keys}') except KeyError: self.add_error(lookup, sys.exc_info()[1]) self.logger.debug( @@ -2438,13 +2434,10 @@ def _get_from_list(self, lookup, index, current=None): value = current return value - # logging.error(f'here 1??? 
{isinstance(keys, tuple)} {type(keys)}') - if ObsBlueprint.needs_lookup(keys): for ii in keys[0]: try: value = self._content_lookup(ii, index) - # logging.error(value) if value: self.logger.debug( f'{lookup}: assigned value {value} based on ' @@ -2487,10 +2480,8 @@ def _get_from_list(self, lookup, index, current=None): value = None else: value = keys - # logging.error(f'here 2 {value}???') elif current: value = current - # logging.error(f'here 3 {value}???') self.logger.debug(f'{lookup}: value is {value}') return value @@ -3617,18 +3608,15 @@ def __init__( ): import h5py self._file = h5py.File(local_f_name, 'r') - # logging.error(type(self._file)) super().__init__(obs_blueprint, uri) self._wcs_parser = None self._roots = [] self.apply_blueprint() self._set_roots(find_roots_here, self._file) - # logging.error(self._blueprint) def _set_roots(self, root_name, root): bits = root_name.split('/') if len(bits) == 2: - # logging.error(root[root_name].keys()) for key in root[root_name].keys(): self._roots.append(root[root_name][key]) else: @@ -3637,7 +3625,6 @@ def _set_roots(self, root_name, root): def augment_artifact(self, artifact, index=0): for i, root in enumerate(self._roots): - # logging.error(f'root {root.name}') self._wcs_parser = Hdf5WcsParser(root, self.blueprint, self._file) super().augment_artifact(artifact, i) @@ -3645,8 +3632,6 @@ def _content_lookup(self, key, extension=None): if isinstance(extension, int): extension = self._roots[extension] - # logging.error(f'key {key} root.name {extension.name} bits {bits}') - if isinstance(key, list): # TODO - document why this is the case return None @@ -3655,12 +3640,9 @@ def _content_lookup(self, key, extension=None): extension = self._file bits = key.split('/') if len(bits) == 2: - # logging.error('path 1') if '(' in bits[1]: - # logging.error('path 2') x = bits[1].split('(') if ':' in x[1]: - # logging.error('path 100') a = x[1].split(')')[0].split(':') if len(a) > 2: raise NotImplementedError @@ -3668,16 +3650,13 @@ def _content_lookup(self, key, extension=None): return y else: index = int(x[1].split(')')[0]) - # logging.error(f'x {x} index {index}') return extension[x[0]][index] else: - # logging.error('path 3') return extension[bits[1]] else: # the 2 is because there's always a leading slash, so the # first bit is an empty string temp = f'/{"/".join(ii for ii in bits[2:])}' - # logging.error(f'path 4 {temp} {bits[2:]}') return self._content_lookup(temp, extension[bits[1]]) def _get_chunk_naxis(self, chunk, index): @@ -3849,10 +3828,9 @@ def augment_temporal(self, chunk): delta = self.wcs.cd[time_axis_index][time_axis_index] else: delta = self.wcs.cdelt[time_axis_index] - if aug_ref_coord is not None: - aug_function = CoordFunction1D( - self._get_axis_length(time_axis_index + 1), - delta, aug_ref_coord) + axis_length = self._get_axis_length(time_axis_index + 1) + if aug_ref_coord is not None and axis_length is not None: + aug_function = CoordFunction1D(axis_length, delta, aug_ref_coord) naxis = CoordAxis1D(aug_naxis, aug_error, None, None, aug_function) if not chunk.time: chunk.time = TemporalWCS(naxis) @@ -3860,12 +3838,6 @@ def augment_temporal(self, chunk): chunk.time.axis = naxis self._finish_chunk_time(chunk) - # chunk.time.exposure = _to_float(self.header.get('EXPTIME')) - # chunk.time.resolution = _to_float(self.header.get('TIMEDEL')) - # chunk.time.timesys = str(self.header.get('TIMESYS', 'UTC')) - # chunk.time.trefpos = self.header.get('TREFPOS', None) - # chunk.time.mjdref = self.header.get('MJDREF', - # 
self.header.get('MJDDATE')) self.logger.debug('End TemporalWCS augmentation.') def augment_polarization(self, chunk): @@ -3895,7 +3867,6 @@ def augment_polarization(self, chunk): self._get_axis_length(polarization_axis_index + 1), delta, self._get_ref_coord(polarization_axis_index)) - logging.error(naxis) if not chunk.polarization: chunk.polarization = PolarizationWCS(naxis) else: @@ -3946,7 +3917,6 @@ def _get_axis_index(self, keywords): axis = None for i, elem in enumerate(self.wcs.ctype): elem = elem.split('-')[0] - logging.error(elem) if elem in keywords: axis = i break @@ -3957,6 +3927,9 @@ def _get_axis_index(self, keywords): break return axis + def _get_axis_length(self, index): + raise NotImplementedError + def _get_cd(self, x_index, y_index): """ returns cd info""" @@ -4024,7 +3997,6 @@ def _get_ref_coord(self, index): def _get_spatial_axis(self, xindex, yindex): """Assemble the bits to make the axis parameter needed for SpatialWCS construction.""" - # logging.error(f'xindex {xindex} yindex {yindex}') aug_dimension = self._get_dimension(xindex, yindex) aug_ref_coord = Coord2D(self._get_ref_coord(xindex), @@ -4060,7 +4032,6 @@ def _sanitize(self, value): :param value: :return: """ - logging.error(type(value)) if value is None: return None elif isinstance(value, float) and math.isnan(value): @@ -4182,13 +4153,12 @@ def _get_axis_index(self, keywords): return result def _get_axis_length(self, for_axis): - # logging.error(f'{for_axis} {self._wcs.array_shape}') if self._wcs.array_shape is None: # TODO I think this is wrong return 1 else: if len(self._wcs.array_shape) == 1: - result = self._wcs.array_shape + result = self._wcs.array_shape[0] else: result = self._wcs.array_shape[for_axis-1] return _to_int(result) @@ -4197,6 +4167,14 @@ def _set_wcs(self, root, blueprint): self._wcs = WCS(naxis=blueprint.get_configed_axes_count()) array_shape = [0] * blueprint.get_configed_axes_count() count = 0 + + def assign_sanitize(assignee, index, key, sanitize=True): + x = self._attribute_lookup(blueprint._get(key), root) + if sanitize: + x = self._sanitize(x) + if x is not None: + assignee[index] = x + if blueprint._pos_axes_configed: self._axes['ra'][1] = True self._axes['dec'][1] = True @@ -4205,25 +4183,14 @@ def _set_wcs(self, root, blueprint): temp = [0] * blueprint.get_configed_axes_count() cd = [temp.copy() for ii in range(blueprint.get_configed_axes_count())] - logging.error(self._attribute_lookup( - blueprint._get('Chunk.position.axis.axis1.ctype'), root - )) - self._wcs.wcs.ctype[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.axis1.ctype'), root - )) - self._wcs.wcs.ctype[count + 1] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.axis2.ctype'), root - )) - self._wcs.wcs.cunit[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.axis1.cunit'), root - )) - self._wcs.wcs.cunit[count + 1] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.axis2.cunit'), root - )) + assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.position.axis.axis1.ctype') + assign_sanitize(self._wcs.wcs.ctype, count + 1, + 'Chunk.position.axis.axis2.ctype') + assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.position.axis.axis1.cunit') + assign_sanitize(self._wcs.wcs.cunit, count + 1, + 'Chunk.position.axis.axis2.cunit') array_shape[count] = self._attribute_lookup( blueprint._get( 'Chunk.position.axis.function.dimension.naxis1'), root @@ -4232,22 +4199,14 @@ def _set_wcs(self, 
root, blueprint): blueprint._get( 'Chunk.position.axis.function.dimension.naxis2'), root ) - self._wcs.wcs.crpix[count] = self._attribute_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord1.pix'), root - ) - self._wcs.wcs.crpix[count + 1] = self._attribute_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord2.pix'), root - ) - self._wcs.wcs.crval[count] = self._attribute_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord1.val'), root - ) - self._wcs.wcs.crval[count + 1] = self._attribute_lookup( - blueprint._get( - 'Chunk.position.axis.function.refCoord.coord2.val'), root - ) + assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.position.axis.function.refCoord.coord1.pix') + assign_sanitize(self._wcs.wcs.crpix, count + 1, + 'Chunk.position.axis.function.refCoord.coord2.pix') + assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.position.axis.function.refCoord.coord1.val') + assign_sanitize(self._wcs.wcs.crval, count + 1, + 'Chunk.position.axis.function.refCoord.coord2.val') cd[count][0] = self._attribute_lookup( blueprint._get('Chunk.position.axis.function.cd11'), root ) @@ -4260,145 +4219,106 @@ def _set_wcs(self, root, blueprint): cd[count + 1][1] = self._attribute_lookup( blueprint._get('Chunk.position.axis.function.cd22'), root ) - self._wcs.wcs.crder[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.error1.rnder'), root - )) - self._wcs.wcs.crder[count + 1] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.error2.rnder'), root - )) - self._wcs.wcs.csyer[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.error1.syser'), root - )) - self._wcs.wcs.csyer[count + 1] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.position.axis.error2.syser'), root - )) + assign_sanitize(self._wcs.wcs.crder, count, + 'Chunk.position.axis.error1.rnder') + assign_sanitize(self._wcs.wcs.crder, count + 1, + 'Chunk.position.axis.error2.rnder') + assign_sanitize(self._wcs.wcs.csyer, count, + 'Chunk.position.axis.error1.syser') + assign_sanitize(self._wcs.wcs.csyer, count + 1, + 'Chunk.position.axis.error2.syser') self._finish_position(blueprint) self._wcs.wcs.cd = cd count += 2 if blueprint._time_axis_configed: self._axes['time'][1] = True self._axes['time'][0] = count - self._wcs.wcs.ctype[count] = self._attribute_lookup( - blueprint._get('Chunk.time.axis.axis.ctype'), root - ) - self._wcs.wcs.cunit[count] = self._attribute_lookup( - blueprint._get('Chunk.time.axis.axis.cunit'), root - ) + assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.time.axis.axis.ctype', False) + assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.time.axis.axis.cunit', False) array_shape[count] = self._attribute_lookup( blueprint._get('Chunk.time.axis.function.naxis'), root ) - self._wcs.wcs.crpix[count] = self._attribute_lookup( - blueprint._get('Chunk.time.axis.function.refCoord.pix'), root - ) - self._wcs.wcs.crval[count] = self._attribute_lookup( - blueprint._get('Chunk.time.axis.function.refCoord.val'), root - ) - self._wcs.wcs.crder[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.time.axis.error.rnder'), root - )) - self._wcs.wcs.csyer[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.time.axis.error.syser'), root - )) + assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.time.axis.function.refCoord.pix', False) + assign_sanitize(self._wcs.wcs.crval, count, + 
'Chunk.time.axis.function.refCoord.val', False) + assign_sanitize(self._wcs.wcs.crder, count, + 'Chunk.time.axis.error.rnder') + assign_sanitize(self._wcs.wcs.csyer, count, + 'Chunk.time.axis.error.syser') self._finish_time(blueprint) count += 1 if blueprint._energy_axis_configed: self._axes['energy'][1] = True self._axes['energy'][0] = count - x = self._attribute_lookup( - blueprint._get('Chunk.energy.axis.axis.ctype'), root - ) - self._wcs.wcs.ctype[count] = x - self._wcs.wcs.cunit[count] = self._attribute_lookup( - blueprint._get('Chunk.energy.axis.axis.cunit'), root - ) + assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.energy.axis.axis.ctype', False) + assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.energy.axis.axis.cunit', False) array_shape[count] = self._attribute_lookup( blueprint._get('Chunk.energy.axis.function.naxis'), root ) - self._wcs.wcs.crpix[count] = self._attribute_lookup( - blueprint._get('Chunk.energy.axis.function.refCoord.pix'), root - ) - self._wcs.wcs.crval[count] = self._attribute_lookup( - blueprint._get('Chunk.energy.axis.function.refCoord.val'), root - ) - self._wcs.wcs.crder[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.energy.axis.error.rnder'), root - )) - self._wcs.wcs.csyer[count] = self._sanitize( - self._attribute_lookup( - blueprint._get('Chunk.energy.axis.error.syser'), root - )) + assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.energy.axis.function.refCoord.pix', False) + assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.energy.axis.function.refCoord.val', False) + assign_sanitize(self._wcs.wcs.crder, count, + 'Chunk.energy.axis.error.rnder') + assign_sanitize(self._wcs.wcs.csyer, count, + 'Chunk.energy.axis.error.syser') self._finish_energy(blueprint) count += 1 if blueprint._polarization_axis_configed: self._axes['polarization'][1] = True self._axes['polarization'][0] = count - self._wcs.wcs.ctype[count] = self._attribute_lookup( - blueprint._get('Chunk.polarization.axis.axis.ctype'), root - ) - self._wcs.wcs.cunit[count] = self._attribute_lookup( - blueprint._get('Chunk.polarization.axis.axis.cunit'), root - ) + assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.polarization.axis.axis.ctype', False) + assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.polarization.axis.axis.cunit', False) array_shape[count] = self._attribute_lookup( blueprint._get('Chunk.polarization.axis.function.naxis'), root ) - self._wcs.wcs.crpix[count] = self._attribute_lookup( - blueprint._get('Chunk.polarization.axis.function.refCoord.pix'), - root - ) - self._wcs.wcs.crval[count] = self._attribute_lookup( - blueprint._get('Chunk.polarization.axis.function.refCoord.val'), - root - ) + assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.polarization.axis.function.refCoord.pix', + False) + assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.polarization.axis.function.refCoord.val', + False) count += 1 # TODO - where's the delta? 
if blueprint._obs_axis_configed: self._axes['observable'][1] = True self._axes['observable'][0] = count - count += 1 - self._wcs.wcs.ctype[count] = self._attribute_lookup( - blueprint._get('Chunk.observable.axis.axis.ctype'), root - ) - self._wcs.wcs.cunit[count] = self._attribute_lookup( - blueprint._get('Chunk.observable.axis.axis.cunit'), root - ) + assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.observable.axis.axis.ctype', False) + assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.observable.axis.axis.cunit', False) array_shape[count] = 1.0 - self._wcs.wcs.crpix[count] = self._attribute_lookup( - blueprint._get('Chunk.observable.axis.function.refCoord.pix'), - root - ) + assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.observable.axis.function.refCoord.pix', + False) self._wcs.wcs.crval[count] = 0.0 + count += 1 if blueprint._custom_axis_configed: self._axes['custom'][1] = True self._axes['custom'][0] = count - count += 1 - self._wcs.wcs.ctype[count] = self._attribute_lookup( - blueprint._get('Chunk.custom.axis.axis.ctype'), root - ) - self._wcs.wcs.cunit[count] = self._attribute_lookup( - blueprint._get('Chunk.custom.axis.axis.cunit'), root - ) + assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.custom.axis.axis.ctype', False) + assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.custom.axis.axis.cunit', False) array_shape[count] = self._attribute_lookup( blueprint._get('Chunk.custom.axis.function.naxis'), root ) # TODO delta - self._wcs.wcs.crpix[count] = self._attribute_lookup( - blueprint._get('Chunk.custom.axis.function.refCoord.pix'), root - ) - self._wcs.wcs.crval[count] = self._attribute_lookup( - blueprint._get('Chunk.custom.axis.function.refCoord.val'), root - ) + assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.custom.axis.function.refCoord.pix', False) + assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.custom.axis.function.refCoord.val', False) + count += 1 - # logging.error(f'count is {count}') self._wcs.array_shape = array_shape - # logging.error(f'{self._wcs.array_shape}') - # logging.error(f'{self._wcs.wcs.radesys}') def _append_cd_value(self, cd, cd_value, count): prefix = [] @@ -4410,15 +4330,11 @@ def _append_cd_value(self, cd, cd_value, count): cd.append(prefix + [cd_value] + suffix) def _attribute_lookup(self, key, root): - # logging.error(f'key {key} root name {root.name}') if key is None: - # raise NotImplementedError - # why might this be the wrong decision?, because it means the - # blueprint lookup returned None, which is a valid value - # assignment, isn't it? 
+ # the blueprint lookup returned None, which is a valid value + # assignment return None if isinstance(key, tuple): - # logging.error('path 10') result = None for ii in key[0]: result = self._attribute_lookup(ii, root) @@ -4431,45 +4347,33 @@ def _attribute_lookup(self, key, root): key = key.replace('//', '/') root = self._base if key.startswith('/'): - # logging.error('path 15') bits = key.split('/') - # logging.error(f'key {key} root.name {root.name} bits {bits}') if len(bits) == 2: - # logging.error('path 11') if '(' in bits[1]: - # logging.error('path 12') x = bits[1].split('(') if ':' in x[1]: - # logging.error('path 19') a = x[1].split(')')[0].split(':') if len(a) > 2: raise NotImplementedError y = root[x[0]][int(a[0])][int(a[1])] return y else: - # logging.error(f'path 20 {x}') index = int(x[1].split(')')[0]) - # logging.error(f'x {x[0]} index {index}') y = root[x[0]][index] - # logging.error(y) return y else: - # logging.error('path 13') return root[bits[1]] else: # the 2 is because there's always a leading slash, so the # first bit is an empty string temp = f'/{"/".join(ii for ii in bits[2:])}' - # logging.error(f'path 14 y{temp}y x{bits}x') return self._attribute_lookup(temp, root[bits[1]]) else: # a value has been set - # logging.error(f'path 17 {key}') if key == 'None': return None return key else: - # logging.error('path 30') return key def _finish_chunk_observable(self, chunk): @@ -4560,8 +4464,6 @@ def _finish_time(self, blueprint): blueprint._get('Chunk.time.mjdref'), self._base ) if x: - logging.error(f'xx{x}xx') - logging.error(type(x)) self._wcs.wcs.mjdref = x @@ -4962,10 +4864,8 @@ def caom2gen(): blueprints = {} if len(args.blueprint) == 1: # one blueprint to rule them all - # logging.error(f'one blueprint {args.lineage} {type(args.lineage)}') temp = ' '.join(ii for ii in args.lineage) if '.h5' in temp: - # logging.error('picking the correct one') blueprint = Hdf5ObsBlueprint(module=module) else: blueprint = ObsBlueprint(module=module) @@ -4987,7 +4887,7 @@ def caom2gen(): for i, cardinality in enumerate(args.lineage): product_id, uri = _extract_ids(cardinality) - logging.error('Loading blueprint for {} from {}'.format( + logging.debug('Loading blueprint for {} from {}'.format( uri, args.blueprint[i])) if '.h5' in uri: blueprint = Hdf5ObsBlueprint(module=module) diff --git a/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml b/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml index 294cd584..0b93b2ee 100644 --- a/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml +++ b/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml @@ -32,7 +32,6 @@ MEGAPIPE 2.0 CADC - None http://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/en/megapipe/ 2018-03-14T16:36:50.000 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint index ac11cd8e..98f209a7 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos.blueprint @@ -36,7 +36,6 @@ Chunk.time.axis.range.start.pix = 0 Chunk.time.axis.range.start.val = [//header/timeseries/mjdrunstart] Chunk.time.axis.range.end.pix = get_time_axis_range_end() Chunk.time.axis.range.end.val = [//header/timeseries/mjdrunend] -Chunk.time.axis.function.naxis = [//header/timeseries/numepochs] Chunk.time.axis.error.rnder = None Chunk.time.axis.error.syser = None Chunk.time.exposure = get_exposure() From 3c23c56483444b63e7683c4f52f58d698518e144 Mon Sep 
17 00:00:00 2001 From: Sharon Goliath Date: Mon, 14 Mar 2022 12:58:45 -0700 Subject: [PATCH 09/38] CADC-10809 - address the naming bits of Adrian's design review comments. --- caom2utils/README.rst | 7 +- caom2utils/caom2utils/caom2blueprint.py | 218 ++++++++--------- caom2utils/caom2utils/legacy.py | 2 +- .../20220201T200117/20220201T200117.xml | 228 ++++++++++++++++++ .../caom2utils/tests/test_fits2caom2.py | 18 +- .../caom2utils/tests/test_obs_blueprint.py | 48 ++-- doc/user/script_description.md | 62 +++-- 7 files changed, 417 insertions(+), 166 deletions(-) create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml diff --git a/caom2utils/README.rst b/caom2utils/README.rst index 8166faa8..7465184e 100755 --- a/caom2utils/README.rst +++ b/caom2utils/README.rst @@ -102,7 +102,7 @@ caom2gen (-i IN_OBS_XML | --observation collection observationID) [--local LOCAL [LOCAL ...]] [--external_url EXTERNAL_URL [EXTERNAL_URL ...]] [--module MODULE] [--plugin PLUGIN] [--lineage LINEAGE [LINEAGE ...]] - [--use_generic_parser USE_GENERIC_PARSER [USE_GENERIC_PARSER ...]] --blueprint BLUEPRINT [BLUEPRINT ...] + [--use_blueprint_parser USE_BLUEPRINT_PARSER [USE_BLUEPRINT_PARSER ...]] --blueprint BLUEPRINT [BLUEPRINT ...] Augments an observation with information in one or more fits files. @@ -163,10 +163,11 @@ caom2gen -q, --quiet run quietly --resource-id RESOURCE_ID resource identifier (default ivo://cadc.nrc.ca/fits2caom2) - --use_generic_parser USE_GENERIC_PARSER [USE_GENERIC_PARSER ...] + --use_blueprint_parser USE_BLUEPRINT_PARSER [USE_BLUEPRINT_PARSER ...] productID/artifactURI. List of lineage entries that will be processed with a - GenericParser. Good for non-fits files. + BlueprintParser. Good for files with no + metadata in the content. -u, --user USER name of user to authenticate. Note: application prompts for the corresponding password! diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 5538914c..52095bb4 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -113,10 +113,10 @@ APP_NAME = 'caom2gen' -__all__ = ['BlueprintParser', 'FitsParser', 'FitsWcsParser', +__all__ = ['ContentParser', 'FitsParser', 'FitsWcsParser', 'DispatchingFormatter', 'ObsBlueprint', 'get_arg_parser', 'proc', 'POLARIZATION_CTYPES', 'gen_proc', 'get_gen_proc_arg_parser', - 'GenericParser', 'augment', 'get_vos_headers', + 'BlueprintParser', 'augment', 'get_vos_headers', 'get_external_headers', 'HDF5Parser', 'Hdf5ObsBlueprint', 'Hdf5WcsParser', 'update_artifact_meta'] @@ -250,7 +250,7 @@ class ObsBlueprint: ob.set('Observation.algorithm.name', 'exposure') ob.set_fits_attribute('Chunk.energy.axis.axis.ctype', ['MYCTYPE'], extension=1) - ob.add_fits_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE2', + ob.add_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE2', extension=1) ob.set('Chunk.energy.velang', 33, extension=1) ob.set_default('Chunk.position.coordsys', 'RA-DEC', extension=1) @@ -1216,13 +1216,13 @@ def set(self, caom2_element, value, extension=0): else: self._plan[caom2_element] = value - def add_fits_attribute(self, caom2_element, fits_attribute, extension=0): + def add_attribute(self, caom2_element, attribute, extension=0): """ - Adds a FITS attribute in the list of other FITS attributes associated + Adds an attribute in the list of other attributes associated with an caom2 element. 
:param caom2_element: name CAOM2 element (as in ObsBlueprint.CAOM2_ELEMEMTS) - :param fits_attribute: name of FITS attribute the element is mapped to + :param attribute: name of attribute the element is mapped to :param extension: extension number (used only for Chunk elements) :raises AttributeError if the caom2 element has already an associated value or KeyError if the caom2 element does not exists. @@ -1238,29 +1238,27 @@ def add_fits_attribute(self, caom2_element, fits_attribute, extension=0): if caom2_element in self._extensions[extension]: if (isinstance(self._extensions[extension][caom2_element], tuple)): - if (fits_attribute not in + if (attribute not in self._extensions[extension][caom2_element][0]): self._extensions[extension][caom2_element][0].\ - insert(0, fits_attribute) + insert(0, attribute) else: raise AttributeError( - ('No FITS attributes in extension {} associated ' - 'with keyword {}').format(extension, - caom2_element)) + (f'No attributes in extension {extension} associated ' + 'with keyword {caom2_element}')) else: self._extensions[extension][caom2_element] = \ - ([fits_attribute], None) + ([attribute], None) else: if caom2_element in self._plan: if isinstance(self._plan[caom2_element], tuple): - if fits_attribute not in self._plan[caom2_element][0]: - self._plan[caom2_element][0].insert(0, fits_attribute) + if attribute not in self._plan[caom2_element][0]: + self._plan[caom2_element][0].insert(0, attribute) else: - raise AttributeError( - 'No FITS attributes associated with keyword {}'. - format(caom2_element)) + raise AttributeError(f'No attributes associated with ' + f'keyword {caom2_element}') else: - self._plan[caom2_element] = ([fits_attribute], None) + self._plan[caom2_element] = ([attribute], None) def add_table_attribute(self, caom2_element, ttype_attribute, extension=0, index=0): @@ -1318,13 +1316,13 @@ def add_table_attribute(self, caom2_element, ttype_attribute, extension=0, def set_default(self, caom2_element, default, extension=0): """ - Sets the default value of a caom2 element that is associated with FITS + Sets the default value of a caom2 element that is associated with attributes. If the element does not exist or does not have a list of - associated FITS attributes, default is set as the associated value + associated attributes, default is set as the associated value of the element. - If set_fits_attribute is called for the same caom2_element after this, - the default value will be reset to None. + If set is called for the same caom2_element after this, the default + value will be reset to None. 
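        An illustrative sketch of how the default interacts with set() and
        add_attribute(), mirroring the package's unit tests; the keyword
        names and values here are placeholders only:

            ob = ObsBlueprint()
            ob.clear('Observation.instrument.keywords')
            ob.add_attribute('Observation.instrument.keywords', 'INSTMODE')
            # lookup is now (['INSTMODE'], None), with no default
            ob.set_default('Observation.instrument.keywords', 'TEST')
            # 'TEST' is used only when 'INSTMODE' is absent from the file
            ob.set('Observation.instrument.keywords', 'SCAN')
            # the lookup is replaced outright by the literal value 'SCAN'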
:param caom2_element: name CAOM2 element (as in ObsBlueprint.CAOM2_ELEMEMTS) @@ -1405,7 +1403,7 @@ def _get(self, caom2_element, extension=0): :param caom2_element: name CAOM2 element (as in ObsBlueprint.CAOM2_ELEMEMTS) :param extension: extension number - :return: Tuple of the form (list_of_associated_fits_attributes, + :return: Tuple of the form (list_of_associated_attributes, default_value) OR the actual value associated with the CAOM2 element """ ObsBlueprint.check_caom2_element(caom2_element) @@ -1415,7 +1413,7 @@ def _get(self, caom2_element, extension=0): (caom2_element in self._extensions[extension]): return self._extensions[extension][caom2_element] - # look in the generic plan + # look in the minimal plan if caom2_element not in self._plan: return None else: @@ -1566,7 +1564,7 @@ def __init__(self, position_axes=None, energy_axis=None, def configure_custom_axis(self, axis, override=True): """ - Set the expected FITS custom keywords by index in the blueprint + Set the expected custom keywords by index in the blueprint and the wcs_std lookup. :param axis: The index expected for the custom axis. @@ -1597,7 +1595,7 @@ def configure_custom_axis(self, axis, override=True): def configure_position_axes(self, axes, override=True): """ - Set the expected FITS spatial keywords by indices in the blueprint and + Set the expected spatial keywords by indices in the blueprint and the wcs_std lookup. :param axes: The index expected for the position axes. @@ -1714,7 +1712,7 @@ def configure_energy_axis(self, axis, override=True): def configure_polarization_axis(self, axis, override=True): """ - Set the expected FITS polarization keywords by index in the blueprint + Set the expected polarization keywords by index in the blueprint and the wcs_std lookup. :param axis: The index expected for the polarization axis. @@ -1748,7 +1746,7 @@ def configure_polarization_axis(self, axis, override=True): def configure_observable_axis(self, axis, override=True): """ - Set the expected FITS observable keywords by index in the blueprint + Set the expected observable keywords by index in the blueprint and the wcs_std lookup. Note: observable axis is not a standard WCS and it's not used by astropy.wcs so, arguably, it can be removed. It is here for now for @@ -1775,7 +1773,7 @@ def configure_observable_axis(self, axis, override=True): def configure_time_axis(self, axis, override=True): """ - Set the expected FITS time keywords by index in the blueprint and + Set the expected time keywords by index in the blueprint and the wcs_std lookup. :param axis: The index expected for the time axis. @@ -1828,7 +1826,7 @@ def configure_time_axis(self, axis, override=True): self._time_axis_configed = True -class GenericParser: +class BlueprintParser: """ Extract CAOM2 metadata from files with no WCS information. 
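    A minimal, illustrative usage sketch; the collection, observation ID,
    URI, and product ID are placeholders, and a real blueprint would
    usually set many more elements:

        from caom2 import Algorithm, SimpleObservation
        from caom2utils import BlueprintParser, ObsBlueprint

        obs = SimpleObservation('TEST', 'test_obs_id', Algorithm('exposure'))
        ob = ObsBlueprint()
        ob.set('Observation.type', 'OBJECT')
        ob.set('Artifact.productType', 'science')
        ob.set('Artifact.releaseType', 'data')
        parser = BlueprintParser(ob, uri='cadc:TEST/plain_text.txt')
        parser.augment_observation(
            obs, 'cadc:TEST/plain_text.txt', product_id='raw')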
""" @@ -1880,8 +1878,7 @@ def augment_observation(self, observation, artifact_uri, product_id=None): :param product_id: the key for finding for the plane to augment """ self.logger.debug( - 'Begin generic CAOM2 observation augmentation for URI {}.'.format( - artifact_uri)) + f'Begin CAOM2 observation augmentation for URI {artifact_uri}.') if observation is None or not isinstance(observation, Observation): raise ValueError( f'Observation type mis-match for {observation}.') @@ -1915,8 +1912,7 @@ def augment_observation(self, observation, artifact_uri, product_id=None): observation.planes[product_id] = plane self.augment_plane(plane, artifact_uri) self.logger.debug( - 'End generic CAOM2 observation augmentation for {}.'.format( - artifact_uri)) + f'End CAOM2 observation augmentation for {artifact_uri}.') def augment_plane(self, plane, artifact_uri): """ @@ -1925,8 +1921,7 @@ def augment_plane(self, plane, artifact_uri): :param artifact_uri: """ self.logger.debug( - 'Begin generic CAOM2 plane augmentation for {}.'.format( - artifact_uri)) + f'Begin CAOM2 plane augmentation for {artifact_uri}.') if plane is None or not isinstance(plane, Plane): raise ValueError(f'Plane type mis-match for {plane}') @@ -1956,16 +1951,14 @@ def augment_plane(self, plane, artifact_uri): plane.artifacts[artifact_uri] = artifact self.augment_artifact(artifact, 0) self.logger.debug( - 'End generic CAOM2 plane augmentation for {}.'.format( - artifact_uri)) + f'End CAOM2 plane augmentation for {artifact_uri}.') def augment_artifact(self, artifact, index): """ - Augments a given CAOM2 artifact with available FITS information + Augments a given CAOM2 artifact with available information :param artifact: existing CAOM2 artifact to be augmented """ - self.logger.debug('Begin generic CAOM2 artifact augmentation for ' - '{}.'.format(self.uri)) + self.logger.debug(f'Begin CAOM2 artifact augmentation for {self.uri}.') if artifact is None or not isinstance(artifact, Artifact): raise ValueError( f'Artifact type mis-match for {artifact}') @@ -1989,9 +1982,7 @@ def augment_artifact(self, artifact, index): current=artifact.content_read_groups) artifact.meta_producer = self._get_from_list( 'Artifact.metaProducer', index=0, current=artifact.meta_producer) - self.logger.debug( - 'End generic CAOM2 artifact augmentation for {}.'.format( - self.uri)) + self.logger.debug(f'End CAOM2 artifact augmentation for {self.uri}.') def _get_from_list(self, lookup, index, current=None): value = None @@ -2180,7 +2171,7 @@ def _get_datetime(self, from_value): return None -class BlueprintParser(GenericParser): +class ContentParser(BlueprintParser): def __init__(self, obs_blueprint=None, uri=None): super().__init__(obs_blueprint, uri) @@ -2192,16 +2183,18 @@ def _get_chunk_naxis(self, chunk, index): def augment_artifact(self, artifact, index): """ - Augments a given CAOM2 artifact with available FITS information + Augments a given CAOM2 artifact with available content information :param artifact: existing CAOM2 artifact to be augmented """ super().augment_artifact(artifact, index) - self.logger.debug(f'Begin artifact augmentation for {artifact.uri}') + self.logger.debug( + f'Begin content artifact augmentation for {artifact.uri}') if self.blueprint.get_configed_axes_count() == 0: raise TypeError( - f'No WCS Data. End artifact augmentation for {artifact.uri}.') + f'No WCS Data. 
End content artifact augmentation for ' + f'{artifact.uri}.') if self.ignore_chunks(artifact, index): return @@ -2259,19 +2252,18 @@ def augment_artifact(self, artifact, index): self._try_range_with_blueprint(chunk, index) self.logger.debug( - f'End artifact augmentation for {artifact.uri}.') + f'End content artifact augmentation for {artifact.uri}.') def augment_observation(self, observation, artifact_uri, product_id=None): """ - Augments a given observation with available FITS information. + Augments a given observation with available content information. :param observation: existing CAOM2 observation to be augmented. :param artifact_uri: the key for finding the artifact to augment :param product_id: the key for finding for the plane to augment """ super().augment_observation(observation, artifact_uri, product_id) self.logger.debug( - 'Begin observation augmentation for URI {}.'.format( - artifact_uri)) + f'Begin content observation augmentation for URI {artifact_uri}.') members = self._get_members(observation) if members: if isinstance(members, TypedSet): @@ -2308,17 +2300,17 @@ def augment_observation(self, observation, artifact_uri, product_id=None): observation.environment = self._get_environment( observation.environment) self.logger.debug( - f'End observation augmentation for {artifact_uri}.') + f'End content observation augmentation for {artifact_uri}.') def augment_plane(self, plane, artifact_uri): """ - Augments a given plane with available FITS information. + Augments a given plane with available content information. :param plane: existing CAOM2 plane to be augmented. :param artifact_uri: """ super().augment_plane(plane, artifact_uri) self.logger.debug( - f'Begin plane augmentation for {artifact_uri}.') + f'Begin content plane augmentation for {artifact_uri}.') plane.meta_release = self._get_datetime(self._get_from_list( 'Plane.metaRelease', index=0, current=plane.meta_release)) @@ -2338,14 +2330,15 @@ def augment_plane(self, plane, artifact_uri): plane.quality = self._get_quality(current=plane.quality) self.logger.debug( - f'End plane augmentation for {artifact_uri}.') + f'End content plane augmentation for {artifact_uri}.') def _content_lookup(self, key, extension=None): raise NotImplementedError def _get_algorithm(self, obs): """ - Create an Algorithm instance populated with available FITS information. + Create an Algorithm instance populated with available content + information. :return: Algorithm """ self.logger.debug('Begin Algorithm augmentation.') @@ -2358,7 +2351,7 @@ def _get_algorithm(self, obs): def _get_energy_transition(self, current): """ - Create an EnergyTransition instance populated with available FITS + Create an EnergyTransition instance populated with available content information. :return: EnergyTransition """ @@ -2377,7 +2370,7 @@ def _get_energy_transition(self, current): def _get_environment(self, current): """ - Create an Environment instance populated with available FITS + Create an Environment instance populated with available content information. 
:current Environment instance, if one already exists in the Observation @@ -2427,7 +2420,7 @@ def _get_from_list(self, lookup, index, current=None): except KeyError: self.add_error(lookup, sys.exc_info()[1]) self.logger.debug( - f'Could not find {lookup!r} in fits2caom2 configuration.') + f'Could not find {lookup!r} in caom2blueprint configuration.') if current: self.logger.debug( f'{lookup}: using current value of {current!r}.') @@ -2488,7 +2481,7 @@ def _get_from_list(self, lookup, index, current=None): def _get_instrument(self, current): """ - Create an Instrument instance populated with available FITS + Create an Instrument instance populated with available content information. :return: Instrument """ @@ -2501,7 +2494,7 @@ def _get_instrument(self, current): instr = None if name: instr = Instrument(str(name)) - FitsParser._add_keywords(keywords, current, instr) + ContentParser._add_keywords(keywords, current, instr) self.logger.debug('End Instrument augmentation.') return instr @@ -2566,7 +2559,7 @@ def _get_members(self, obs): def _get_metrics(self, current): """ - Create a Metrics instance populated with available FITS information. + Create a Metrics instance populated with available content information. :return: Metrics """ self.logger.debug('Begin Metrics augmentation.') @@ -2669,7 +2662,7 @@ def _get_naxis(self, label, index): def _get_observable(self, current): """ - Create a Observable instance populated with available FITS information. + Create a Observable instance populated with available content information. :return: Observable """ self.logger.debug('Begin Observable augmentation.') @@ -2682,7 +2675,7 @@ def _get_observable(self, current): def _get_proposal(self, current): """ - Create a Proposal instance populated with available FITS information. + Create a Proposal instance populated with available content information. :return: Proposal """ self.logger.debug('Begin Proposal augmentation.') @@ -2703,13 +2696,13 @@ def _get_proposal(self, current): proposal = current if prop_id: proposal = Proposal(str(prop_id), pi, project, title) - FitsParser._add_keywords(keywords, current, proposal) + ContentParser._add_keywords(keywords, current, proposal) self.logger.debug(f'End Proposal augmentation {prop_id}.') return proposal def _get_provenance(self, current): """ - Create a Provenance instance populated with available FITS information. + Create a Provenance instance populated with available Content information. :return: Provenance """ self.logger.debug('Begin Provenance augmentation.') @@ -2747,7 +2740,7 @@ def _get_provenance(self, current): if name: prov = Provenance(name, p_version, project, producer, run_id, reference, last_executed) - FitsParser._add_keywords(keywords, current, prov) + ContentParser._add_keywords(keywords, current, prov) if inputs: if isinstance(inputs, TypedSet): for i in inputs: @@ -2764,7 +2757,7 @@ def _get_provenance(self, current): def _get_quality(self, current): """ - Create a Quality instance populated with available FITS information. + Create a Quality instance populated with available content information. :return: Quality """ self.logger.debug('Begin Quality augmentation.') @@ -2777,7 +2770,7 @@ def _get_quality(self, current): def _get_requirements(self, current): """ - Create a Requirements instance populated with available FITS + Create a Requirements instance populated with available content information. 
:return: Requirements """ @@ -2797,8 +2790,7 @@ def _get_set_from_list(self, lookup, index): except KeyError: self.add_error(lookup, sys.exc_info()[1]) self.logger.debug( - 'Could not find \'{}\' in fits2caom2 configuration.'.format( - lookup)) + f'Could not find \'{lookup}\' in caom2blueprint configuration.') if isinstance(keywords, tuple): for ii in keywords[0]: @@ -2821,7 +2813,7 @@ def _get_set_from_list(self, lookup, index): def _get_target(self, current): """ - Create a Target instance populated with available FITS information. + Create a Target instance populated with available content information. :return: Target """ self.logger.debug('Begin Target augmentation.') @@ -2850,13 +2842,13 @@ def _get_target(self, current): if name: target = Target(str(name), target_type, standard, redshift, moving=moving, target_id=target_id) - FitsParser._add_keywords(keywords, current, target) + ContentParser._add_keywords(keywords, current, target) self.logger.debug('End Target augmentation.') return target def _get_target_position(self, current): """ - Create a Target Position instance populated with available FITS + Create a Target Position instance populated with available content information. :return: Target Position """ @@ -2883,7 +2875,7 @@ def _get_target_position(self, current): def _get_telescope(self, current): """ - Create a Telescope instance populated with available FITS information. + Create a Telescope instance populated with available content information. :return: Telescope """ self.logger.debug('Begin Telescope augmentation.') @@ -2907,7 +2899,7 @@ def _get_telescope(self, current): aug_tel = None if name: aug_tel = Telescope(str(name), geo_x, geo_y, geo_z) - FitsParser._add_keywords(keywords, current, aug_tel) + ContentParser._add_keywords(keywords, current, aug_tel) self.logger.debug('End Telescope augmentation.') return aug_tel @@ -3168,8 +3160,8 @@ def _try_range_return(self, index, lookup): def _try_range_with_blueprint(self, chunk, index): """Use the blueprint to set elements and attributes that - are not in the scope of astropy and fits, and therefore are not - covered by the FitsWcsParser class. Per PD 19/04/18, bounds and + are not in the scope of astropy and files content, and therefore are + not covered by the *WcsParser classes. Per PD 19/04/18, bounds and range are not covered by WCS keywords.""" for i in ['energy', 'time', 'polarization']: @@ -3265,7 +3257,7 @@ def _add_keywords(keywords, current, to_set): to_set.keywords.remove('none') -class FitsParser(BlueprintParser): +class FitsParser(ContentParser): """ Parses a FITS file and extracts the CAOM2 related information which can be used to augment an existing CAOM2 observation, plane or artifact. The @@ -3467,7 +3459,7 @@ def apply_blueprint(self): # TODO When a projection is specified, wcslib expects corresponding # DP arguments with NAXES attributes. Normally, omitting the attribute - # signals no distortion which is the assumption in fits2caom2 for + # signals no distortion which is the assumption in caom2blueprint for # energy and polarization axes. Following is a workaround for # SIP projections. 
# For more details see: @@ -3601,7 +3593,7 @@ def _has_data_array(header): return True -class HDF5Parser(BlueprintParser): +class HDF5Parser(ContentParser): def __init__( self, obs_blueprint, uri, local_f_name, find_roots_here='/sitedata' @@ -3749,9 +3741,7 @@ def augment_energy(self, chunk): chunk.energy.specsys = specsys chunk.energy.ssysobs = _to_str(self._sanitize(self.wcs.ssysobs)) - # TODO not sure why, but wcs returns 0.0 when the FITS keywords - # for the following two keywords are actually not present in - # the header + # wcs returns 0.0 by default if self._sanitize(self.wcs.restfrq) != 0: chunk.energy.restfrq = self._sanitize(self.wcs.restfrq) if self._sanitize(self.wcs.restwav) != 0: @@ -3796,14 +3786,6 @@ def augment_temporal(self, chunk): """ Augments a chunk with temporal WCS information - The expected caom2 - FITS keywords mapping is: - - time.exposure = EXPTIME - time.resolution = TIMEDEL - time.timesys = TIMESYS default UTC - time.trefpos = TREFPOS - time.mjdref = MJDREF | MJDDATE - :param chunk: :return: """ @@ -4028,7 +4010,7 @@ def _get_spatial_axis(self, xindex, yindex): def _sanitize(self, value): """ - Sanitizes values from FITS to caom2 + Sanitizes values from content to caom2 :param value: :return: """ @@ -4086,6 +4068,15 @@ def _finish_chunk_observable(self, chunk): Slice(self._get_axis(0, ctype, cunit), pix_bin)) def _finish_chunk_time(self, chunk): + """ + The expected caom2 - FITS keywords mapping is: + + time.exposure = EXPTIME + time.resolution = TIMEDEL + time.timesys = TIMESYS default UTC + time.trefpos = TREFPOS + time.mjdref = MJDREF | MJDDATE + """ chunk.time.exposure = _to_float(self.header.get('EXPTIME')) chunk.time.resolution = _to_float(self.header.get('TIMEDEL')) chunk.time.timesys = str(self.header.get('TIMESYS', 'UTC')) @@ -4688,7 +4679,7 @@ def _extract_ids(cardinality): def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, validate_wcs=True, plugin=None, local=None, - external_url=None, connected=True, use_generic_parser=False, + external_url=None, connected=True, use_blueprint_parser=False, client=None, **kwargs): """ Find or construct a plane and an artifact to go with the observation @@ -4726,10 +4717,10 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, meta_uri = uri visit_local = None - if use_generic_parser: + if use_blueprint_parser: logging.debug( - f'Using a GenericParser as requested for {uri}') - parser = GenericParser(blueprint, uri=uri) + f'Using a BlueprintParser as requested for {uri}') + parser = BlueprintParser(blueprint, uri=uri) elif local: if uri.startswith('vos'): if '.fits' in local or '.fits.gz' in local: @@ -4740,8 +4731,8 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, parser = FitsParser(headers, blueprint, uri=uri) elif '.csv' in local: logging.debug( - f'Using a GenericParser for vos local {local}') - parser = GenericParser(blueprint, uri=uri) + f'Using a BlueprintParser for vos local {local}') + parser = BlueprintParser(blueprint, uri=uri) else: raise ValueError(f'Unexpected file type {local}') else: @@ -4759,16 +4750,16 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, parser = HDF5Parser(blueprint, uri, local) else: # explicitly ignore headers for txt and image files - logging.debug(f'Using a GenericParser for {local}') - parser = GenericParser(blueprint, uri=uri) + logging.debug(f'Using a BlueprintParser for {local}') + parser = BlueprintParser(blueprint, uri=uri) elif external_url: headers = 
get_external_headers(external_url) if headers is None: logging.debug( - 'Using a GenericParser for un-retrievable remote headers ' + 'Using a BlueprintParser for un-retrievable remote headers ' '{}'.format(uri) ) - parser = GenericParser(blueprint, uri=uri) + parser = BlueprintParser(blueprint, uri=uri) else: logging.debug( f'Using a FitsParser for remote headers {uri}') @@ -4786,8 +4777,8 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, else: # explicitly ignore headers for txt and image files logging.debug( - f'Using a GenericParser for remote file {uri}') - parser = GenericParser(blueprint, uri=uri) + f'Using a BlueprintParser for remote file {uri}') + parser = BlueprintParser(blueprint, uri=uri) if parser is None: result = None @@ -5241,13 +5232,13 @@ def gen_proc(args, blueprints, **kwargs): if args.external_url: external_url = args.external_url[ii] - use_generic_parser = False - if args.use_generic_parser: - use_generic_parser = uri in args.use_generic_parser + use_blueprint_parser = False + if args.use_blueprint_parser: + use_blueprint_parser = uri in args.use_blueprint_parser obs = _augment(obs, product_id, uri, blueprint, subject, args.dumpconfig, validate_wcs, args.plugin, file_name, - external_url, connected, use_generic_parser, client, + external_url, connected, use_blueprint_parser, client, **kwargs) if obs is None: @@ -5303,10 +5294,11 @@ def get_gen_proc_arg_parser(): help=('productID/artifactURI. List of plane/artifact ' 'identifiers that will be' 'created for the identified observation.')) - parser.add_argument('--use_generic_parser', nargs='+', + parser.add_argument('--use_blueprint_parser', nargs='+', help=('productID/artifactURI. List of lineage entries ' - 'that will be processed with a GenericParser. ' - 'Good for non-fits files.')) + 'that will be processed with a BlueprintParser. 
' + 'Good for files with no metadata in the ' + 'content.')) return parser diff --git a/caom2utils/caom2utils/legacy.py b/caom2utils/caom2utils/legacy.py index d676200f..298fc9a3 100755 --- a/caom2utils/caom2utils/legacy.py +++ b/caom2utils/caom2utils/legacy.py @@ -447,7 +447,7 @@ def update_blueprint(obs_blueprint, artifact_uri=None, config=None, # assume FITS keywords, in the 0th extension, # and add them to the blueprint for caom2_key in convert.get_caom2_elements(key): - obs_blueprint.add_fits_attribute(caom2_key, value) + obs_blueprint.add_attribute(caom2_key, value) except ValueError: errors.append(f'{key}: {sys.exc_info()[1]}') logging.debug( diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml new file mode 100644 index 00000000..a66841a4 --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml @@ -0,0 +1,228 @@ + + + TAOSII + 20220201T200117 + 2018-05-21T02:07:22.0 + 129 + + exposure + + OBJECT + science + + 4239531 + + + FK5 + 2000.0 + + 75.63541666666666 + 20.64233888888889 + + + + + star04239531 + timeseries + 2 + + + cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5 + science + data + application/x-hdf5 + 70014256 + md5:5a28f24e62324c1a12ff76a46c59bc54 + + + 0 + + + 1 + 2 + 3 + + + + RA---TAN-SIP + rad + + + DEC--TAN-SIP + rad + + + + 1920 + 4608 + + + + -284.445770264 + 1.3028748425170718 + + + -2009.44871521 + 0.36083190511928515 + + + -3.056099929280465e-06 + 5.26221921634137e-09 + -8.906590162255861e-09 + -3.0546760780702482e-06 + + + 2000.0 + + + + + TIME + s + + + + 0.0 + 59611.84305555555 + + + 143999.0 + 59611.92638888889 + + + + 0.08333333333575865 + + + + + + 1 + + + 1 + 2 + 3 + + + + RA---TAN-SIP + rad + + + DEC--TAN-SIP + rad + + + + 1920 + 4608 + + + + -279.44511413600003 + 1.3028748457982908 + + + -2001.461093903 + 0.3608318611649132 + + + -3.056099929280465e-06 + 5.26221921634137e-09 + -8.906590162255861e-09 + -3.0546760780702482e-06 + + + 2000.0 + + + + + TIME + s + + + + 0.0 + 59611.84305555555 + + + 143999.0 + 59611.92638888889 + + + + 0.08333333333575865 + + + + + + 2 + + + 1 + 2 + 3 + + + + RA---TAN-SIP + rad + + + DEC--TAN-SIP + rad + + + + 1920 + 4608 + + + + -288.449661255 + 1.3028749225543808 + + + -2014.4454422 + 0.36083189800706844 + + + -3.056099929280465e-06 + 5.26221921634137e-09 + -8.906590162255861e-09 + -3.0546760780702482e-06 + + + 2000.0 + + + + + TIME + s + + + + 0.0 + 59611.84305555555 + + + 143999.0 + 59611.92638888889 + + + + 0.08333333333575865 + + + + + + + + + + diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index e7b1ad31..bf2baa05 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -72,9 +72,9 @@ from cadcutils import net from cadcdata import FileInfo from caom2utils import FitsParser, FitsWcsParser, main_app, update_blueprint -from caom2utils import HDF5Parser, Hdf5WcsParser, BlueprintParser +from caom2utils import HDF5Parser, Hdf5WcsParser, ContentParser from caom2utils import Hdf5ObsBlueprint -from caom2utils import ObsBlueprint, GenericParser, gen_proc +from caom2utils import ObsBlueprint, BlueprintParser, gen_proc from caom2utils import get_gen_proc_arg_parser, augment from caom2utils.legacy import load_config from caom2utils.caom2blueprint import _visit, _load_plugin @@ -685,7 +685,7 @@ def test_augment_observation(): def 
test_augment_value_errors(): ob = ObsBlueprint(position_axes=(1, 2)) ob.set('Plane.productID', None) - test_parser = GenericParser(obs_blueprint=ob) + test_parser = BlueprintParser(obs_blueprint=ob) test_obs = SimpleObservation('collection', 'MA1_DRAO-ST', Algorithm('exposure')) with pytest.raises(ValueError): @@ -1046,7 +1046,7 @@ def _get_obs(from_xml_string): def test_generic_parser(): - """ Tests that GenericParser will be created.""" + """ Tests that BlueprintParser will be created.""" fname = f'file://{text_file}' with patch('sys.stdout', new_callable=BytesIO) as stdout_mock, \ @@ -1274,7 +1274,7 @@ def test_visit_generic_parser(): try: sys.argv = ['fits2caom2', '--local', 'fname', '--observation', 'test_collection_id', 'test_observation_id'] - test_parser = GenericParser() + test_parser = BlueprintParser() test_plugin = __name__ kwargs = {} test_obs = SimpleObservation(collection='test_collection', @@ -1336,7 +1336,7 @@ def test_generic_parser1(): test_blueprint = ObsBlueprint() test_blueprint.set(test_key, '2013-10-10') logging.error(test_blueprint) - test_parser = GenericParser() + test_parser = BlueprintParser() assert test_parser._blueprint._plan[test_key] == \ (['RELEASE', 'REL_DATE'], None), 'default value changed' test_parser.blueprint = test_blueprint @@ -1398,7 +1398,7 @@ def test_apply_blueprint(): test_blueprint.set_default('Plane.provenance.producer', 'abc') assert test_blueprint._get('Plane.provenance.producer') == (['ORIGIN'], 'abc') - test_blueprint.add_fits_attribute('Plane.provenance.producer', 'IMAGESWV') + test_blueprint.add_attribute('Plane.provenance.producer', 'IMAGESWV') assert test_blueprint._get('Plane.provenance.producer') == (['IMAGESWV', 'ORIGIN'], 'abc') @@ -1488,7 +1488,7 @@ def get_time_exposure(self, ext): test_blueprint2.configure_time_axis(1) test_blueprint2.set('Plane.calibrationLevel', 'getCalibrationLevel()') test_blueprint2.set('Plane.dataProductType', 'broken_function()') - test_parser2 = GenericParser(obs_blueprint=test_blueprint2) + test_parser2 = BlueprintParser(obs_blueprint=test_blueprint2) test_obs2 = SimpleObservation('collection', 'MA1_DRAO-ST', Algorithm('exposure')) with pytest.raises(ValueError): @@ -1503,7 +1503,7 @@ def test_apply_blueprint_execute_external(): # generic parser - function execution should have occurred, the return # value is dependent on the parameters to the call - test_gp = GenericParser(test_generic_blueprint) + test_gp = BlueprintParser(test_generic_blueprint) assert test_gp is not None, 'expect generic construction to complete' assert test_gp._get_from_list('Observation.type', index=0) \ == 'generic_parser_value', 'wrong generic plan value' diff --git a/caom2utils/caom2utils/tests/test_obs_blueprint.py b/caom2utils/caom2utils/tests/test_obs_blueprint.py index ad2bc6f0..7917854d 100644 --- a/caom2utils/caom2utils/tests/test_obs_blueprint.py +++ b/caom2utils/caom2utils/tests/test_obs_blueprint.py @@ -115,7 +115,7 @@ def test_obs_blueprint(): # set default ob.clear('Observation.instrument.keywords') - ob.add_fits_attribute('Observation.instrument.keywords', 'INSTMODE') + ob.add_attribute('Observation.instrument.keywords', 'INSTMODE') assert "Observation.instrument.keywords = ['INSTMODE'], default = None" \ in str(ob) ob.set_default('Observation.instrument.keywords', 'TEST') @@ -125,8 +125,8 @@ def test_obs_blueprint(): in str(ob) # set fits attribute - ob.add_fits_attribute('Observation.proposal.id', 'PROP') - ob.add_fits_attribute('Observation.proposal.id', 'PROP2') + ob.add_attribute('Observation.proposal.id', 
'PROP') + ob.add_attribute('Observation.proposal.id', 'PROP2') ob.set_default('Observation.proposal.id', 'NOPROP') assert ob._plan['Observation.proposal.id'][0] == ['PROP2', 'PROP', 'RUNID'] assert ob._plan['Observation.proposal.id'][1] == 'NOPROP' @@ -143,9 +143,9 @@ def test_obs_blueprint(): assert 'Chunk.energy.velang = 33' in extension1_str # set fits attribute in extension - ob.add_fits_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE', + ob.add_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE', extension=1) - ob.add_fits_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE2', + ob.add_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE2', extension=1) ob.set_default('Chunk.energy.axis.axis.ctype', 'NOCTYPE', extension=1) extension1_str = str(ob)[str(ob).index('extension 1'):] @@ -213,7 +213,7 @@ def test_obs_blueprint(): with pytest.raises(KeyError): ob.set('Nonexistent', 33) with pytest.raises(KeyError): - ob.add_fits_attribute('Nonexistent', 33) + ob.add_attribute('Nonexistent', 33) with pytest.raises(KeyError): ob.set_default('Nonexistent', 33) with pytest.raises(KeyError): @@ -227,7 +227,7 @@ def test_obs_blueprint(): with pytest.raises(KeyError): ob.set('Chunk.Nonexistent', 33, extension=1) with pytest.raises(KeyError): - ob.add_fits_attribute('Chunk.Nonexistent', 33, extension=1) + ob.add_attribute('Chunk.Nonexistent', 33, extension=1) with pytest.raises(KeyError): ob.set_default('Chunk.Nonexistent', 33, extension=1) with pytest.raises(KeyError): @@ -243,7 +243,7 @@ def test_obs_blueprint(): with pytest.raises(ValueError): ob.set_default('Observation.observationID', 33, extension=1) with pytest.raises(ValueError): - ob.add_fits_attribute('Observation.observationID', 'AA', extension=1) + ob.add_attribute('Observation.observationID', 'AA', extension=1) with pytest.raises(ValueError): ob.delete('Observation.observationID', extension=1) with pytest.raises(ValueError): @@ -257,11 +257,11 @@ def test_obs_blueprint(): assert 'Chunk.energy.transition' not in ob._plan ob.set('Chunk.energy.transition', 'Name') with pytest.raises(AttributeError): - ob.add_fits_attribute('Chunk.energy.transition', 'BP') + ob.add_attribute('Chunk.energy.transition', 'BP') # call set_fits_attribute with argument other than list with pytest.raises(AttributeError): - ob.add_fits_attribute('Chunk.energy.transition', 33) + ob.add_attribute('Chunk.energy.transition', 33) # delete element from a non-existent extension with pytest.raises(ValueError): @@ -270,14 +270,14 @@ def test_obs_blueprint(): # adding the same thing twice does nothing - the test values are defaults result = ob._get('Observation.metaRelease') initial_result_length = (len(result[0])) - ob.add_fits_attribute('Observation.metaRelease', 'DATE-OBS') + ob.add_attribute('Observation.metaRelease', 'DATE-OBS') result = ob._get('Observation.metaRelease') add_result_length = (len(result[0])) assert initial_result_length == add_result_length # in an extension result = ob._get('Chunk.energy.specsys', extension=1) initial_result_length = (len(result[0])) - ob.add_fits_attribute('Chunk.energy.specsys', 'SPECSYS') + ob.add_attribute('Chunk.energy.specsys', 'SPECSYS') result = ob._get('Chunk.energy.specsys', extension=1) add_result_length = (len(result[0])) assert initial_result_length == add_result_length, result @@ -293,8 +293,8 @@ def test_load_from_file_configure(): assert not ob._obs_axis_configed, 'obs config' assert not ob._polarization_axis_configed, 'pol config' assert not ob._time_axis_configed, 'time config' - 
ob.add_fits_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE1') - ob.add_fits_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE2') + ob.add_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE1') + ob.add_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE2') ob.set('Chunk.energy.axis.axis.ctype', 'WAVE') ob._guess_axis_info_from_plan() assert ob._pos_axes_configed, 'Failure to call configure_position_axes' @@ -303,8 +303,8 @@ def test_load_from_file_configure(): ob._wcs_std['Chunk.energy.axis.axis.ctype'] ob = ObsBlueprint() - ob.add_fits_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE3') - ob.add_fits_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE4') + ob.add_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE3') + ob.add_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE4') ob.set('Chunk.energy.axis.axis.ctype', 'WAVE') ob._guess_axis_info_from_plan() assert ob._pos_axes_configed, 'Failure to call configure_position_axes' @@ -358,13 +358,13 @@ def test_load_from_file_configure(): # for the truly creative instrument scientist ob = ObsBlueprint() - ob.add_fits_attribute('Chunk.polarization.axis.axis.ctype', 'CTYPE1') - ob.add_fits_attribute('Chunk.custom.axis.axis.ctype', 'CTYPE2') - ob.add_fits_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE3') - ob.add_fits_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE4') - ob.add_fits_attribute('Chunk.time.axis.axis.ctype', 'CTYPE5') - ob.add_fits_attribute('Chunk.energy.axis.axis.ctype', 'CTYPE6') - ob.add_fits_attribute('Chunk.observable.axis.axis.ctype', 'CTYPE7') + ob.add_attribute('Chunk.polarization.axis.axis.ctype', 'CTYPE1') + ob.add_attribute('Chunk.custom.axis.axis.ctype', 'CTYPE2') + ob.add_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE3') + ob.add_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE4') + ob.add_attribute('Chunk.time.axis.axis.ctype', 'CTYPE5') + ob.add_attribute('Chunk.energy.axis.axis.ctype', 'CTYPE6') + ob.add_attribute('Chunk.observable.axis.axis.ctype', 'CTYPE7') ob._guess_axis_info_from_plan() assert ob._wcs_std['Chunk.polarization.axis.axis.ctype'] == 'CTYPE1', \ ob._wcs_std['Chunk.polarization.axis.axis.ctype'] @@ -383,7 +383,7 @@ def test_load_from_file_configure(): with pytest.raises(ValueError): ob = ObsBlueprint() - ob.add_fits_attribute('Chunk.polarization.axis.axis.ctype', 'CTYPE1') + ob.add_attribute('Chunk.polarization.axis.axis.ctype', 'CTYPE1') ob._guess_axis_info_from_plan() diff --git a/doc/user/script_description.md b/doc/user/script_description.md index 1b027220..e5c81fcd 100644 --- a/doc/user/script_description.md +++ b/doc/user/script_description.md @@ -2,13 +2,16 @@ Ensure the pre-conditions described [here](../README.md). -The method `caom2utils.fits2caom2.augment` uses the concept of a blueprint to capture the description of a CAOM2 Observation as a +The method `caom2utils.caom2blueprint.augment` uses the concept of a blueprint to capture the description of a CAOM2 Observation as a mapping of a Telescope Data Model (TDM) to the CAOM2 data model. This describes how to extend that application to customize the mapping for a `COLLECTION`. `augment` works by creating or augmenting a CAOM2 Observation record, which can then be loaded via the CADC service. -`augment` creates the Observation record using information contained in a FITS file. 
The python module `fits2caom2`, from the python package `caom2utils`, -examines the FITS file and uses a blueprint, embodied in an instance of the ObsBlueprint class, to define default values, override values, and mappings to augment the FITS header. The keywords and values in the augmented FITS header are then used to fill in corresponding CAOM2 entities and attributes. +`augment` creates the Observation record using information contained in a FITS file. The python module `caom2blueprint`, from the python package `caom2utils`, +examines the FITS or HDF5 file and uses a blueprint, embodied in an instance of the ObsBlueprint class, to define default values, override values, and mappings to augment the file +metadata content. The augmented keywords and values are then +checked for consistency with `astropy`'s [WCS package](https://docs.astr. +The validated metadata is then used to fill in corresponding CAOM2 entities and attributes. There are two alternate ways to provide input file metadata to the caom2gen application: * have the file located on disk, and use the --local parameter @@ -17,26 +20,27 @@ There are two alternate ways to provide input file metadata to the caom2gen appl ## Observation Blueprints The blueprint is one way to capture the mapping of the TDM to the CAOM2 data model. The blueprint can identify: -* what information to obtain from the FITS header, -* defaults in case the FITS header is incomplete, -* hard-coded value when the FITS header should be ignored, or doesn't have information, and -* python functions which will be loaded and executed at run-time to augment FITS keyword values. See [this section](https://github.com/opencadc/caom2tools/blob/master/doc/user/script_description.md#putting-it-all-together) for an example. +* what information to obtain from the FITS or HDF5 files, +* defaults in case the metadata is incomplete, +* hard-coded value when the metadata should be ignored, or doesn't have information, and +* python functions which will be loaded and executed at run-time to augment file metadata content. See [this section](https://github.com/opencadc/caom2tools/blob/master/doc/user/script_description.md#putting-it-all-together) for an example. The blueprint is a set of key-value pairs, where the values have three possible representations. -The three representations are: defaults, overrides, and FITS keyword mappings. +The three representations are: defaults, overrides, and keyword mappings. -There is a sample blueprint in [this file](https://github.com/opencadc-metadata-curation/collection2caom2/blob/master/test_obs.blueprint). +There is a sample FITS blueprint in [this file](https://github.com/opencadc/collection2caom2/blob/master/test_obs.blueprint). +There is a sample HDF5 blueprint in [this file](https://github.com/opencadc/caom2tools/blob/master/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.blueprint). The keys are the long-form names for the CAOM2 model elements and attributes. The complete set of valid keys can be found by executing the following: - pydoc caom2utils.fits2caom2.ObsBlueprint + pydoc caom2utils.caom2blueprint.ObsBlueprint ### Changing What a Blueprint Looks Like, By Extension A blueprint may be provided by one of two ways: as a file on disk, or programmatically. 
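In either form, Chunk-level elements may also be overridden for a specific extension. A brief programmatic sketch (the keyword names and values are illustrative, mirroring the package's unit tests):

    bp = ObsBlueprint(position_axes=(1, 2), energy_axis=3)
    bp.set('Chunk.energy.velang', 33, extension=1)
    bp.add_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE', extension=1)
    bp.set_default('Chunk.energy.axis.axis.ctype', 'NOCTYPE', extension=1)

Values set for an extension apply only to the Chunks built from that extension; anything not overridden there falls back to the top-level mapping.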
-#### File Blueprint Usage +#### FITS File Blueprint Usage Observation.observationID = ['OBSID'], default = TEST_OBS Plane.dataRelease = 2017-08-31T00:00:00 @@ -46,15 +50,15 @@ A blueprint may be provided by one of two ways: as a file on disk, or programmat * Plane.dataRelease provides an override value, which is always used. * Chunk.position.coordsys provides a list of FITS keywords to try. If the first value is not in the FITS header, the second one is queried. If neither of them exist, there will be no value for Chunk.position.coordsys in the CAOM2 observation. -#### Programmatic Blueprint Usage +#### Programmatic FITS Blueprint Usage -An example of this implementation is in (https://github.com/opencadc-metadata-curation/vlass2caom2) +An example of this implementation is in (https://github.com/opencadc/vlass2caom2) bp = ObsBlueprint(position_axes=(1,2), time_axis=3, energy_axis=4, polarization_axis=5, observable_axis=6) bp.set_default('Observation.observationID', 'TEST_OBS') bp.set('Plane.dataRelease', '2017-08-31T00:00:00') - bp.add_fits_attribute('Chunk.position.coordsys', 'RADECSYS') - bp.add_fits_attribute('Chunk.position.coordsys', 'RADESYS') + bp.add_attribute('Chunk.position.coordsys', 'RADECSYS') + bp.add_attribute('Chunk.position.coordsys', 'RADESYS') * Observation.observationID provides a default value of `TEST_OBS`, which is used if the `OBSID` keyword does not exist in the FITS file. * Plane.dataRelease provides an override value, which is always used when setting the plane-level data release date in the CAOM2 instance. @@ -70,6 +74,32 @@ To make WCS content available in the blueprint, instead of setting the indices i bp.configure_observable_axis(6) bp.configure_custom_axis(7) +#### HDF5 File Blueprint Usage + + Observation.observationID = ['//header/obs_id'], default = TEST_OBS + Plane.dataRelease = 2017-08-31T00:00:00 + Chunk.position.coordsys = ['//header/radecsys,//header/radesys'] + +* Observation.observationID provides a default value of `TEST_OBS`, which is used if the `/header/obs_id` attribute does not exist in the HDF5 file. +* Plane.dataRelease provides an override value, which is always used. +* Chunk.position.coordsys provides a list of attributes to try. If the first value is not in the metadata, the second one is queried. If neither of them exist, there will be no value for Chunk.position.coordsys in the CAOM2 observation. + +#### Programmatic HDF5 Blueprint Usage + +An example of this implementation is in (https://github.com/opencadc/taosii2caom2) + + bp = Hdf5ObsBlueprint(position_axes=(1,2), time_axis=3, energy_axis=4, polarization_axis=5, observable_axis=6) + bp.set('Observation.observationID', '//header/obs_id') + bp.set_default('Observation.observationID', 'TEST_OBS') + bp.set('Plane.dataRelease', '2017-08-31T00:00:00') + bp.add_attribute('Chunk.position.coordsys', '//header/radecsys') + bp.add_attribute('Chunk.position.coordsys', '//header/radesys') + +* Observation.observationID provides a default value of `TEST_OBS`, which is used if the `OBSID` keyword does not exist in the FITS file. +* Plane.dataRelease provides an override value, which is always used when setting the plane-level data release date in the CAOM2 instance. +* Chunk.position.coordsys provides a list of FITS keywords to try. The last keyword listed will be tried first, and the first keyword found will be used to set the value. 
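Once populated, the `Hdf5ObsBlueprint` is handed to an `HDF5Parser`, which always needs the HDF5 file on local disk as well as the Artifact URI. A minimal sketch, reusing the `bp` from the example above and the TAOSII identifiers from the test data in this repository; the local file location is a placeholder:

    from caom2 import Algorithm, SimpleObservation
    from caom2utils import HDF5Parser

    obs = SimpleObservation('TAOSII', '20220201T200117', Algorithm('exposure'))
    uri = 'cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5'
    parser = HDF5Parser(bp, uri, '/data/taos2_20220201T201317Z_star04239531.h5')
    parser.augment_observation(obs, uri, product_id='star04239531')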
+ + ## Putting It All Together The following script is an end-to-end example of describing and loading a CAOM2 Observation to the CADC service, given a FITS file and programatically constructing a blueprint. @@ -123,7 +153,7 @@ The following script is an end-to-end example of describing and loading a CAOM2 bp.set('Plane.dataProductType', DataProductType.IMAGE) # add the FITS keyword 'RADECSYS' to the list of FITS keywords # checked for a value - bp.add_fits_attribute('Chunk.position.coordsys', 'RADECSYS') + bp.add_attribute('Chunk.position.coordsys', 'RADECSYS') # execute a function to set a value - parameter may be either # 'header' or 'uri' bp.set('Plane.metaRelease', 'get_meta_release(header)') From 4d62505d43687b2f29f26fdb1d2722f8d7ee5beb Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 23 Mar 2022 16:59:31 -0700 Subject: [PATCH 10/38] CADC-10809 - address code review comments. --- caom2utils/caom2utils/caom2blueprint.py | 63 ++++++++++++++----- .../taos_h5file/20220201T200117/taosii.py | 27 +++----- doc/user/script_description.md | 4 +- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 1b11b40e..f8600bdc 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -79,20 +79,19 @@ from caom2.caom_util import int_32 from caom2 import ( Artifact, Part, Chunk, Plane, Observation, CoordError, - RefCoord, CoordRange1D, CoordRange2D, Coord2D, + SpectralWCS, CoordAxis1D, Axis, CoordFunction1D, RefCoord, + SpatialWCS, Dimension2D, Coord2D, CoordFunction2D, + CoordAxis2D, CoordRange1D, PolarizationWCS, TemporalWCS, + ObservationReader, ObservationWriter, Algorithm, + ReleaseType, ProductType, ObservationIntentType, + DataProductType, Telescope, Environment, + Instrument, Proposal, Target, Provenance, Metrics, + CalibrationLevel, Requirements, DataQuality, PlaneURI, + SimpleObservation, DerivedObservation, ChecksumURI, + ObservationURI, ObservableAxis, Slice, Point, TargetPosition, + CoordRange2D, TypedSet, CustomWCS, Observable, + CompositeObservation, EnergyTransition ) -from caom2 import SpectralWCS, CoordAxis1D, Axis, CoordFunction1D, RefCoord -from caom2 import SpatialWCS, Dimension2D, Coord2D, CoordFunction2D -from caom2 import CoordAxis2D, CoordRange1D, PolarizationWCS, TemporalWCS -from caom2 import ObservationReader, ObservationWriter, Algorithm -from caom2 import ReleaseType, ProductType, ObservationIntentType -from caom2 import DataProductType, Telescope, Environment -from caom2 import Instrument, Proposal, Target, Provenance, Metrics -from caom2 import CalibrationLevel, Requirements, DataQuality, PlaneURI -from caom2 import SimpleObservation, DerivedObservation, ChecksumURI -from caom2 import ObservationURI, ObservableAxis, Slice, Point, TargetPosition -from caom2 import CoordRange2D, TypedSet, CustomWCS, Observable -from caom2 import CompositeObservation, EnergyTransition from caom2utils import data_util from caom2utils.caomvalidator import validate from caom2utils.wcsvalidator import InvalidWCSError @@ -113,8 +112,8 @@ APP_NAME = 'caom2gen' -__all__ = ['Caom2Exception', 'FitsParser', 'WcsParser', 'DispatchingFormatter', - 'ObsBlueprint', 'get_arg_parser', 'proc', +__all__ = ['Caom2Exception', 'ContentParser', 'FitsParser', 'FitsWcsParser', + 'DispatchingFormatter', 'ObsBlueprint', 'get_arg_parser', 'proc', 'POLARIZATION_CTYPES', 'gen_proc', 'get_gen_proc_arg_parser', 'BlueprintParser', 'augment', 'get_vos_headers', 
'get_external_headers', 'HDF5Parser', 'Hdf5ObsBlueprint', @@ -1455,8 +1454,14 @@ def is_table(value): @staticmethod def is_function(value): + """ + Check if a blueprint value has Python 'function' syntax. Exclude + strings with syntax that enable addressing HDF5 arrays. + + :return: True if the value is the name of a function to be executed, + False, otherwise + """ return (not ObsBlueprint.needs_lookup(value) and isinstance(value, str) - # and isinstance(value, str) and '()' in value) and isinstance(value, str) and '(' in value and ')' in value and '/' not in value) @@ -1963,6 +1968,7 @@ def augment_artifact(self, artifact, index): """ Augments a given CAOM2 artifact with available information :param artifact: existing CAOM2 artifact to be augmented + :param index: int Part name, used in specializing classes """ self.logger.debug(f'Begin CAOM2 artifact augmentation for {self.uri}.') if artifact is None or not isinstance(artifact, Artifact): @@ -2198,6 +2204,7 @@ def augment_artifact(self, artifact, index): """ Augments a given CAOM2 artifact with available content information :param artifact: existing CAOM2 artifact to be augmented + :param index: int Part name """ super().augment_artifact(artifact, index) @@ -3607,10 +3614,34 @@ def _has_data_array(header): class HDF5Parser(ContentParser): + """ + Parses an HDF5 file and extracts the CAOM2 related information which + can be used to augment an existing CAOM2 observation, plane, or artifact. + + If there is per-Chunk metadata in the file, the constructor parameter + 'find_roots_here' is the location where the N Chunk metadata starts. + + The WCS-related keywords of the HDF5 files are used to create instances of + astropy.wcs.WCS so that verify might be called. + + There is no CADC support for the equivalent of the FITS --fhead parameter + for HDF5 files, which is why the name of the file on a local disk is + always required. 
+ + """ def __init__( self, obs_blueprint, uri, local_f_name, find_roots_here='/sitedata' ): + """ + + :param obs_blueprint: Hdf5ObsBlueprint instance + :param uri: which artifact augmentation is basedd on + :param local_f_name: str file name on disk + :param find_roots_here: str location where Chunk metadata starts + """ + # h5py is an extra in this package since most collections do not + # require it import h5py self._file = h5py.File(local_f_name, 'r') super().__init__(obs_blueprint, uri) diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py index dd394a80..f39352bd 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.py @@ -23,25 +23,16 @@ def get_target_position_cval2(base): def _get_target_position(base): - import logging b = base.get('base') - try: - ra = b['header']['object']['obj_ra'] - dec = b['header']['object']['obj_dec'] - # logging.error(f'{ra} {dec}') - result = SkyCoord( - ra.decode('utf-8'), - dec.decode('utf-8'), - frame='icrs', - unit=(units.hourangle, units.deg), - ) - return result.ra.degree, result.dec.degree - except Exception as e: - import logging - import traceback - logging.error(e) - logging.error(traceback.format_exc()) - raise e + ra = b['header']['object']['obj_ra'] + dec = b['header']['object']['obj_dec'] + result = SkyCoord( + ra.decode('utf-8'), + dec.decode('utf-8'), + frame='icrs', + unit=(units.hourangle, units.deg), + ) + return result.ra.degree, result.dec.degree def get_time_axis_range_end(base): diff --git a/doc/user/script_description.md b/doc/user/script_description.md index e5c81fcd..f4c8aff1 100644 --- a/doc/user/script_description.md +++ b/doc/user/script_description.md @@ -10,8 +10,8 @@ mapping of a Telescope Data Model (TDM) to the CAOM2 data model. This describes `augment` creates the Observation record using information contained in a FITS file. The python module `caom2blueprint`, from the python package `caom2utils`, examines the FITS or HDF5 file and uses a blueprint, embodied in an instance of the ObsBlueprint class, to define default values, override values, and mappings to augment the file metadata content. The augmented keywords and values are then -checked for consistency with `astropy`'s [WCS package](https://docs.astr. -The validated metadata is then used to fill in corresponding CAOM2 entities and attributes. +used to construct an `astropy.wcs.WCS` instance. See the [astropy docs](https://docs.astropy.org/en/stable/wcs/index.html). +This has the benefit of resulting in an internally consistent WCS, which is then used to fill in corresponding CAOM2 entities and attributes. There are two alternate ways to provide input file metadata to the caom2gen application: * have the file located on disk, and use the --local parameter From 0d37431113935a510c1aff5b6ccd9c424137418d Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 25 Mar 2022 16:55:29 -0700 Subject: [PATCH 11/38] CADC-10809 - interim commit with caching of HDF5 lookup results in the Hdf5Blueprint implemented, and all unit tests passing. 
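The script_description.md change in the patch above replaces ad hoc keyword checking with construction of an `astropy.wcs.WCS` instance, so that astropy reports any internal inconsistency. A minimal sketch of that idea, using only public astropy calls and invented axis values:

    from astropy.wcs import WCS

    w = WCS(naxis=2)
    w.wcs.ctype = ['RA---TAN', 'DEC--TAN']
    w.wcs.cunit = ['deg', 'deg']
    w.wcs.crpix = [512.0, 512.0]
    w.wcs.crval = [210.0, 54.3]
    w.wcs.cd = [[-5.0e-5, 0.0], [0.0, 5.0e-5]]
    w.array_shape = (1024, 1024)
    # astropy raises here if the keyword combination cannot form a
    # valid transform
    w.wcs.set()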
--- caom2utils/caom2utils/caom2blueprint.py | 857 +++++++++++------- .../caom2utils/tests/data/cfhtsg/mp9801/y.xml | 3 - .../caom2utils/tests/test_fits2caom2.py | 19 - 3 files changed, 541 insertions(+), 338 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index f8600bdc..36a60c90 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -253,8 +253,8 @@ class ObsBlueprint: ob = ObsBlueprint(position_axis=(1, 2), energy_axis=3, polarization_axis=4, time_axis=5)) ob.set('Observation.algorithm.name', 'exposure') - ob.set_fits_attribute('Chunk.energy.axis.axis.ctype', ['MYCTYPE'], - extension=1) + ob.add_attribute('Chunk.energy.axis.axis.ctype', ['MYCTYPE'], + extension=1) ob.add_attribute('Chunk.energy.axis.axis.ctype', 'MYCTYPE2', extension=1) ob.set('Chunk.energy.velang', 33, extension=1) @@ -962,12 +962,13 @@ def _guess_axis_info_from_plan(self): axis_info) else: value = self._plan[ii] + if value is None: + continue if (value.startswith('CTYPE')) and value[-1].isdigit(): value = value.split('-')[0] self._guess_axis_info_from_ctypes(ii, int(value[-1]), axis_info) - configured_index = 0 for ii in self._plan: if ii.startswith('Chunk.position') and ii.endswith('axis1.ctype') \ and not axis_info['ra'][1]: @@ -1142,6 +1143,8 @@ def load_from_file(self, file_name): cleaned_up_value = (temp_list_2, None) else: cleaned_up_value = value.strip('\n').strip() + if cleaned_up_value == 'None': + cleaned_up_value = None self.set(key.strip(), cleaned_up_value) self._guess_axis_info_from_plan() @@ -1510,6 +1513,39 @@ def update(self, value): class Hdf5ObsBlueprint(ObsBlueprint): + """ + Class that specializes the CAOM2 Observation construction based on HDF5 + file content. + + The blueprint designates the source of each of these attributes as either + HDF5 Dataset or Group values. Specific or default values may also be + indicated in the same fashion os for an ObsBlueprint. The blueprint can + be checked by simply displaying it. 
+ + HDF5-specific example: + # create a blueprint and customize it + ob = Hdf5ObsBlueprint(position_axes=(1, 2) + + # lookup value starting with // means rooted at base of the hdf5 file + ob.add_attribute('Observation.target.name', '//header/object/obj_id') + + # lookup value starting with / means rooted at the base of the + # "find_roots_here" parameter for HDF5Parser + # + # (integer) means return only the value with the index of "integer" + # from a list + ob.add_attribute( + 'Chunk.position.axis.function.refCoord.coord1.pix', + '/header/wcs/crpix(0)') + + # (integer:integer) means return only the value with the index of + # "integer" from a list, followed by "integer" from the list in the + # list + ob.add_attribute( + 'Chunk.position.axis.function.cd11', '/header/wcs/cd(0:0)') + print(ob) + + """ def __init__(self, position_axes=None, energy_axis=None, polarization_axis=None, time_axis=None, obs_axis=None, custom_axis=None, module=None, @@ -1525,6 +1561,8 @@ def __init__(self, position_axes=None, energy_axis=None, update, instantiated_class, ) + # TODO - remove the defaults that have a value of None as they + # have no purpose tmp = {'Observation.metaRelease': ([], None), 'Observation.instrument.name': ([], None), 'Observation.type': ([], None), @@ -1560,19 +1598,12 @@ def __init__(self, position_axes=None, energy_axis=None, for key in tmp: self.set(key, tmp[key]) - # rules so far: - # - lookup value starting with // means rooted at base of the hdf5 file - # - lookup value starting with / means rooted at the base of the - # "find_roots_here" parameter for HDF5Parser - # - (integer) means return only the value with the index of "integer" - # from a list - # - (integer:integer) means return only the value with the index of - # "integer" from a list, followed by "integer" from the list in the - # list - # there are no sensible/known HDF5 defaults, so just try to make sure # the blueprint executes with a lot of None values + # keep the attribute _wcs_std, so that the list of WCS keywords used + # as input is known + def configure_custom_axis(self, axis, override=True): """ Set the expected custom keywords by index in the blueprint @@ -1633,9 +1664,9 @@ def configure_position_axes(self, axes, override=True): self.set('Chunk.position.axis.function.cd21', ([], None)) self.set('Chunk.position.axis.function.cd22', ([], None)) self.set('Chunk.position.axis.function.dimension.naxis1', - ([], None)) + ([], 1)) self.set('Chunk.position.axis.function.dimension.naxis2', - ([], None)) + ([], 1)) self.set('Chunk.position.axis.function.refCoord.coord1.pix', ([], None)) self.set('Chunk.position.axis.function.refCoord.coord1.val', @@ -1836,6 +1867,85 @@ def configure_time_axis(self, axis, override=True): self._time_axis_configed = True + def _guess_axis_info_from_plan(self): + """Look for info regarding axis types in the blueprint wcs_std. + Configure the blueprint according to the guesses. + """ + # a data structure to carry around twelve bits of data at a time: + # the first item in the set is the ctype index, and the second is + # whether or not the index means anything, resulting in a + # call to the blueprint configure_* methods if it's True. 
+ axis_info = { + 'custom': (0, False), + 'dec': (0, False), + 'energy': (0, False), + 'obs': (0, False), + 'polarization': (0, False), + 'ra': (0, False), + 'time': (0, False)} + + for ii in self._plan: + if ii.startswith('Chunk.position') and ii.endswith('axis1.ctype') \ + and not axis_info['ra'][1]: + configured_index = self._get_configured_index(axis_info, 'ra') + axis_info['ra'] = (configured_index, True) + elif ii.startswith('Chunk.position') and \ + ii.endswith('axis2.ctype') and not axis_info['dec'][1]: + configured_index = self._get_configured_index(axis_info, + 'dec') + axis_info['dec'] = (configured_index, True) + elif ii.startswith('Chunk.energy') and not axis_info['energy'][1]: + configured_index = self._get_configured_index(axis_info, + 'energy') + axis_info['energy'] = (configured_index, True) + elif ii.startswith('Chunk.time') and not axis_info['time'][1]: + configured_index = self._get_configured_index(axis_info, + 'time') + axis_info['time'] = (configured_index, True) + elif ii.startswith('Chunk.polarization') \ + and not axis_info['polarization'][1]: + configured_index = self._get_configured_index(axis_info, + 'polarization') + axis_info['polarization'] = (configured_index, True) + elif ii.startswith('Chunk.observable') and not axis_info['obs'][1]: + configured_index = self._get_configured_index(axis_info, + 'obs') + axis_info['obs'] = (configured_index, True) + elif ii.startswith('Chunk.custom') and not axis_info['custom'][1]: + configured_index = self._get_configured_index(axis_info, + 'custom') + axis_info['custom'] = (configured_index, True) + + if axis_info['ra'][1] and axis_info['dec'][1]: + self.configure_position_axes( + (axis_info['ra'][0], axis_info['dec'][0]), False) + elif axis_info['ra'][1] or axis_info['dec'][1]: + raise ValueError('Only one positional axis found ' + '(ra/dec): {}/{}'. 
+ format(axis_info['ra'][0], axis_info['dec'][0])) + else: + # assume that positional axis are 1 and 2 by default + if (axis_info['time'][0] in [1, 2] or + axis_info['energy'][0] in [1, 2] or + axis_info['polarization'][0] in [1, 2] or + axis_info['obs'][0] in [1, 2] or + axis_info['custom'][0] in [1, 2]): + raise ValueError('Cannot determine the positional axis') + else: + self.configure_position_axes((1, 2), False) + + if axis_info['time'][1]: + self.configure_time_axis(axis_info['time'][0], False) + if axis_info['energy'][1]: + self.configure_energy_axis(axis_info['energy'][0], False) + if axis_info['polarization'][1]: + self.configure_polarization_axis(axis_info['polarization'][0], + False) + if axis_info['obs'][1]: + self.configure_observable_axis(axis_info['obs'][0], False) + if axis_info['custom'][1]: + self.configure_custom_axis(axis_info['custom'][0], False) + class BlueprintParser: """ @@ -2352,9 +2462,6 @@ def augment_plane(self, plane, artifact_uri): self.logger.debug( f'End content plane augmentation for {artifact_uri}.') - def _content_lookup(self, key, extension=None): - raise NotImplementedError - def _get_algorithm(self, obs): """ Create an Algorithm instance populated with available content @@ -2450,7 +2557,7 @@ def _get_from_list(self, lookup, index, current=None): if ObsBlueprint.needs_lookup(keys): for ii in keys[0]: try: - value = self._content_lookup(ii, index) + value = self.headers[index].get(ii) if value: self.logger.debug( f'{lookup}: assigned value {value} based on ' @@ -2815,8 +2922,7 @@ def _get_set_from_list(self, lookup, index): if isinstance(keywords, tuple): for ii in keywords[0]: try: - # value = self.headers[index].get(ii) - value = self._content_lookup(ii, index) + value = self.headers[index].get(ii) break except KeyError: self.add_error(lookup, sys.exc_info()[1]) @@ -3524,9 +3630,6 @@ def augment_artifact(self, artifact, index=0): self.logger.debug( f'End artifact augmentation for {artifact.uri}.') - def _content_lookup(self, key, extension=None): - return self.headers[extension].get(key) - def _get_chunk_naxis(self, chunk, index=None): # NOTE: astropy.wcs does not distinguished between WCS axes and # data array axes. 
naxis in astropy.wcs represents in fact the @@ -3613,6 +3716,12 @@ def _has_data_array(header): return True +# h5py is an extra in this package since most collections do not +# require it +import h5py +from collections import defaultdict + + class HDF5Parser(ContentParser): """ Parses an HDF5 file and extracts the CAOM2 related information which @@ -3631,7 +3740,7 @@ class HDF5Parser(ContentParser): """ def __init__( - self, obs_blueprint, uri, local_f_name, find_roots_here='/sitedata' + self, obs_blueprint, uri, local_f_name, find_roots_here='sitedata' ): """ @@ -3640,60 +3749,176 @@ def __init__( :param local_f_name: str file name on disk :param find_roots_here: str location where Chunk metadata starts """ - # h5py is an extra in this package since most collections do not - # require it - import h5py - self._file = h5py.File(local_f_name, 'r') + self._file = h5py.File(local_f_name) + self._find_roots_here = find_roots_here + self._extension_names = [] super().__init__(obs_blueprint, uri) self._wcs_parser = None - self._roots = [] - self.apply_blueprint() - self._set_roots(find_roots_here, self._file) - def _set_roots(self, root_name, root): - bits = root_name.split('/') - if len(bits) == 2: - for key in root[root_name].keys(): - self._roots.append(root[root_name][key]) - else: - x = f'/{"/".join(ii for ii in bits[2:])}' - self._set_roots(x, root) + def apply_blueprint_from_file(self): + self.logger.debug('Begin apply_blueprint_from_file') + individual, multi = self._xxx() + # for every key in the HDF5 file, is the key referenced in the + # blueprint? if yes, capture the value referenced by the key back + # to the blueprint + + # for each Part, for every key in the HDF5 file, is the file key + # referenced in the blueprint Part? If yes, capture the value + # referenced by the key back to the blueprint for the Part + + def y(name, object): + if name == self._find_roots_here: + # print(f'{name} {type(object)} {dir(object)}') + for ii, key in enumerate(object.keys()): + temp = f'{name}/{key}' + self.logger.debug(f'Adding extension {temp}') + self._extension_names.append(temp) + self._blueprint._extensions[ii] = {} + + for part_index, part_name in enumerate(self._extension_names): + if ( + name.startswith(part_name) + and isinstance(object, h5py.Dataset) + and object.dtype.names is not None + ): + for d_name in object.dtype.names: + temp = f'{name.replace(part_name, "")}/{d_name}' + # print(temp) + for key in multi.keys(): + if key == temp: + for jj in multi.get(key): + self._blueprint.set( + jj, object[d_name], part_index + ) + elif key.startswith(temp) and '(' in key: + # print(f'temp {temp} key {key}') + z = key.split('(') + if ':' in z[1]: + a = z[1].split(')')[0].split(':') + if len(a) > 2: + raise NotImplementedError + for jj in multi.get(key): + self._blueprint.set( + jj, + object[d_name][int(a[0])][int(a[1])], + part_index, + ) + else: + index = int(z[1].split(')')[0]) + for jj in multi.get(key): + self._blueprint.set( + jj, + object[d_name][index], + part_index, + ) + + if isinstance(object, h5py.Dataset): + if object.dtype.names is not None: + for d_name in object.dtype.names: + temp = f'//{name}/{d_name}' + if temp in individual.keys(): + for jj in individual.get(temp): + self._blueprint.set(jj, object[d_name], 0) + + self._file.visititems(y) + self.logger.debug('Done apply_blueprint_from_file') + + def _xxx(self): + individual = defaultdict(list) + multi = defaultdict(list) + for key, value in self._blueprint._plan.items(): + if ObsBlueprint.needs_lookup(value): + for ii in 
value[0]: + if ii.startswith('//'): + individual[ii].append(key) + else: + multi[ii].append(key) + return individual, multi - def augment_artifact(self, artifact, index=0): - for i, root in enumerate(self._roots): - self._wcs_parser = Hdf5WcsParser(root, self.blueprint, self._file) - super().augment_artifact(artifact, i) + def apply_blueprint(self): + """ + Different implementation than BlueprintParser, because of the + extensions. + """ + self.logger.debug('Begin apply_blueprint') + self.apply_blueprint_from_file() - def _content_lookup(self, key, extension=None): - if isinstance(extension, int): - extension = self._roots[extension] + # after the apply_blueprint_from_file call, all the metadata from the + # file has been applied to the blueprint, so now do the bits that + # require no file access - if isinstance(key, list): - # TODO - document why this is the case - return None - if key.startswith('//'): - key = key.replace('//', '/') - extension = self._file - bits = key.split('/') - if len(bits) == 2: - if '(' in bits[1]: - x = bits[1].split('(') - if ':' in x[1]: - a = x[1].split(')')[0].split(':') - if len(a) > 2: - raise NotImplementedError - y = extension[x[0]][int(a[0])][int(a[1])] - return y - else: - index = int(x[1].split(')')[0]) - return extension[x[0]][index] - else: - return extension[bits[1]] - else: - # the 2 is because there's always a leading slash, so the - # first bit is an empty string - temp = f'/{"/".join(ii for ii in bits[2:])}' - return self._content_lookup(temp, extension[bits[1]]) + # pointers that are short to type + exts = self._blueprint._extensions + plan = self._blueprint._plan + + # apply the functions + if (self._blueprint._module is not None or + self._blueprint._module_instance is not None): + for key, value in plan.items(): + if ObsBlueprint.is_function(value): + if self._blueprint._module_instance is None: + plan[key] = self._execute_external(value, key, 0) + else: + plan[key] = self._execute_external_instance( + value, key, 0) + for extension in exts: + for key, value in exts[extension].items(): + if ObsBlueprint.is_function(value): + if self._blueprint._module_instance is None: + exts[extension][key] = self._execute_external( + value, key, extension) + else: + exts[extension][key] = \ + self._execute_external_instance( + value, key, extension) + + # blueprint already contains all the overrides, only need to make + # sure the overrides get applied to all the extensions + for extension in exts: + for key, value in exts[extension].items(): + if ( + ObsBlueprint.is_table(value) + # already been looked up + or ObsBlueprint.needs_lookup(value) + # already been executed + or ObsBlueprint.is_function(value) + # nothing to assign + or ObsBlueprint.has_no_value(value) + ): + continue + exts[extension][key] = value + self.logger.debug( + f'{key}: set to {value} in extension {extension}') + + # if no values have been set by file lookups, function execution, + # or applying overrides, apply defaults, including to all extensions + for key, value in plan.items(): + if ObsBlueprint.needs_lookup(value) and value[1]: + # there is a default value in the blueprint that can be used + for extension in exts: + q = exts[extension].get(key) + if q is None: + exts[extension][key] = value[1] + self.logger.debug( + f'Add {key} and assign default value of {value[1]} ' + f'in extension {extension}.') + elif ObsBlueprint.needs_lookup(value): + exts[extension][key] = value[1] + self.logger.debug( + f'{key}: set value to default of {value[1]} in ' + f'extension {extension}.') + 
plan[key] = value[1] + self.logger.debug(f'{key}: set value to default of {value[1]}') + + self.logger.debug('Done apply_blueprint') + return + + def augment_artifact(self, artifact, index=0): + self._wcs_parser = Hdf5WcsParser(self._blueprint, 0) + super().augment_artifact(artifact, 0) + for ii in range(1, len(self._blueprint._extensions)): + self._wcs_parser = Hdf5WcsParser(self._blueprint, ii) + super().augment_artifact(artifact, ii) def _get_chunk_naxis(self, chunk, index): chunk.naxis = self._get_from_list('Chunk.naxis', index, chunk.naxis) @@ -3702,6 +3927,72 @@ def ignore_chunks(self, artifact, index=0): artifact.parts.add(Part(str(index))) return False + # def _get_from_list(self, lookup, index, current=None): + # value = None + # try: + # keys = self.blueprint._get(lookup) + # except KeyError: + # self.add_error(lookup, sys.exc_info()[1]) + # self.logger.debug( + # f'Could not find {lookup!r} in caom2blueprint configuration.') + # if current: + # self.logger.debug( + # f'{lookup}: using current value of {current!r}.') + # value = current + # return value + # + # if ObsBlueprint.needs_lookup(keys): + # for ii in keys[0]: + # try: + # value = self.headers[index].get(ii) + # if value: + # self.logger.debug( + # f'{lookup}: assigned value {value} based on ' + # f'keyword {ii}.') + # break + # except (KeyError, IndexError): + # if keys[0].index(ii) == len(keys[0]) - 1: + # self.add_error(lookup, sys.exc_info()[1]) + # # assign a default value, if one exists + # if keys[1]: + # if current is None: + # value = keys[1] + # self.logger.debug( + # f'{lookup}: assigned default value {value}.') + # else: + # value = current + # if value is None: + # # checking current does not work in the general case, + # # because current might legitimately be 'None' + # if self._blueprint.update: + # if ( + # current is not None + # or (current is None and isinstance(value, bool)) + # ): + # value = current + # self.logger.debug( + # f'{lookup}: used current value {value}.') + # else: + # # assign a default value, if one exists + # if keys[1]: + # if current is None: + # value = keys[1] + # self.logger.debug( + # f'{lookup}: assigned default value {value}.') + # else: + # value = current + # + # if (keys is not None) and (keys != ''): + # if keys == 'None': + # value = None + # else: + # value = keys + # elif current: + # value = current + # + # self.logger.debug(f'{lookup}: value is {value}') + # return value + class WcsParser: """ @@ -4143,10 +4434,13 @@ def _get_axis_length(self, for_axis): class Hdf5WcsParser(WcsParser): + """ + This class initializes an astropy.wcs instance with metadata from an + Hdf5Parser. 
+ """ - def __init__(self, root, blueprint, base): + def __init__(self, blueprint, extension): """ - :param root: h5py.h5p.Dataset or h5py.h5p.Group :param blueprint: ObsBlueprint """ super().__init__() @@ -4160,8 +4454,10 @@ def __init__(self, root, blueprint, base): 'observable': [0, False], 'custom': [0, False], } - self._base = base - self._set_wcs(root, blueprint) + self._blueprint = blueprint + # int - index into blueprint._plan extensions + self._extension = extension + self._set_wcs() @property def wcs(self): @@ -4198,219 +4494,172 @@ def _get_axis_length(self, for_axis): result = self._wcs.array_shape[for_axis-1] return _to_int(result) - def _set_wcs(self, root, blueprint): - self._wcs = WCS(naxis=blueprint.get_configed_axes_count()) - array_shape = [0] * blueprint.get_configed_axes_count() - count = 0 + def assign_sanitize(self, assignee, index, key, sanitize=True): + """ + Do not want to blindly assign None to astropy.wcs attributes, so + use this method for conditional assignment. - def assign_sanitize(assignee, index, key, sanitize=True): - x = self._attribute_lookup(blueprint._get(key), root) - if sanitize: - x = self._sanitize(x) - if x is not None: - assignee[index] = x + If someone wants to assign None to a value, either use 'set', and + specifically assign None, or execute a function to set it to None + conditionally. There will be no support for a Default value of None + with HDF5 files. That's a decision for today, anyway. + """ + x = self._blueprint._get(key, self._extension) + if sanitize: + x = self._sanitize(x) + if x is not None and not ObsBlueprint.needs_lookup(x): + assignee[index] = x + + def _set_wcs(self): + self._wcs = WCS(naxis=self._blueprint.get_configed_axes_count()) + array_shape = [0] * self._blueprint.get_configed_axes_count() + count = 0 - if blueprint._pos_axes_configed: + if self._blueprint._pos_axes_configed: self._axes['ra'][1] = True self._axes['dec'][1] = True self._axes['ra'][0] = count self._axes['dec'][0] = count + 1 - temp = [0] * blueprint.get_configed_axes_count() + temp = [0] * self._blueprint.get_configed_axes_count() cd = [temp.copy() - for ii in range(blueprint.get_configed_axes_count())] - assign_sanitize(self._wcs.wcs.ctype, count, - 'Chunk.position.axis.axis1.ctype') - assign_sanitize(self._wcs.wcs.ctype, count + 1, - 'Chunk.position.axis.axis2.ctype') - assign_sanitize(self._wcs.wcs.cunit, count, - 'Chunk.position.axis.axis1.cunit') - assign_sanitize(self._wcs.wcs.cunit, count + 1, - 'Chunk.position.axis.axis2.cunit') - array_shape[count] = self._attribute_lookup( - blueprint._get( - 'Chunk.position.axis.function.dimension.naxis1'), root - ) - array_shape[count + 1] = self._attribute_lookup( - blueprint._get( - 'Chunk.position.axis.function.dimension.naxis2'), root - ) - assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.position.axis.function.refCoord.coord1.pix') - assign_sanitize(self._wcs.wcs.crpix, count + 1, - 'Chunk.position.axis.function.refCoord.coord2.pix') - assign_sanitize(self._wcs.wcs.crval, count, - 'Chunk.position.axis.function.refCoord.coord1.val') - assign_sanitize(self._wcs.wcs.crval, count + 1, - 'Chunk.position.axis.function.refCoord.coord2.val') - cd[count][0] = self._attribute_lookup( - blueprint._get('Chunk.position.axis.function.cd11'), root - ) - cd[count][1] = self._attribute_lookup( - blueprint._get('Chunk.position.axis.function.cd12'), root - ) - cd[count + 1][0] = self._attribute_lookup( - blueprint._get('Chunk.position.axis.function.cd21'), root - ) - cd[count + 1][1] = self._attribute_lookup( - 
blueprint._get('Chunk.position.axis.function.cd22'), root - ) - assign_sanitize(self._wcs.wcs.crder, count, - 'Chunk.position.axis.error1.rnder') - assign_sanitize(self._wcs.wcs.crder, count + 1, - 'Chunk.position.axis.error2.rnder') - assign_sanitize(self._wcs.wcs.csyer, count, - 'Chunk.position.axis.error1.syser') - assign_sanitize(self._wcs.wcs.csyer, count + 1, - 'Chunk.position.axis.error2.syser') - self._finish_position(blueprint) + for ii in range(self._blueprint.get_configed_axes_count())] + self.assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.position.axis.axis1.ctype') + self.assign_sanitize(self._wcs.wcs.ctype, count + 1, + 'Chunk.position.axis.axis2.ctype') + self.assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.position.axis.axis1.cunit') + self.assign_sanitize(self._wcs.wcs.cunit, count + 1, + 'Chunk.position.axis.axis2.cunit') + array_shape[count] = self._blueprint._get( + 'Chunk.position.axis.function.dimension.naxis1') + array_shape[count + 1] = self._blueprint._get( + 'Chunk.position.axis.function.dimension.naxis2') + self.assign_sanitize( + self._wcs.wcs.crpix, count, + 'Chunk.position.axis.function.refCoord.coord1.pix') + self.assign_sanitize( + self._wcs.wcs.crpix, count + 1, + 'Chunk.position.axis.function.refCoord.coord2.pix') + self.assign_sanitize( + self._wcs.wcs.crval, count, + 'Chunk.position.axis.function.refCoord.coord1.val') + self.assign_sanitize( + self._wcs.wcs.crval, count + 1, + 'Chunk.position.axis.function.refCoord.coord2.val') + x = self._blueprint._get('Chunk.position.axis.function.cd11', + self._extension) + if x is not None and not ObsBlueprint.needs_lookup(x): + cd[count][0] = x + x = self._blueprint._get('Chunk.position.axis.function.cd12', + self._extension) + if x is not None and not ObsBlueprint.needs_lookup(x): + cd[count][1] = x + x = self._blueprint._get('Chunk.position.axis.function.cd21', + self._extension) + if x is not None and not ObsBlueprint.needs_lookup(x): + cd[count + 1][0] = x + x = self._blueprint._get('Chunk.position.axis.function.cd22', + self._extension) + if x is not None and not ObsBlueprint.needs_lookup(x): + cd[count + 1][1] = x + self.assign_sanitize(self._wcs.wcs.crder, count, + 'Chunk.position.axis.error1.rnder') + self.assign_sanitize(self._wcs.wcs.crder, count + 1, + 'Chunk.position.axis.error2.rnder') + self.assign_sanitize(self._wcs.wcs.csyer, count, + 'Chunk.position.axis.error1.syser') + self.assign_sanitize(self._wcs.wcs.csyer, count + 1, + 'Chunk.position.axis.error2.syser') + self._finish_position() self._wcs.wcs.cd = cd count += 2 - if blueprint._time_axis_configed: + if self._blueprint._time_axis_configed: self._axes['time'][1] = True self._axes['time'][0] = count - assign_sanitize(self._wcs.wcs.ctype, count, - 'Chunk.time.axis.axis.ctype', False) - assign_sanitize(self._wcs.wcs.cunit, count, - 'Chunk.time.axis.axis.cunit', False) - array_shape[count] = self._attribute_lookup( - blueprint._get('Chunk.time.axis.function.naxis'), root - ) - assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.time.axis.function.refCoord.pix', False) - assign_sanitize(self._wcs.wcs.crval, count, - 'Chunk.time.axis.function.refCoord.val', False) - assign_sanitize(self._wcs.wcs.crder, count, - 'Chunk.time.axis.error.rnder') - assign_sanitize(self._wcs.wcs.csyer, count, - 'Chunk.time.axis.error.syser') - self._finish_time(blueprint) + self.assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.time.axis.axis.ctype', False) + self.assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.time.axis.axis.cunit', False) + 
array_shape[count] = self._blueprint._get( + 'Chunk.time.axis.function.naxis', self._extension) + self.assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.time.axis.function.refCoord.pix', False) + self.assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.time.axis.function.refCoord.val', False) + self.assign_sanitize(self._wcs.wcs.crder, count, + 'Chunk.time.axis.error.rnder') + self.assign_sanitize(self._wcs.wcs.csyer, count, + 'Chunk.time.axis.error.syser') + self._finish_time() count += 1 - if blueprint._energy_axis_configed: + if self._blueprint._energy_axis_configed: self._axes['energy'][1] = True self._axes['energy'][0] = count - assign_sanitize(self._wcs.wcs.ctype, count, - 'Chunk.energy.axis.axis.ctype', False) - assign_sanitize(self._wcs.wcs.cunit, count, - 'Chunk.energy.axis.axis.cunit', False) - array_shape[count] = self._attribute_lookup( - blueprint._get('Chunk.energy.axis.function.naxis'), root - ) - assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.energy.axis.function.refCoord.pix', False) - assign_sanitize(self._wcs.wcs.crval, count, - 'Chunk.energy.axis.function.refCoord.val', False) - assign_sanitize(self._wcs.wcs.crder, count, - 'Chunk.energy.axis.error.rnder') - assign_sanitize(self._wcs.wcs.csyer, count, - 'Chunk.energy.axis.error.syser') - self._finish_energy(blueprint) + self.assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.energy.axis.axis.ctype', False) + self.assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.energy.axis.axis.cunit', False) + array_shape[count] = self._blueprint._get( + 'Chunk.energy.axis.function.naxis', self._extension) + self.assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.energy.axis.function.refCoord.pix', False) + self.assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.energy.axis.function.refCoord.val', False) + self.assign_sanitize(self._wcs.wcs.crder, count, + 'Chunk.energy.axis.error.rnder') + self.assign_sanitize(self._wcs.wcs.csyer, count, + 'Chunk.energy.axis.error.syser') + self._finish_energy() count += 1 - if blueprint._polarization_axis_configed: + if self._blueprint._polarization_axis_configed: self._axes['polarization'][1] = True self._axes['polarization'][0] = count - assign_sanitize(self._wcs.wcs.ctype, count, - 'Chunk.polarization.axis.axis.ctype', False) - assign_sanitize(self._wcs.wcs.cunit, count, - 'Chunk.polarization.axis.axis.cunit', False) - array_shape[count] = self._attribute_lookup( - blueprint._get('Chunk.polarization.axis.function.naxis'), root - ) - assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.polarization.axis.function.refCoord.pix', - False) - assign_sanitize(self._wcs.wcs.crval, count, - 'Chunk.polarization.axis.function.refCoord.val', - False) + self.assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.polarization.axis.axis.ctype', False) + self.assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.polarization.axis.axis.cunit', False) + array_shape[count] = self._blueprint._get( + 'Chunk.polarization.axis.function.naxis', self._extension) + self.assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.polarization.axis.function.refCoord.pix', False) + self.assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.polarization.axis.function.refCoord.val', False) count += 1 # TODO - where's the delta? 
- if blueprint._obs_axis_configed: + if self._blueprint._obs_axis_configed: self._axes['observable'][1] = True self._axes['observable'][0] = count - assign_sanitize(self._wcs.wcs.ctype, count, - 'Chunk.observable.axis.axis.ctype', False) - assign_sanitize(self._wcs.wcs.cunit, count, - 'Chunk.observable.axis.axis.cunit', False) + self.assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.observable.axis.axis.ctype', False) + self.assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.observable.axis.axis.cunit', False) array_shape[count] = 1.0 - assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.observable.axis.function.refCoord.pix', - False) + self.assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.observable.axis.function.refCoord.pix', + False) self._wcs.wcs.crval[count] = 0.0 count += 1 - if blueprint._custom_axis_configed: + if self._blueprint._custom_axis_configed: self._axes['custom'][1] = True self._axes['custom'][0] = count - assign_sanitize(self._wcs.wcs.ctype, count, - 'Chunk.custom.axis.axis.ctype', False) - assign_sanitize(self._wcs.wcs.cunit, count, - 'Chunk.custom.axis.axis.cunit', False) - array_shape[count] = self._attribute_lookup( - blueprint._get('Chunk.custom.axis.function.naxis'), root - ) + self.assign_sanitize(self._wcs.wcs.ctype, count, + 'Chunk.custom.axis.axis.ctype', False) + self.assign_sanitize(self._wcs.wcs.cunit, count, + 'Chunk.custom.axis.axis.cunit', False) + array_shape[count] = self._blueprint._get( + 'Chunk.custom.axis.function.naxis', self._extension) # TODO delta - assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.custom.axis.function.refCoord.pix', False) - assign_sanitize(self._wcs.wcs.crval, count, - 'Chunk.custom.axis.function.refCoord.val', False) + self.assign_sanitize(self._wcs.wcs.crpix, count, + 'Chunk.custom.axis.function.refCoord.pix', + False) + self.assign_sanitize(self._wcs.wcs.crval, count, + 'Chunk.custom.axis.function.refCoord.val', + False) count += 1 self._wcs.array_shape = array_shape - def _append_cd_value(self, cd, cd_value, count): - prefix = [] - suffix = [] - for ii in range(0, len(cd)): - prefix.append(0.0) - for ii in range(len(cd), count): - suffix.append(0.0) - cd.append(prefix + [cd_value] + suffix) - - def _attribute_lookup(self, key, root): - if key is None: - # the blueprint lookup returned None, which is a valid value - # assignment - return None - if isinstance(key, tuple): - result = None - for ii in key[0]: - result = self._attribute_lookup(ii, root) - if result is None and key[1] is not None: - # apply the default value - result = key[1] - return result - elif isinstance(key, str): - if key.startswith('//'): - key = key.replace('//', '/') - root = self._base - if key.startswith('/'): - bits = key.split('/') - if len(bits) == 2: - if '(' in bits[1]: - x = bits[1].split('(') - if ':' in x[1]: - a = x[1].split(')')[0].split(':') - if len(a) > 2: - raise NotImplementedError - y = root[x[0]][int(a[0])][int(a[1])] - return y - else: - index = int(x[1].split(')')[0]) - y = root[x[0]][index] - return y - else: - return root[bits[1]] - else: - # the 2 is because there's always a leading slash, so the - # first bit is an empty string - temp = f'/{"/".join(ii for ii in bits[2:])}' - return self._attribute_lookup(temp, root[bits[1]]) - else: - # a value has been set - if key == 'None': - return None - return key - else: - return key - def _finish_chunk_observable(self, chunk): ctype = self._wcs.wcs.ctype[chunk.observable_axis-1] cunit = self._wcs.wcs.ctype[chunk.observable_axis-1] @@ -4428,77 +4677,53 @@ def 
_finish_chunk_time(self, chunk): # to a single value # TODO chunk.time.mjdref = self._wcs.to_header().get('MJDREF') - def _finish_energy(self, blueprint): - x = self._attribute_lookup( - blueprint._get('Chunk.energy.specsys'), self._base) - if x: + def _finish_energy(self): + x = self._blueprint._get('Chunk.energy.specsys', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.specsys = x - x = self._attribute_lookup( - blueprint._get('Chunk.energy.ssysobs'), self._base) - if x: + x = self._blueprint._get('Chunk.energy.ssysobs', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.ssysobs = x - x = _to_float( - self._attribute_lookup( - blueprint._get('Chunk.energy.restfrq'), self._base) - ) - if x: - self._wcs.wcs.restfrq = x - x = self._attribute_lookup( - blueprint._get('Chunk.energy.restwav'), self._base) - if x: + x = self._blueprint._get('Chunk.energy.restfrq', self._extension) + if x and not ObsBlueprint.needs_lookup(x): + self._wcs.wcs.restfrq = _to_float(x) + x = self._blueprint._get('Chunk.energy.restwav', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.restwav = x - x = self._attribute_lookup( - blueprint._get('Chunk.energy.velosys'), self._base) - if x: + x = self._blueprint._get('Chunk.energy.velosys', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.velosys = x - x = self._attribute_lookup( - blueprint._get('Chunk.energy.zsource'), self._base) - if x: + x = self._blueprint._get('Chunk.energy.zsource', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.zsource = x - x = self._attribute_lookup( - blueprint._get('Chunk.energy.ssyssrc'), self._base) - if x: + x = self._blueprint._get('Chunk.energy.ssyssrc', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.ssyssrc = x - x = self._attribute_lookup( - blueprint._get('Chunk.energy.velang'), self._base) - if x: + x = self._blueprint._get('Chunk.energy.velang', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.velangl = x return - def _finish_position(self, blueprint): - x = self._attribute_lookup( - blueprint._get('Chunk.position.coordsys'), self._base - ) - if x: + def _finish_position(self): + x = self._blueprint._get('Chunk.position.coordsys', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.radesys = x - x = _to_float( - self._attribute_lookup( - blueprint._get('Chunk.position.equinox'), self._base - ) - ) - if x: - self._wcs.wcs.equinox = x + x = self._blueprint._get('Chunk.position.equinox', self._extension) + if x and not ObsBlueprint.needs_lookup(x): + self._wcs.wcs.equinox = _to_float(x) - def _finish_time(self, blueprint): - x = self._attribute_lookup( - blueprint._get('Chunk.time.exposure'), self._base - ) - if x: + def _finish_time(self): + x = self._blueprint._get('Chunk.time.exposure', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.xposure = x - x = self._attribute_lookup( - blueprint._get('Chunk.time.timesys'), self._base - ) - if x: + x = self._blueprint._get('Chunk.time.timesys', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.timesys = x - x = self._attribute_lookup( - blueprint._get('Chunk.time.trefpos'), self._base - ) - if x: + x = self._blueprint._get('Chunk.time.trefpos', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.trefpos = x - x = self._attribute_lookup( - blueprint._get('Chunk.time.mjdref'), self._base - ) - if x: + x = 
self._blueprint._get('Chunk.time.mjdref', self._extension) + if x and not ObsBlueprint.needs_lookup(x): self._wcs.wcs.mjdref = x diff --git a/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml b/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml index 0b93b2ee..b9a57bea 100644 --- a/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml +++ b/caom2utils/caom2utils/tests/data/cfhtsg/mp9801/y.xml @@ -34,9 +34,6 @@ CADC http://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/en/megapipe/ 2018-03-14T16:36:50.000 - - None - caom:CFHT/1756572/1756572p caom:CFHT/1756574/1756574p diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index 009259e2..3c6cdfef 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -1615,25 +1615,6 @@ def test_parser_construction(vos_mock, stdout_mock): os.unlink(test_out_fqn) -def test_content_lookup_hdf5(): - from caom2utils import caom2blueprint - test_key = 'Chunk.position.axis.axis1.ctype' - test_path = '/header/wcs/ctype(0)' - test_blueprint = ObsBlueprint(position_axes=(1, 2)) - test_blueprint.set(test_key, test_path) - test_f_name = 'taos2_20220201T201317Z_star04239531.h5' - test_uri = f'cadc:TAOSII/{test_f_name}' - test_local_fqn = f'{TESTDATA_DIR}/taos_h5file/20220201T200117/{test_f_name}' - test_subject = caom2blueprint.HDF5Parser( - test_blueprint, test_uri, test_local_fqn, '/sitedata' - ) - assert len(test_subject._roots) == 3, 'wrong number of roots' - test_result = test_subject._content_lookup( - test_path, test_subject._roots[0] - ) - assert test_result == b'RA---TAN-SIP' - - def _get_local_headers(file_name): return _get_headers(file_name, None) From 0c53eca2c7ae42405f52df0ce2489513cdc0cab5 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Sat, 26 Mar 2022 13:51:33 -0700 Subject: [PATCH 12/38] CADC-10809 - make some names better, move some code around so that other code can be removed, increase coverage, flake8. --- caom2utils/caom2utils/caom2blueprint.py | 784 ++++++++---------- .../caom2utils/tests/test_fits2caom2.py | 12 +- .../caom2utils/tests/test_obs_blueprint.py | 20 +- 3 files changed, 352 insertions(+), 464 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 36a60c90..d64fa667 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -103,6 +103,7 @@ import sys import tempfile import traceback +from collections import defaultdict from urllib.parse import urlparse from cadcutils import net, util from cadcdata import FileInfo @@ -116,7 +117,7 @@ 'DispatchingFormatter', 'ObsBlueprint', 'get_arg_parser', 'proc', 'POLARIZATION_CTYPES', 'gen_proc', 'get_gen_proc_arg_parser', 'BlueprintParser', 'augment', 'get_vos_headers', - 'get_external_headers', 'HDF5Parser', 'Hdf5ObsBlueprint', + 'get_external_headers', 'Hdf5Parser', 'Hdf5ObsBlueprint', 'Hdf5WcsParser', 'update_artifact_meta'] CUSTOM_CTYPES = [ @@ -589,6 +590,18 @@ def __init__(self, position_axes=None, energy_axis=None, self._module_instance = instantiated_class # if True, existing values are used instead of defaults self._update = update + # a data structure to carry around twelve bits of data at a time: + # the first item in the set is the ctype index, and the second is + # whether or not the index means anything, resulting in a + # call to the blueprint configure_* methods if it's True. 
+ self._axis_info = { + 'custom': (0, False), + 'dec': (0, False), + 'energy': (0, False), + 'obs': (0, False), + 'polarization': (0, False), + 'ra': (0, False), + 'time': (0, False)} def configure_custom_axis(self, axis, override=True): """ @@ -936,102 +949,97 @@ def configure_time_axis(self, axis, override=True): self._time_axis_configed = True - def _guess_axis_info_from_plan(self): + def _guess_axis_info(self): """Look for info regarding axis types in the blueprint wcs_std. Configure the blueprint according to the guesses. """ - # a data structure to carry around twelve bits of data at a time: - # the first item in the set is the ctype index, and the second is - # whether or not the index means anything, resulting in a - # call to the blueprint configure_* methods if it's True. - axis_info = { - 'custom': (0, False), - 'dec': (0, False), - 'energy': (0, False), - 'obs': (0, False), - 'polarization': (0, False), - 'ra': (0, False), - 'time': (0, False)} - for ii in self._plan: if isinstance(self._plan[ii], tuple): for value in self._plan[ii][0]: if (value.startswith('CTYPE')) and value[-1].isdigit(): value = value.split('-')[0] - self._guess_axis_info_from_ctypes(ii, int(value[-1]), - axis_info) + self._guess_axis_info_from_ctypes(ii, int(value[-1])) else: value = self._plan[ii] if value is None: continue if (value.startswith('CTYPE')) and value[-1].isdigit(): value = value.split('-')[0] - self._guess_axis_info_from_ctypes(ii, int(value[-1]), - axis_info) + self._guess_axis_info_from_ctypes(ii, int(value[-1])) + self._guess_axis_info_from_plan() + + def _guess_axis_info_from_plan(self): for ii in self._plan: if ii.startswith('Chunk.position') and ii.endswith('axis1.ctype') \ - and not axis_info['ra'][1]: - configured_index = self._get_configured_index(axis_info, 'ra') - axis_info['ra'] = (configured_index, True) + and not self._axis_info['ra'][1]: + configured_index = self._get_configured_index( + self._axis_info, 'ra') + self._axis_info['ra'] = (configured_index, True) elif ii.startswith('Chunk.position') and \ - ii.endswith('axis2.ctype') and not axis_info['dec'][1]: - configured_index = self._get_configured_index(axis_info, + ii.endswith('axis2.ctype') and not \ + self._axis_info['dec'][1]: + configured_index = self._get_configured_index(self._axis_info, 'dec') - axis_info['dec'] = (configured_index, True) - elif ii.startswith('Chunk.energy') and not axis_info['energy'][1]: - configured_index = self._get_configured_index(axis_info, + self._axis_info['dec'] = (configured_index, True) + elif ii.startswith('Chunk.energy') and not \ + self._axis_info['energy'][1]: + configured_index = self._get_configured_index(self._axis_info, 'energy') - axis_info['energy'] = (configured_index, True) - elif ii.startswith('Chunk.time') and not axis_info['time'][1]: - configured_index = self._get_configured_index(axis_info, + self._axis_info['energy'] = (configured_index, True) + elif ii.startswith('Chunk.time') and not \ + self._axis_info['time'][1]: + configured_index = self._get_configured_index(self._axis_info, 'time') - axis_info['time'] = (configured_index, True) + self._axis_info['time'] = (configured_index, True) elif ii.startswith('Chunk.polarization') \ - and not axis_info['polarization'][1]: - configured_index = self._get_configured_index(axis_info, + and not self._axis_info['polarization'][1]: + configured_index = self._get_configured_index(self._axis_info, 'polarization') - axis_info['polarization'] = (configured_index, True) - elif ii.startswith('Chunk.observable') and not 
axis_info['obs'][1]: - configured_index = self._get_configured_index(axis_info, + self._axis_info['polarization'] = (configured_index, True) + elif ii.startswith('Chunk.observable') and not \ + self._axis_info['obs'][1]: + configured_index = self._get_configured_index(self._axis_info, 'obs') - axis_info['obs'] = (configured_index, True) - elif ii.startswith('Chunk.custom') and not axis_info['custom'][1]: - configured_index = self._get_configured_index(axis_info, + self._axis_info['obs'] = (configured_index, True) + elif ii.startswith('Chunk.custom') and not \ + self._axis_info['custom'][1]: + configured_index = self._get_configured_index(self._axis_info, 'custom') - axis_info['custom'] = (configured_index, True) + self._axis_info['custom'] = (configured_index, True) - if axis_info['ra'][1] and axis_info['dec'][1]: + if self._axis_info['ra'][1] and self._axis_info['dec'][1]: self.configure_position_axes( - (axis_info['ra'][0], axis_info['dec'][0]), False) - elif axis_info['ra'][1] or axis_info['dec'][1]: + (self._axis_info['ra'][0], self._axis_info['dec'][0]), False) + elif self._axis_info['ra'][1] or self._axis_info['dec'][1]: raise ValueError('Only one positional axis found ' '(ra/dec): {}/{}'. - format(axis_info['ra'][0], axis_info['dec'][0])) + format(self._axis_info['ra'][0], + self._axis_info['dec'][0])) else: # assume that positional axis are 1 and 2 by default - if (axis_info['time'][0] in [1, 2] or - axis_info['energy'][0] in [1, 2] or - axis_info['polarization'][0] in [1, 2] or - axis_info['obs'][0] in [1, 2] or - axis_info['custom'][0] in [1, 2]): + if (self._axis_info['time'][0] in [1, 2] or + self._axis_info['energy'][0] in [1, 2] or + self._axis_info['polarization'][0] in [1, 2] or + self._axis_info['obs'][0] in [1, 2] or + self._axis_info['custom'][0] in [1, 2]): raise ValueError('Cannot determine the positional axis') else: self.configure_position_axes((1, 2), False) - if axis_info['time'][1]: - self.configure_time_axis(axis_info['time'][0], False) - if axis_info['energy'][1]: - self.configure_energy_axis(axis_info['energy'][0], False) - if axis_info['polarization'][1]: - self.configure_polarization_axis(axis_info['polarization'][0], - False) - if axis_info['obs'][1]: - self.configure_observable_axis(axis_info['obs'][0], False) - if axis_info['custom'][1]: - self.configure_custom_axis(axis_info['custom'][0], False) + if self._axis_info['time'][1]: + self.configure_time_axis(self._axis_info['time'][0], False) + if self._axis_info['energy'][1]: + self.configure_energy_axis(self._axis_info['energy'][0], False) + if self._axis_info['polarization'][1]: + self.configure_polarization_axis( + self._axis_info['polarization'][0], False) + if self._axis_info['obs'][1]: + self.configure_observable_axis(self._axis_info['obs'][0], False) + if self._axis_info['custom'][1]: + self.configure_custom_axis(self._axis_info['custom'][0], False) - def _guess_axis_info_from_ctypes(self, lookup, counter, axis_info): + def _guess_axis_info_from_ctypes(self, lookup, counter): """ Check for the presence of blueprint keys in the plan, and whether or not they indicate an index in their configuration. @@ -1042,21 +1050,21 @@ def _guess_axis_info_from_ctypes(self, lookup, counter, axis_info): configured, and what is it's value. 
""" if lookup.startswith('Chunk.energy'): - axis_info['energy'] = (counter, True) + self._axis_info['energy'] = (counter, True) elif lookup.startswith('Chunk.polarization'): - axis_info['polarization'] = (counter, True) + self._axis_info['polarization'] = (counter, True) elif lookup.startswith('Chunk.time'): - axis_info['time'] = (counter, True) + self._axis_info['time'] = (counter, True) elif lookup.startswith('Chunk.position') and lookup.endswith( 'axis1.ctype'): - axis_info['ra'] = (counter, True) + self._axis_info['ra'] = (counter, True) elif lookup.startswith('Chunk.position') and lookup.endswith( 'axis2.ctype'): - axis_info['dec'] = (counter, True) + self._axis_info['dec'] = (counter, True) elif lookup.startswith('Chunk.observable'): - axis_info['obs'] = (counter, True) + self._axis_info['obs'] = (counter, True) elif lookup.startswith('Chunk.custom'): - axis_info['custom'] = (counter, True) + self._axis_info['custom'] = (counter, True) else: raise ValueError( f'Unrecognized axis type: {lookup}') @@ -1146,7 +1154,7 @@ def load_from_file(self, file_name): if cleaned_up_value == 'None': cleaned_up_value = None self.set(key.strip(), cleaned_up_value) - self._guess_axis_info_from_plan() + self._guess_axis_info() @classproperty def CAOM2_ELEMENTS(cls): @@ -1252,8 +1260,8 @@ def add_attribute(self, caom2_element, attribute, extension=0): insert(0, attribute) else: raise AttributeError( - (f'No attributes in extension {extension} associated ' - 'with keyword {caom2_element}')) + (f'No attributes in extension {extension} ' + f'associated with keyword {caom2_element}')) else: self._extensions[extension][caom2_element] = \ ([attribute], None) @@ -1443,12 +1451,6 @@ def has_chunk(self, extension): value = self._plan['Chunk'] return not value == '{ignore}' - @staticmethod - def is_fits(value): - """Hide the blueprint structure from clients - they shouldn't need - to know that a value of type tuple requires special processing.""" - return isinstance(value, tuple) - @staticmethod def is_table(value): """Hide the blueprint structure from clients - they shouldn't need @@ -1530,7 +1532,7 @@ class Hdf5ObsBlueprint(ObsBlueprint): ob.add_attribute('Observation.target.name', '//header/object/obj_id') # lookup value starting with / means rooted at the base of the - # "find_roots_here" parameter for HDF5Parser + # "find_roots_here" parameter for Hdf5Parser # # (integer) means return only the value with the index of "integer" # from a list @@ -1550,6 +1552,13 @@ def __init__(self, position_axes=None, energy_axis=None, polarization_axis=None, time_axis=None, obs_axis=None, custom_axis=None, module=None, update=True, instantiated_class=None): + """ + There are no sensible/known HDF5 defaults for WCS construction, so + default to ensuring the blueprint executes with mostly values of None. + + Use the attribute _wcs_std, so that the list of WCS keywords used + as input is known. 
+ """ super().__init__( position_axes, energy_axis, @@ -1561,36 +1570,10 @@ def __init__(self, position_axes=None, energy_axis=None, update, instantiated_class, ) - # TODO - remove the defaults that have a value of None as they - # have no purpose - tmp = {'Observation.metaRelease': ([], None), - 'Observation.instrument.name': ([], None), - 'Observation.type': ([], None), - 'Observation.environment.ambientTemp': ([], - None), - # set the default for SimpleObservation construction + tmp = { 'Observation.algorithm.name': ([], 'exposure'), - 'Observation.instrument.keywords': ([], None), - 'Observation.proposal.id': ([], None), - 'Observation.target.name': ([], None), - 'Observation.telescope.name': ([], None), - 'Observation.telescope.geoLocationX': ([], - None), - 'Observation.telescope.geoLocationY': ([], - None), - 'Observation.telescope.geoLocationZ': ([], - None), - 'Observation.observationID': ([], None), 'Plane.calibrationLevel': ([], CalibrationLevel.RAW_STANDARD), 'Plane.dataProductType': ([], DataProductType.IMAGE), - 'Plane.metaRelease': ([], None), - 'Plane.dataRelease': ([], None), - 'Plane.productID': ([], None), - 'Plane.provenance.name': ([], None), - 'Plane.provenance.project': ([], None), - 'Plane.provenance.producer': ([], None), - 'Plane.provenance.reference': ([], None), - 'Plane.provenance.lastExecuted': ([], None), 'Artifact.releaseType': ([], ReleaseType.DATA), 'Chunk': 'include' } @@ -1598,12 +1581,6 @@ def __init__(self, position_axes=None, energy_axis=None, for key in tmp: self.set(key, tmp[key]) - # there are no sensible/known HDF5 defaults, so just try to make sure - # the blueprint executes with a lot of None values - - # keep the attribute _wcs_std, so that the list of WCS keywords used - # as input is known - def configure_custom_axis(self, axis, override=True): """ Set the expected custom keywords by index in the blueprint @@ -1611,7 +1588,6 @@ def configure_custom_axis(self, axis, override=True): :param axis: The index expected for the custom axis. :param override: Set to False when reading from a file. - :return: """ if self._custom_axis_configed: self.logger.debug( @@ -1626,7 +1602,6 @@ def configure_custom_axis(self, axis, override=True): self.set('Chunk.custom.axis.function.refCoord.pix', ([], None)) self.set('Chunk.custom.axis.function.refCoord.val', ([], None)) - # TODO - what goes here? self._wcs_std['Chunk.custom.axis.axis.ctype'] = '' self._wcs_std['Chunk.custom.axis.axis.cunit'] = '' self._wcs_std['Chunk.custom.axis.function.naxis'] = '' @@ -1641,10 +1616,10 @@ def configure_position_axes(self, axes, override=True): the wcs_std lookup. :param axes: The index expected for the position axes. - :return: + :param override: Set to False when reading from a file. 
""" if self._pos_axes_configed: - self.logger.error( + self.logger.debug( 'Attempt to configure already-configured position axes.') return @@ -1848,103 +1823,19 @@ def configure_time_axis(self, axis, override=True): self._wcs_std['Chunk.time.trefpos'] = '' self._wcs_std['Chunk.time.mjdref'] = '' - self._wcs_std['Chunk.time.axis.axis.ctype'] = \ - f'CTYPE{axis}' - self._wcs_std['Chunk.time.axis.axis.cunit'] = \ - f'CUNIT{axis}' - self._wcs_std['Chunk.time.axis.error.syser'] = \ - f'CSYER{axis}' - self._wcs_std['Chunk.time.axis.error.rnder'] = \ - f'CRDER{axis}' - self._wcs_std['Chunk.time.axis.function.naxis'] = \ - f'NAXIS{axis}' - self._wcs_std['Chunk.time.axis.function.delta'] = \ - f'CDELT{axis}' - self._wcs_std['Chunk.time.axis.function.refCoord.pix'] = \ - f'CRPIX{axis}' - self._wcs_std['Chunk.time.axis.function.refCoord.val'] = \ - f'CRVAL{axis}' + self._wcs_std['Chunk.time.axis.axis.ctype'] = '' + self._wcs_std['Chunk.time.axis.axis.cunit'] = '' + self._wcs_std['Chunk.time.axis.error.syser'] = '' + self._wcs_std['Chunk.time.axis.error.rnder'] = '' + self._wcs_std['Chunk.time.axis.function.naxis'] = '' + self._wcs_std['Chunk.time.axis.function.delta'] = '' + self._wcs_std['Chunk.time.axis.function.refCoord.pix'] = '' + self._wcs_std['Chunk.time.axis.function.refCoord.val'] = '' self._time_axis_configed = True - def _guess_axis_info_from_plan(self): - """Look for info regarding axis types in the blueprint wcs_std. - Configure the blueprint according to the guesses. - """ - # a data structure to carry around twelve bits of data at a time: - # the first item in the set is the ctype index, and the second is - # whether or not the index means anything, resulting in a - # call to the blueprint configure_* methods if it's True. - axis_info = { - 'custom': (0, False), - 'dec': (0, False), - 'energy': (0, False), - 'obs': (0, False), - 'polarization': (0, False), - 'ra': (0, False), - 'time': (0, False)} - - for ii in self._plan: - if ii.startswith('Chunk.position') and ii.endswith('axis1.ctype') \ - and not axis_info['ra'][1]: - configured_index = self._get_configured_index(axis_info, 'ra') - axis_info['ra'] = (configured_index, True) - elif ii.startswith('Chunk.position') and \ - ii.endswith('axis2.ctype') and not axis_info['dec'][1]: - configured_index = self._get_configured_index(axis_info, - 'dec') - axis_info['dec'] = (configured_index, True) - elif ii.startswith('Chunk.energy') and not axis_info['energy'][1]: - configured_index = self._get_configured_index(axis_info, - 'energy') - axis_info['energy'] = (configured_index, True) - elif ii.startswith('Chunk.time') and not axis_info['time'][1]: - configured_index = self._get_configured_index(axis_info, - 'time') - axis_info['time'] = (configured_index, True) - elif ii.startswith('Chunk.polarization') \ - and not axis_info['polarization'][1]: - configured_index = self._get_configured_index(axis_info, - 'polarization') - axis_info['polarization'] = (configured_index, True) - elif ii.startswith('Chunk.observable') and not axis_info['obs'][1]: - configured_index = self._get_configured_index(axis_info, - 'obs') - axis_info['obs'] = (configured_index, True) - elif ii.startswith('Chunk.custom') and not axis_info['custom'][1]: - configured_index = self._get_configured_index(axis_info, - 'custom') - axis_info['custom'] = (configured_index, True) - - if axis_info['ra'][1] and axis_info['dec'][1]: - self.configure_position_axes( - (axis_info['ra'][0], axis_info['dec'][0]), False) - elif axis_info['ra'][1] or axis_info['dec'][1]: - raise 
ValueError('Only one positional axis found ' - '(ra/dec): {}/{}'. - format(axis_info['ra'][0], axis_info['dec'][0])) - else: - # assume that positional axis are 1 and 2 by default - if (axis_info['time'][0] in [1, 2] or - axis_info['energy'][0] in [1, 2] or - axis_info['polarization'][0] in [1, 2] or - axis_info['obs'][0] in [1, 2] or - axis_info['custom'][0] in [1, 2]): - raise ValueError('Cannot determine the positional axis') - else: - self.configure_position_axes((1, 2), False) - - if axis_info['time'][1]: - self.configure_time_axis(axis_info['time'][0], False) - if axis_info['energy'][1]: - self.configure_energy_axis(axis_info['energy'][0], False) - if axis_info['polarization'][1]: - self.configure_polarization_axis(axis_info['polarization'][0], - False) - if axis_info['obs'][1]: - self.configure_observable_axis(axis_info['obs'][0], False) - if axis_info['custom'][1]: - self.configure_custom_axis(axis_info['custom'][0], False) + def _guess_axis_info(self): + self._guess_axis_info_from_plan() class BlueprintParser: @@ -1975,7 +1866,7 @@ def apply_blueprint(self): # first apply the functions if (self.blueprint._module is not None or - self.blueprint._module_instance is not None): + self.blueprint._module_instance is not None): for key, value in plan.items(): if ObsBlueprint.is_function(value): if self._blueprint._module_instance is None: @@ -1986,7 +1877,7 @@ def apply_blueprint(self): # apply defaults for key, value in plan.items(): - if ObsBlueprint.needs_lookup(value) and value[1]: + if ObsBlueprint.has_default_value(value): # there is a default value set if key in plan: plan[key] = value[1] @@ -2004,10 +1895,6 @@ def augment_observation(self, observation, artifact_uri, product_id=None): raise ValueError( f'Observation type mis-match for {observation}.') - temp = self._get_from_list( - 'Observation.metaRelease', index=0, - current=observation.meta_release - ) observation.meta_release = self._get_datetime(self._get_from_list( 'Observation.metaRelease', index=0, current=observation.meta_release)) @@ -2139,6 +2026,30 @@ def _get_from_list(self, lookup, index, current=None): self.logger.debug(f'{lookup}: value is {value}') return value + def _get_set_from_list(self, lookup, index): + value = None + keywords = None + try: + keywords = self.blueprint._get(lookup) + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + self.logger.debug(f'Could not find \'{lookup}\' in caom2blueprint ' + f'configuration.') + + # if there's something useful as a value in the keywords, + # extract it + if keywords: + if ObsBlueprint.needs_lookup(keywords): + # if there's a default value use it + if keywords[1]: + value = keywords[1] + self.logger.debug( + f'{lookup}: assigned default value {value}.') + elif not ObsBlueprint.is_function(keywords): + value = keywords + self.logger.debug(f'{lookup}: assigned value {value}.') + return value + def add_error(self, key, message): self._errors.append('{} {} {}'.format( datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), key, message)) @@ -2540,72 +2451,6 @@ def _get_environment(self, current): self.logger.debug('End Environment augmentation.') return enviro - def _get_from_list(self, lookup, index, current=None): - value = None - try: - keys = self.blueprint._get(lookup) - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - self.logger.debug( - f'Could not find {lookup!r} in caom2blueprint configuration.') - if current: - self.logger.debug( - f'{lookup}: using current value of {current!r}.') - value = current - return value - - if 
ObsBlueprint.needs_lookup(keys): - for ii in keys[0]: - try: - value = self.headers[index].get(ii) - if value: - self.logger.debug( - f'{lookup}: assigned value {value} based on ' - f'keyword {ii}.') - break - except (KeyError, IndexError): - if keys[0].index(ii) == len(keys[0]) - 1: - self.add_error(lookup, sys.exc_info()[1]) - # assign a default value, if one exists - if keys[1]: - if current is None: - value = keys[1] - self.logger.debug( - f'{lookup}: assigned default value {value}.') - else: - value = current - if value is None: - # checking current does not work in the general case, - # because current might legitimately be 'None' - if self._blueprint.update: - if ( - current is not None - or (current is None and isinstance(value, bool)) - ): - value = current - self.logger.debug( - f'{lookup}: used current value {value}.') - else: - # assign a default value, if one exists - if keys[1]: - if current is None: - value = keys[1] - self.logger.debug( - f'{lookup}: assigned default value {value}.') - else: - value = current - - elif (keys is not None) and (keys != ''): - if keys == 'None': - value = None - else: - value = keys - elif current: - value = current - - self.logger.debug(f'{lookup}: value is {value}') - return value - def _get_instrument(self, current): """ Create an Instrument instance populated with available content @@ -2711,7 +2556,7 @@ def _get_metrics(self, current): metrics = None if (source_number_density or background or background_stddev or - flux_density_limit or mag_limit or sample_snr): + flux_density_limit or mag_limit or sample_snr): metrics = Metrics() metrics.source_number_density = source_number_density metrics.background = background @@ -2761,7 +2606,7 @@ def _get_naxis(self, label, index): aug_function = None if (aug_length is not None and aug_delta is not None and - aug_ref_coord is not None): + aug_ref_coord is not None): aug_function = \ CoordFunction1D(aug_length, aug_delta, aug_ref_coord) self.logger.debug( @@ -2789,7 +2634,8 @@ def _get_naxis(self, label, index): def _get_observable(self, current): """ - Create a Observable instance populated with available content information. + Create a Observable instance populated with available content + information. :return: Observable """ self.logger.debug('Begin Observable augmentation.') @@ -2802,7 +2648,8 @@ def _get_observable(self, current): def _get_proposal(self, current): """ - Create a Proposal instance populated with available content information. + Create a Proposal instance populated with available content + information. :return: Proposal """ self.logger.debug('Begin Proposal augmentation.') @@ -2829,7 +2676,8 @@ def _get_proposal(self, current): def _get_provenance(self, current): """ - Create a Provenance instance populated with available Content information. + Create a Provenance instance populated with available Content + information. 
:return: Provenance """ self.logger.debug('Begin Provenance augmentation.') @@ -2909,34 +2757,6 @@ def _get_requirements(self, current): self.logger.debug('End Requirement augmentation.') return reqts - def _get_set_from_list(self, lookup, index): - value = None - keywords = None - try: - keywords = self.blueprint._get(lookup) - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - self.logger.debug( - f'Could not find \'{lookup}\' in caom2blueprint configuration.') - - if isinstance(keywords, tuple): - for ii in keywords[0]: - try: - value = self.headers[index].get(ii) - break - except KeyError: - self.add_error(lookup, sys.exc_info()[1]) - if keywords[1]: - value = keywords[1] - self.logger.debug( - '{}: assigned default value {}.'.format(lookup, - value)) - elif keywords: - value = keywords - self.logger.debug(f'{lookup}: assigned value {value}.') - - return value - def _get_target(self, current): """ Create a Target instance populated with available content information. @@ -3001,7 +2821,8 @@ def _get_target_position(self, current): def _get_telescope(self, current): """ - Create a Telescope instance populated with available content information. + Create a Telescope instance populated with available content + information. :return: Telescope """ self.logger.debug('Begin Telescope augmentation.') @@ -3138,8 +2959,7 @@ def _try_polarization_with_blueprint(self, chunk, index): else: chunk.polarization = PolarizationWCS(aug_naxis) self.logger.debug( - 'Creating PolarizationWCS for {} from blueprint'. - format(self.uri)) + f'Creating PolarizationWCS for {self.uri} from blueprint') self.logger.debug('End augmentation with blueprint for polarization.') @@ -3224,7 +3044,7 @@ def _try_position_with_blueprint(self, chunk, index): aug_function = None if (aug_dimension is not None and aug_ref_coord is not None and aug_cd11 is not None and aug_cd12 is not None and - aug_cd21 is not None and aug_cd22 is not None): + aug_cd21 is not None and aug_cd22 is not None): aug_function = CoordFunction2D(aug_dimension, aug_ref_coord, aug_cd11, aug_cd12, aug_cd21, aug_cd22) @@ -3233,7 +3053,7 @@ def _try_position_with_blueprint(self, chunk, index): aug_axis = None if (aug_x_axis is not None and aug_y_axis is not None and - aug_function is not None): + aug_function is not None): aug_axis = CoordAxis2D(aug_x_axis, aug_y_axis, aug_x_error, aug_y_error, None, None, aug_function) self.logger.debug( @@ -3552,8 +3372,7 @@ def apply_blueprint(self): extension)) # apply defaults to all extensions for key, value in plan.items(): - if ObsBlueprint.needs_lookup(value) and value[1]: - # there is a default value set + if ObsBlueprint.has_default_value(value): for index, header in enumerate(self.headers): for keywords in value[0]: for keyword in keywords.split(','): @@ -3643,6 +3462,72 @@ def _get_chunk_naxis(self, chunk, index=None): else: super()._get_chunk_naxis(chunk) + def _get_from_list(self, lookup, index, current=None): + value = None + try: + keys = self.blueprint._get(lookup) + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + self.logger.debug( + f'Could not find {lookup!r} in caom2blueprint configuration.') + if current: + self.logger.debug( + f'{lookup}: using current value of {current!r}.') + value = current + return value + + if ObsBlueprint.needs_lookup(keys): + for ii in keys[0]: + try: + value = self.headers[index].get(ii) + if value: + self.logger.debug( + f'{lookup}: assigned value {value} based on ' + f'keyword {ii}.') + break + except (KeyError, IndexError): + if keys[0].index(ii) == 
len(keys[0]) - 1: + self.add_error(lookup, sys.exc_info()[1]) + # assign a default value, if one exists + if keys[1]: + if current is None: + value = keys[1] + self.logger.debug( + f'{lookup}: assigned default value {value}.') + else: + value = current + if value is None: + # checking current does not work in the general case, + # because current might legitimately be 'None' + if self._blueprint.update: + if ( + current is not None + or (current is None and isinstance(value, bool)) + ): + value = current + self.logger.debug( + f'{lookup}: used current value {value}.') + else: + # assign a default value, if one exists + if keys[1]: + if current is None: + value = keys[1] + self.logger.debug( + f'{lookup}: assigned default value {value}.') + else: + value = current + + elif (keys is not None) and (keys != ''): + if keys == 'None': + value = None + else: + value = keys + elif current: + value = current + + self.logger.debug(f'{lookup}: value is {value}') + return value + def _get_from_table(self, lookup, extension): """ Return a space-delimited list of all the row values from a column. @@ -3681,6 +3566,34 @@ def _get_from_table(self, lookup, extension): self.logger.debug(f'{lookup}: value is {value}') return value + def _get_set_from_list(self, lookup, index): + value = None + keywords = None + try: + keywords = self.blueprint._get(lookup) + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + self.logger.debug(f'Could not find \'{lookup}\' in caom2blueprint ' + f'configuration.') + + if isinstance(keywords, tuple): + for ii in keywords[0]: + try: + value = self.headers[index].get(ii) + break + except KeyError: + self.add_error(lookup, sys.exc_info()[1]) + if keywords[1]: + value = keywords[1] + self.logger.debug( + '{}: assigned default value {}.'.format(lookup, + value)) + elif keywords: + value = keywords + self.logger.debug(f'{lookup}: assigned value {value}.') + + return value + @staticmethod def _has_data_array(header): """ @@ -3716,65 +3629,85 @@ def _has_data_array(header): return True -# h5py is an extra in this package since most collections do not -# require it -import h5py -from collections import defaultdict - - -class HDF5Parser(ContentParser): +class Hdf5Parser(ContentParser): """ Parses an HDF5 file and extracts the CAOM2 related information which can be used to augment an existing CAOM2 observation, plane, or artifact. If there is per-Chunk metadata in the file, the constructor parameter - 'find_roots_here' is the location where the N Chunk metadata starts. + 'find_roots_here' is the address location in the file where the N Chunk + metadata starts. The WCS-related keywords of the HDF5 files are used to create instances of astropy.wcs.WCS so that verify might be called. There is no CADC support for the equivalent of the FITS --fhead parameter for HDF5 files, which is why the name of the file on a local disk is - always required. - + required. + + How the classes work together for HDF5 files: + - build an HDF5ObsBlueprint, with _CAOM2_ELEMENT keys, and HDF5 metadata + path names as keys + - cache the metadata from an HDF5 file in the HDF5ObsBlueprint. This + caching is done in the "apply_blueprint_from_file" method in the + Hdf5Parser class, and replaces the path names in the blueprint with + the values from the HDF5 file. The caching is done so that all HDF5 + file access is isolated to one point in time. + - use the cached metadata to build astropy.wcs instances for verification + in Hdf5WcsParser. 
+ - use the astropy.wcs instance and other blueprint metadata to fill the + CAOM2 record. """ def __init__( self, obs_blueprint, uri, local_f_name, find_roots_here='sitedata' ): """ - :param obs_blueprint: Hdf5ObsBlueprint instance - :param uri: which artifact augmentation is basedd on + :param uri: which artifact augmentation is based on :param local_f_name: str file name on disk :param find_roots_here: str location where Chunk metadata starts """ + # h5py is an extra in this package since most collections do not + # require it + import h5py self._file = h5py.File(local_f_name) + # where N Chunk metadata starts self._find_roots_here = find_roots_here + # the length of the array is the number of Parts in an HDF5 file, + # and the values are HDF5 lookup path names. self._extension_names = [] super().__init__(obs_blueprint, uri) self._wcs_parser = None def apply_blueprint_from_file(self): + """ + Retrieve metadata from file, cache in the blueprint. + """ self.logger.debug('Begin apply_blueprint_from_file') - individual, multi = self._xxx() - # for every key in the HDF5 file, is the key referenced in the - # blueprint? if yes, capture the value referenced by the key back - # to the blueprint - - # for each Part, for every key in the HDF5 file, is the file key - # referenced in the blueprint Part? If yes, capture the value - # referenced by the key back to the blueprint for the Part - - def y(name, object): + # h5py is an extra in this package since most collections do not + # require it + import h5py + individual, multi = self._extract_path_names_from_blueprint() + + def _extract_from_item(name, object): + """ + Function signature dictated by h5py visititems implementation. + Executed for each dataset/group in an HDF5 file. + + :param name: fully-qualified HDF5 path name + :param object: what the HDF5 path name points to + """ if name == self._find_roots_here: - # print(f'{name} {type(object)} {dir(object)}') - for ii, key in enumerate(object.keys()): - temp = f'{name}/{key}' + for ii, path_name in enumerate(object.keys()): + # store the names and locations of the Part/Chunk metadata + temp = f'{name}/{path_name}' self.logger.debug(f'Adding extension {temp}') self._extension_names.append(temp) self._blueprint._extensions[ii] = {} + # If it's the Part/Chunk metadata, capture it to extensions. + # Syntax of the keys described in Hdf5ObsBlueprint class. 
for part_index, part_name in enumerate(self._extension_names): if ( name.startswith(part_name) @@ -3782,36 +3715,38 @@ def y(name, object): and object.dtype.names is not None ): for d_name in object.dtype.names: - temp = f'{name.replace(part_name, "")}/{d_name}' - # print(temp) - for key in multi.keys(): - if key == temp: - for jj in multi.get(key): + temp_path = f'{name.replace(part_name, "")}/{d_name}' + for path_name in multi.keys(): + if path_name == temp_path: + for jj in multi.get(path_name): self._blueprint.set( jj, object[d_name], part_index ) - elif key.startswith(temp) and '(' in key: - # print(f'temp {temp} key {key}') - z = key.split('(') + elif (path_name.startswith(temp_path) + and '(' in path_name): + z = path_name.split('(') if ':' in z[1]: a = z[1].split(')')[0].split(':') if len(a) > 2: raise NotImplementedError - for jj in multi.get(key): + for jj in multi.get(path_name): self._blueprint.set( jj, - object[d_name][int(a[0])][int(a[1])], + object[d_name][int(a[0])][ + int(a[1])], part_index, ) else: index = int(z[1].split(')')[0]) - for jj in multi.get(key): + for jj in multi.get(path_name): self._blueprint.set( jj, object[d_name][index], part_index, ) + # if it's Observation/Plane/Artifact metadata, capture it to + # the base blueprint if isinstance(object, h5py.Dataset): if object.dtype.names is not None: for d_name in object.dtype.names: @@ -3820,10 +3755,18 @@ def y(name, object): for jj in individual.get(temp): self._blueprint.set(jj, object[d_name], 0) - self._file.visititems(y) + self._file.visititems(_extract_from_item) self.logger.debug('Done apply_blueprint_from_file') - def _xxx(self): + def _extract_path_names_from_blueprint(self): + """ + :return: individual - a dictionary of lists, keys are unique path + names for finding metadata once per file. Values are + _CAOM2_ELEMENT strings. + multiple - a dictionary of lists, keys are unique path names for + finding metadata N times per file. Values are _CAOM2_ELEMENT + strings. + """ individual = defaultdict(list) multi = defaultdict(list) for key, value in self._blueprint._plan.items(): @@ -3836,16 +3779,12 @@ def _xxx(self): return individual, multi def apply_blueprint(self): - """ - Different implementation than BlueprintParser, because of the - extensions. 
- """ self.logger.debug('Begin apply_blueprint') self.apply_blueprint_from_file() # after the apply_blueprint_from_file call, all the metadata from the # file has been applied to the blueprint, so now do the bits that - # require no file access + # require no access to file content # pointers that are short to type exts = self._blueprint._extensions @@ -3853,7 +3792,7 @@ def apply_blueprint(self): # apply the functions if (self._blueprint._module is not None or - self._blueprint._module_instance is not None): + self._blueprint._module_instance is not None): for key, value in plan.items(): if ObsBlueprint.is_function(value): if self._blueprint._module_instance is None: @@ -3900,8 +3839,8 @@ def apply_blueprint(self): if q is None: exts[extension][key] = value[1] self.logger.debug( - f'Add {key} and assign default value of {value[1]} ' - f'in extension {extension}.') + f'Add {key} and assign default value of ' + f'{value[1]} in extension {extension}.') elif ObsBlueprint.needs_lookup(value): exts[extension][key] = value[1] self.logger.debug( @@ -3910,6 +3849,7 @@ def apply_blueprint(self): plan[key] = value[1] self.logger.debug(f'{key}: set value to default of {value[1]}') + self._file.close() self.logger.debug('Done apply_blueprint') return @@ -3927,72 +3867,6 @@ def ignore_chunks(self, artifact, index=0): artifact.parts.add(Part(str(index))) return False - # def _get_from_list(self, lookup, index, current=None): - # value = None - # try: - # keys = self.blueprint._get(lookup) - # except KeyError: - # self.add_error(lookup, sys.exc_info()[1]) - # self.logger.debug( - # f'Could not find {lookup!r} in caom2blueprint configuration.') - # if current: - # self.logger.debug( - # f'{lookup}: using current value of {current!r}.') - # value = current - # return value - # - # if ObsBlueprint.needs_lookup(keys): - # for ii in keys[0]: - # try: - # value = self.headers[index].get(ii) - # if value: - # self.logger.debug( - # f'{lookup}: assigned value {value} based on ' - # f'keyword {ii}.') - # break - # except (KeyError, IndexError): - # if keys[0].index(ii) == len(keys[0]) - 1: - # self.add_error(lookup, sys.exc_info()[1]) - # # assign a default value, if one exists - # if keys[1]: - # if current is None: - # value = keys[1] - # self.logger.debug( - # f'{lookup}: assigned default value {value}.') - # else: - # value = current - # if value is None: - # # checking current does not work in the general case, - # # because current might legitimately be 'None' - # if self._blueprint.update: - # if ( - # current is not None - # or (current is None and isinstance(value, bool)) - # ): - # value = current - # self.logger.debug( - # f'{lookup}: used current value {value}.') - # else: - # # assign a default value, if one exists - # if keys[1]: - # if current is None: - # value = keys[1] - # self.logger.debug( - # f'{lookup}: assigned default value {value}.') - # else: - # value = current - # - # if (keys is not None) and (keys != ''): - # if keys == 'None': - # value = None - # else: - # value = keys - # elif current: - # value = current - # - # self.logger.debug(f'{lookup}: value is {value}') - # return value - class WcsParser: """ @@ -4327,7 +4201,7 @@ def _get_spatial_axis(self, xindex, yindex): aug_cd11 is not None and \ aug_cd12 is not None and \ aug_cd21 is not None and \ - aug_cd22 is not None: + aug_cd22 is not None: aug_function = CoordFunction2D(aug_dimension, aug_ref_coord, aug_cd11, aug_cd12, aug_cd21, aug_cd22) @@ -4436,7 +4310,7 @@ def _get_axis_length(self, for_axis): class 
Hdf5WcsParser(WcsParser): """ This class initializes an astropy.wcs instance with metadata from an - Hdf5Parser. + Hdf5ObsBlueprint populated using an Hdf5Parser. """ def __init__(self, blueprint, extension): @@ -4499,10 +4373,11 @@ def assign_sanitize(self, assignee, index, key, sanitize=True): Do not want to blindly assign None to astropy.wcs attributes, so use this method for conditional assignment. - If someone wants to assign None to a value, either use 'set', and + The current implementation is that ff there is a legitimate need to + assign None to a value, either use 'set' in the Hdf5ObsBlueprint, and specifically assign None, or execute a function to set it to None conditionally. There will be no support for a Default value of None - with HDF5 files. That's a decision for today, anyway. + with HDF5 files. """ x = self._blueprint._get(key, self._extension) if sanitize: @@ -4514,7 +4389,6 @@ def _set_wcs(self): self._wcs = WCS(naxis=self._blueprint.get_configed_axes_count()) array_shape = [0] * self._blueprint.get_configed_axes_count() count = 0 - if self._blueprint._pos_axes_configed: self._axes['ra'][1] = True self._axes['dec'][1] = True @@ -4583,10 +4457,12 @@ def _set_wcs(self): 'Chunk.time.axis.axis.cunit', False) array_shape[count] = self._blueprint._get( 'Chunk.time.axis.function.naxis', self._extension) - self.assign_sanitize(self._wcs.wcs.crpix, count, - 'Chunk.time.axis.function.refCoord.pix', False) - self.assign_sanitize(self._wcs.wcs.crval, count, - 'Chunk.time.axis.function.refCoord.val', False) + self.assign_sanitize( + self._wcs.wcs.crpix, count, + 'Chunk.time.axis.function.refCoord.pix', False) + self.assign_sanitize( + self._wcs.wcs.crval, count, + 'Chunk.time.axis.function.refCoord.val', False) self.assign_sanitize(self._wcs.wcs.crder, count, 'Chunk.time.axis.error.rnder') self.assign_sanitize(self._wcs.wcs.csyer, count, @@ -4602,14 +4478,16 @@ def _set_wcs(self): 'Chunk.energy.axis.axis.cunit', False) array_shape[count] = self._blueprint._get( 'Chunk.energy.axis.function.naxis', self._extension) - self.assign_sanitize(self._wcs.wcs.crpix, count, + self.assign_sanitize( + self._wcs.wcs.crpix, count, 'Chunk.energy.axis.function.refCoord.pix', False) - self.assign_sanitize(self._wcs.wcs.crval, count, + self.assign_sanitize( + self._wcs.wcs.crval, count, 'Chunk.energy.axis.function.refCoord.val', False) - self.assign_sanitize(self._wcs.wcs.crder, count, - 'Chunk.energy.axis.error.rnder') - self.assign_sanitize(self._wcs.wcs.csyer, count, - 'Chunk.energy.axis.error.syser') + self.assign_sanitize( + self._wcs.wcs.crder, count, 'Chunk.energy.axis.error.rnder') + self.assign_sanitize( + self._wcs.wcs.csyer, count, 'Chunk.energy.axis.error.syser') self._finish_energy() count += 1 if self._blueprint._polarization_axis_configed: @@ -4621,9 +4499,11 @@ def _set_wcs(self): 'Chunk.polarization.axis.axis.cunit', False) array_shape[count] = self._blueprint._get( 'Chunk.polarization.axis.function.naxis', self._extension) - self.assign_sanitize(self._wcs.wcs.crpix, count, + self.assign_sanitize( + self._wcs.wcs.crpix, count, 'Chunk.polarization.axis.function.refCoord.pix', False) - self.assign_sanitize(self._wcs.wcs.crval, count, + self.assign_sanitize( + self._wcs.wcs.crval, count, 'Chunk.polarization.axis.function.refCoord.val', False) count += 1 # TODO - where's the delta? 
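A minimal sketch of the astropy pattern that Hdf5WcsParser._set_wcs follows above: size the WCS to the number of configured axes, then assign ctype/cunit/crpix/crval per axis index from the cached blueprint values. The axis values below are invented for illustration and are not taken from any TAOS-II file.

    # Illustration only: populate an astropy WCS the way _set_wcs does,
    # one configured axis at a time. All values here are made up.
    from astropy.wcs import WCS

    wcs = WCS(naxis=2)                       # get_configed_axes_count() analogue
    wcs.wcs.ctype = ['RA---TAN', 'DEC--TAN']
    wcs.wcs.cunit = ['deg', 'deg']
    wcs.wcs.crpix = [2048.0, 2048.0]         # Chunk.position...refCoord.pix
    wcs.wcs.crval = [210.5, -30.2]           # Chunk.position...refCoord.val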
@@ -5015,8 +4895,8 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, parser = FitsParser(local, blueprint, uri=uri) elif '.h5' in local: logging.debug( - f'Using an HDF5Parser for local file {local}') - parser = HDF5Parser(blueprint, uri, local) + f'Using an Hdf5Parser for local file {local}') + parser = Hdf5Parser(blueprint, uri, local) else: # explicitly ignore headers for txt and image files logging.debug(f'Using a BlueprintParser for {local}') diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index 3c6cdfef..a28b2244 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -72,7 +72,7 @@ from cadcutils import net from cadcdata import FileInfo from caom2utils import FitsParser, FitsWcsParser, main_app, update_blueprint -from caom2utils import HDF5Parser, Hdf5WcsParser, ContentParser +from caom2utils import Hdf5Parser, Hdf5WcsParser, ContentParser from caom2utils import Hdf5ObsBlueprint from caom2utils import ObsBlueprint, BlueprintParser, gen_proc from caom2utils import get_gen_proc_arg_parser, augment @@ -172,6 +172,14 @@ def test_hdf5_wcs_parser_set_wcs(): test_fqn = f'{TESTDATA_DIR}/taos_h5file/20220201T200117/{test_f_name}' test_artifact = Artifact(test_uri, ProductType.SCIENCE, ReleaseType.DATA) + # check the error messages + test_position_bp.configure_position_axes((4, 5)) + test_energy_bp.configure_energy_axis(2) + test_time_bp.configure_time_axis(2) + test_polarization_bp.configure_polarization_axis(2) + test_observable_bp.configure_observable_axis(2) + test_custom_bp.configure_custom_axis(2) + for bp in [ test_position_bp, test_energy_bp, @@ -180,7 +188,7 @@ def test_hdf5_wcs_parser_set_wcs(): test_observable_bp, test_custom_bp, ]: - test_subject = HDF5Parser(bp, test_uri, test_fqn) + test_subject = Hdf5Parser(bp, test_uri, test_fqn) assert test_subject is not None, 'expect a result' test_subject.augment_artifact(test_artifact) if bp == test_position_bp: diff --git a/caom2utils/caom2utils/tests/test_obs_blueprint.py b/caom2utils/caom2utils/tests/test_obs_blueprint.py index 7917854d..4bd26b74 100644 --- a/caom2utils/caom2utils/tests/test_obs_blueprint.py +++ b/caom2utils/caom2utils/tests/test_obs_blueprint.py @@ -296,7 +296,7 @@ def test_load_from_file_configure(): ob.add_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE1') ob.add_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE2') ob.set('Chunk.energy.axis.axis.ctype', 'WAVE') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._pos_axes_configed, 'Failure to call configure_position_axes' assert ob._energy_axis_configed, 'Failure to call configure_energy_axis' assert ob._wcs_std['Chunk.energy.axis.axis.ctype'] == 'CTYPE3', \ @@ -306,7 +306,7 @@ def test_load_from_file_configure(): ob.add_attribute('Chunk.position.axis.axis1.ctype', 'CTYPE3') ob.add_attribute('Chunk.position.axis.axis2.ctype', 'CTYPE4') ob.set('Chunk.energy.axis.axis.ctype', 'WAVE') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._pos_axes_configed, 'Failure to call configure_position_axes' assert ob._energy_axis_configed, 'Failure to call configure_energy_axis' assert ob._wcs_std['Chunk.energy.axis.axis.ctype'] == 'CTYPE1', \ @@ -314,41 +314,41 @@ def test_load_from_file_configure(): ob = ObsBlueprint() ob.set('Chunk.energy.axis.axis.ctype', 'WAVE') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._wcs_std['Chunk.energy.axis.axis.ctype'] == 'CTYPE3', \ 
ob._wcs_std['Chunk.energy.axis.axis.ctype'] ob = ObsBlueprint() ob.set('Chunk.polarization.axis.axis.ctype', 'STOKES') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._wcs_std['Chunk.polarization.axis.axis.ctype'] == 'CTYPE5', \ ob._wcs_std['Chunk.polarization.axis.axis.ctype'] assert ob._polarization_axis_configed, 'pol config' ob = ObsBlueprint() ob.set('Chunk.observable.axis.axis.ctype', 'COUNT') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._wcs_std['Chunk.observable.axis.axis.ctype'] == 'CTYPE6', \ ob._wcs_std['Chunk.observable.axis.axis.ctype'] assert ob._obs_axis_configed, 'obs config' ob = ObsBlueprint() ob.set('Chunk.custom.axis.axis.ctype', 'FARDEP') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._wcs_std['Chunk.custom.axis.axis.ctype'] == 'CTYPE7', \ ob._wcs_std['Chunk.custom.axis.axis.ctype'] assert ob._custom_axis_configed, 'custom config' ob = ObsBlueprint() ob.set('Chunk.time.axis.axis.ctype', 'TIME') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._wcs_std['Chunk.time.axis.axis.ctype'] == 'CTYPE4', \ ob._wcs_std['Chunk.time.axis.axis.ctype'] assert ob._time_axis_configed, 'time config' # should get the position axes by default ob = ObsBlueprint() - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._pos_axes_configed, 'pos config' assert not ob._energy_axis_configed, 'energy config' assert not ob._custom_axis_configed, 'custom config' @@ -365,7 +365,7 @@ def test_load_from_file_configure(): ob.add_attribute('Chunk.time.axis.axis.ctype', 'CTYPE5') ob.add_attribute('Chunk.energy.axis.axis.ctype', 'CTYPE6') ob.add_attribute('Chunk.observable.axis.axis.ctype', 'CTYPE7') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() assert ob._wcs_std['Chunk.polarization.axis.axis.ctype'] == 'CTYPE1', \ ob._wcs_std['Chunk.polarization.axis.axis.ctype'] assert ob._wcs_std['Chunk.custom.axis.axis.ctype'] == 'CTYPE2', \ @@ -384,7 +384,7 @@ def test_load_from_file_configure(): with pytest.raises(ValueError): ob = ObsBlueprint() ob.add_attribute('Chunk.polarization.axis.axis.ctype', 'CTYPE1') - ob._guess_axis_info_from_plan() + ob._guess_axis_info() def test_has_chunk(): From 5475259d6e9ac94333cb29a3babe01efbdc822d8 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Mon, 28 Mar 2022 10:33:47 -0700 Subject: [PATCH 13/38] CADC-10809 - make a small h5 file for testing. 
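For reference, a test file this small can be produced with a few lines of h5py. The sketch below is illustrative only; the group and dataset names are assumptions, not the actual taos2_test.h5 layout.

    # Illustration only: write a tiny HDF5 file suitable for unit tests.
    # The structure shown here is hypothetical.
    import h5py
    import numpy as np

    with h5py.File('taos2_test.h5', 'w') as f:
        site = f.create_group('sitedata')
        site.create_dataset('chunk0', data=np.zeros((4, 4), dtype=np.float32))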
--- .../20220201T200117/20220201T200117.xml | 2 +- .../taos_h5file/20220201T200117/taos2_test.h5 | Bin 0 -> 24384 bytes caom2utils/caom2utils/tests/test_fits2caom2.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos2_test.h5 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml index a66841a4..bb179533 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml @@ -31,7 +31,7 @@ science data application/x-hdf5 - 70014256 + 24384 md5:5a28f24e62324c1a12ff76a46c59bc54 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos2_test.h5 b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taos2_test.h5 new file mode 100644 index 0000000000000000000000000000000000000000..2cad2d2f4445d5efdd11741a8dc6a05e7f269f48 GIT binary patch literal 24384 zcmeGk3qVxG_AVb?K@p7bSGuAC8rXt>i0*Z9Sp@|pz%pLBtgy-&f0mDgNiQ?hG^F=> zMk*?bl=*v#V7WI@ODTV;rG>vr4O28t{3mbb&ddew?%lO#nZl0VJ9FloIWu$S%*>fH zdzljwJgSpj4?7A5v6!-<9N8p!HR2#wp&SAed@N3ga9n`nax0V&QScu{b;J2im_MH` zI#Q{m043p1VqAUwT`#2GIRD$C@SkEv>C1}*~nskcF(CRet zYMq+o6a5Sz8TT0r>2HWNVvvCK(wzPVh!MT?KI^rRwJr(p^;eF&SAg}0^foRjB_m#w zMWsxhris&0RBDPgS*J~&O7Q8K$!t=mP10m&(zTilgdzG()>%RyHEw*ch_a%v`cV?B zw}hUr$7Fr=z>El81cp<45YBBY7E!kBK89X!r$Q1X^0B58Sd<%(uQL!097k1RN?da( zJpqWJe3LViAev1>81WFJGAYTF6UZ9~9ynAGhlh-%C$zSpewenhvSPEuc!#wX<#zzt zAi!2M4A$BJZLm9+PDxM8m^2|MusY*P!^;gKXQyx5sl#2F{O{x2mF_MM}udu_mCKl0ZxRg zb-Dyiwj@lGkPP>7ewfsuPq*5c6b1QsNx^@}DbU*&HC+kFk zeg_~TI7`G-c=rk2-Mc^AeT~f~o3HJ@vx^M!^74uf8s`-i8Uc$3D-|u^bagz;<^msH z;&k12 ze*QF*8GMnV>HI$HPv?ek_<@UmD(R7!$IM&(+uS`DIx_P5u6G8Fl>?c!48PRerrIy> z%j{HqAXQw_8Q=+Z!;-%D7U$#&;t3r;9@Teo{H?UP%-|uHYeNQ9%D$l^HZDzc;x6!rdC_TTP z`C0vrI}|uGU+m3|_3*g6jgKdAyTiLmrn*n#_>+8o{L`U?^m|49M~n?v>EK+plR4@* zx@^bv6?C8JL-(K9*r3-YA9FbWxl~p$?91-+oR2b^q4uLD=ateM*PL1X#3_m??)t`} z`Dr0sJytKLR_>?s=t0?D`Pt8RqDSUwceosu8}#G5fiEw$J$_lPCinpx9iYu!=4C7 zz4mUqvC}&1nP*F7Iq{B57~hfIV%A+3(eIXC7=FG(FjtS$Gn=n`x@sPy@wBV=?^3Vt zz9_f1OfpJF>Lc;*H{&h`ZE>c@t!!YPTXSYO$>+yb%M`=5rcC4b^YH}c=D#<};pRVv zo6Uc3WAlICQyrE2&KS=BGMjZaHFjUyas1g;8m-elaDBmWGyXot_)|F@mBpcU2J6du zn@XE+?M|XIJ?V@kek3Iz0v;D=z)fVWIAeCDqX?p0z&QbK65w2*(ze_djP}bBJC$B^k zNNi$AAFqj{cfWid9(0)J7j)RL0MTG$T6s;yP{=Ov5 zvZF}5h|DA?Tkvg6W;#tGD>HNQCUrr2D6;^;S`dQJLr7R|XRzF0@0$XVTb-cXmLdHV zQ&b@+mXfZWszpcCCMHHwq)jQ4Gg#Gx`f4f=dQDC$OD{1+3~+>An>3ZBzh6BB=WA7l zCe5rK!uEnLgCM7188|0fN{&Ugc)>PFKo01$COxakvQ20~WeAXRQ?!X1wsDxGz>u^> z4s0W`WF1Y6yC>c!3R{#V8o2D^Vazo@bDFc0bisUO0^>%caw53!Jp3qpQSzF)|k zAGG{w#FCuzxe6?aHX#ANA5?;SUC8q*LjS!K;9v~RZ5Ttd7%ZV|Ft_JnGA4)`;li;fpU_od_*gi~Z!K@3b)Z{s)Wd*2C^okai2ETr z35qQ5HYNf7I^co(KN0H>5eK--(*hHZnvxQi32n7hZB7O_vQgP_$Z{j|4Npr_OfW|4 z06D!<)UUk|DdEG{V?jo39AW-GZv$&<9LNfswKW-WDNRlkGxCGy6}Sd#!LcYE z*y_pnWJK0wq2i<*dw&RyiNMZhNhLxBa6UO73ETBg*&|Z9QBFv&Y`Fd9rmImhE|c^l zqCb~qG(q;4ST}?wdRl&;78Z(wVi2T5~4FfFv-^2guYek+9+;vf)xLXhR%RwTgRZzZ_n zd&tI%VfdRbxWUG2bnHH>O=6=5I5(R6-A63T+YQg>oMr<8RTj=gxaI*g8yvwjLTN{8 zI?jVM=4gfIXS~4tjKJLg+=zb;kqj`+pX?Jk&zPno^SZ#(G+k@Kn%^h>ZoNu;sCxD2 z%R~2&<$>-61MtE@EPi4?SyR8V{&zQUYqJd4sBz6M_~2S10Z;RFhqEg)leN$kg?N*6 z@qhpXId}*iYzlcmlI3sAHy)P`PB{P{3m#Y&t^v*4V!TY2IuX#N{(&-TKnp`Ycwjwy zzysT7iysgPwsA57Va6g2Bug56Y^5#pxynWc}{B@m?4CH)KurS|WSMG0= zjLRhJHD&aNgd7>?UvUgXc|=bGFgy|es#1cBgakE}eH>Hf_3E+n*I~WkTWVy)OplV# 
ze#;+!bNlUS8LEez>rK~c!L~LD@b_B@?qCk@SJ`dQUnej>BmNQ<;QQ+i^5oC2lawhcsdA#E zy|dG{JGZ9>98;ETf1&z?vGAKf_o|(cqUIKh!Sa zDH*m72^WgmHS0jt^e_4|!3Ej{h1FRGctUyPZkJncHRK862^~KkHXu|!yk{Y^;6slY z5mzf@c|m7)&nj_!25lvZ?(5r1FVjv4x|jlbmOkKyo5KQS`T{(`t@~@g_FdYJ{>``Qy$@7lxOy&C zJe^{vn!)%_h|9ejdqqFae(XrA`lfFmNxj}``K+}kJJVGax*n6X;WCoXk6#M=ZRHM! zWR5={Pv8fgpJA1Ln=gX->F_~&@u~Oy>Aj;|`eaBH%*5Oqmrj6hb{)jjLGSkI+r_q& z>z?T)+pFWRs^I*5@nGb~#^&dks#Hby;^oZum-ZFObPcjiHJ`da`E%1ZyAGRX&rVpd zin+Vx+9cZ7imrdk?_iF5lN=94zW#2*H!)22;R#(APqSyLcjo+b!!y7@PDK%^Pp{eE zpSicHu~?ZVJU?%#pYMP5&>#P1cmLurpZSaCFv`zQ&aAkyS*B4Q$ts@e#I+-_*j9b~ zbTMuH$-sr-_9AA{oV^)+_bE8MXk*bOd)rLyym>Rddc>?cpgn-q|lM_~eP}sT_YY{``21$F*){i~5ZytSq`168AY%d$pr&!$-^M z-Gk>@hs|ive=~Vrd9Qk@?7R598t*CJGo9>iOj=*Iif;7UdS>AOE9Tqq`9r?!)3nYU zp8x!j)m>s}uj}hmzdC78A1->|YNcYRK|7b;P1Zj~ds}BVFctdzY=t^M*hzox$ba%L z*H$whY}FO-+F8up9sT{Ruv--Uc4$<=zzPLdkL!7dyzGIQ%zr;WL%&vVUVk~Q_KTFG zhTrU-`lnR6{>3Nhb205^U&tRIBkkeGzu)}o!;pSy9DhE(wZKCD7yj?D&G}y(pFgIz z)+hnK{*wE;DxRC{ufDH~zl;tu-hk~qC`*YFf5*k@+Fw_&6jZEF^$aY)#G+ww@tuLNk+`{Knd-){P8!p z-xj`z>LL1Mx>gIewMl@#-%4-?J@EQL`rG#I>vjpuPo5z{1oVJ0|GqAtCx1SBvQHd~ z;bU6DWM0?(qh)?;h0O00fA@Xe-iNAJkG?#PJY;!T?(3kfXt}SGQ{-=!!L?BM_w~}z KXsv8{U-v&~!+3Q7 literal 0 HcmV?d00001 diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index a28b2244..3230657c 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -167,7 +167,7 @@ def test_hdf5_wcs_parser_set_wcs(): test_polarization_bp = Hdf5ObsBlueprint(polarization_axis=1) test_observable_bp = Hdf5ObsBlueprint(obs_axis=1) test_custom_bp = Hdf5ObsBlueprint(custom_axis=1) - test_f_name = 'taos2_20220201T201317Z_star04239531.h5' + test_f_name = 'taos2_test.h5' test_uri = f'cadc:TEST/{test_f_name}' test_fqn = f'{TESTDATA_DIR}/taos_h5file/20220201T200117/{test_f_name}' test_artifact = Artifact(test_uri, ProductType.SCIENCE, ReleaseType.DATA) From 3260d2524ab8ccc21e23a9de90f5cd6bf76e7072 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Mon, 28 Mar 2022 13:15:11 -0700 Subject: [PATCH 14/38] CADC-10809 - add extras h5, test, remove dev_requirements.txt. --- .github/workflows/cibuild.yml | 8 ++++++++ caom2utils/dev_requirements.txt | 6 ------ caom2utils/setup.cfg | 11 ++++++++++- caom2utils/tox.ini | 7 +++++-- 4 files changed, 23 insertions(+), 9 deletions(-) delete mode 100644 caom2utils/dev_requirements.txt diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index 201be3e4..a3c17523 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -9,6 +9,14 @@ on: types: [created] jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Install hdf5 library + run: sudo apt-get install -y libhdf5-dev + egginfo: runs-on: ubuntu-latest steps: diff --git a/caom2utils/dev_requirements.txt b/caom2utils/dev_requirements.txt deleted file mode 100644 index c70690ad..00000000 --- a/caom2utils/dev_requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ --e ../caom2 --e . 
-pytest>=6.2 -pytest-cov>=2.12 -flake8>=3.9 -mock>=4.0 diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index 4ac723c2..bd3bbe01 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg @@ -29,7 +29,7 @@ description = CAOM-2.4 utils author = Canadian Astronomy Data Centre author_email = cadc@nrc-cnrc.gc.ca license = AGPLv3 -url = http://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 +url = https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 edit_on_github = False github_project = opencadc/caom2tools install_requires = cadcdata==2.0 caom2>=2.4 astropy>=2.0 spherical-geometry>=1.2.11;python_version>="3.6" vos>=3.1.1 @@ -37,6 +37,15 @@ install_requires = cadcdata==2.0 caom2>=2.4 astropy>=2.0 spherical-geometry>=1.2 version = 1.6.4 +[options.extras_require] +h5 = + h5py + +test = + pytest>=6.2 + pytest-cov>=2.12 + flake8>=3.9 + mock>=4.0 [entry_points] fits2caom2 = caom2utils.legacy:main_app diff --git a/caom2utils/tox.ini b/caom2utils/tox.ini index 644b5753..f072ed5a 100644 --- a/caom2utils/tox.ini +++ b/caom2utils/tox.ini @@ -13,10 +13,14 @@ requires = description = run tests changedir = {toxinidir} passenv = HOME -deps = -rdev_requirements.txt +deps = + -e ../caom2 commands = pip freeze pytest {[package]name} +extras = + h5 + test [testenv:egg_info] description = ensure egg_info works without dependencies @@ -27,7 +31,6 @@ commands = description = determine the code coverage deps: coverage>=4.5.4 - -rdev_requirements.txt commands = pytest {[package]name} --cov {[package]name} --cov-report xml --cov-config={toxinidir}/setup.cfg From a97b06af327b30c740610b8eba35cac0679e8761 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Mon, 28 Mar 2022 17:20:11 -0700 Subject: [PATCH 15/38] CADC-10809 - make the action work with an extra. --- .github/workflows/cibuild.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index a3c17523..9eecfe0e 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -18,6 +18,7 @@ jobs: run: sudo apt-get install -y libhdf5-dev egginfo: + needs: build runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 From f28fc0f880feda3a4a8c6da400b9c77908bc109a Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 30 Mar 2022 10:27:51 -0700 Subject: [PATCH 16/38] CADC-10809 - tox configuration. 
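Because h5py is now an optional dependency (the "h5" extra declared in setup.cfg above), the HDF5 code paths import it lazily. A minimal sketch of that guard, with the helper name and error message invented for illustration:

    # Illustration only: defer the h5py import so installs without the "h5"
    # extra keep working for non-HDF5 collections.
    def _open_h5(local_f_name):
        try:
            import h5py
        except ImportError as e:
            raise ImportError(
                'h5py is required for HDF5 files; install with the "h5" extra'
            ) from e
        return h5py.File(local_f_name)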
--- .../taos_h5file/20220201T200117/taosii.module | 41 +++++++++++++++++++ caom2utils/setup.cfg | 9 +++- caom2utils/setup.py | 2 +- caom2utils/tox.ini | 6 +-- 4 files changed, 51 insertions(+), 7 deletions(-) create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.module diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.module b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.module new file mode 100644 index 00000000..f39352bd --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/taosii.module @@ -0,0 +1,41 @@ +from astropy import units +from astropy.coordinates import SkyCoord + + +def get_exposure(base): + b = base.get('base') + mjdrunstart = b['header']['timeseries']['mjdrunstart'] + mjdrunend = b['header']['timeseries']['mjdrunend'] + result = 0.0 + if mjdrunstart is not None and mjdrunend is not None: + result = mjdrunend - mjdrunstart + return result + + +def get_target_position_cval1(base): + ra, dec_ignore = _get_target_position(base) + return ra + + +def get_target_position_cval2(base): + ra_ignore, dec = _get_target_position(base) + return dec + + +def _get_target_position(base): + b = base.get('base') + ra = b['header']['object']['obj_ra'] + dec = b['header']['object']['obj_dec'] + result = SkyCoord( + ra.decode('utf-8'), + dec.decode('utf-8'), + frame='icrs', + unit=(units.hourangle, units.deg), + ) + return result.ra.degree, result.dec.degree + + +def get_time_axis_range_end(base): + b = base.get('base') + x = b['header']['timeseries']['numepochs'] + return x - 1 diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index bd3bbe01..ca5f3f60 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg @@ -32,15 +32,20 @@ license = AGPLv3 url = https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 edit_on_github = False github_project = opencadc/caom2tools -install_requires = cadcdata==2.0 caom2>=2.4 astropy>=2.0 spherical-geometry>=1.2.11;python_version>="3.6" vos>=3.1.1 # version should be PEP386 compatible (http://www.python.org/dev/peps/pep-0386) version = 1.6.4 +[options] +install_requires = + cadcdata==2.0 + caom2>=2.4 + astropy>=2.0 + spherical-geometry>=1.2.11;python_version>="3.6" + vos>=3.1.1 [options.extras_require] h5 = h5py - test = pytest>=6.2 pytest-cov>=2.12 diff --git a/caom2utils/setup.py b/caom2utils/setup.py index 07977899..26fe2a69 100755 --- a/caom2utils/setup.py +++ b/caom2utils/setup.py @@ -117,7 +117,7 @@ def run(self): 'Programming Language :: Python', 'Programming Language :: Python :: 3', ], - cmdclass = { + cmdclass={ 'coverage': PyTest, 'inttest': IntTestCommand } diff --git a/caom2utils/tox.ini b/caom2utils/tox.ini index f072ed5a..5eff62df 100644 --- a/caom2utils/tox.ini +++ b/caom2utils/tox.ini @@ -11,16 +11,14 @@ requires = [testenv] description = run tests -changedir = {toxinidir} passenv = HOME deps = -e ../caom2 + .[h5] + .[test] commands = pip freeze pytest {[package]name} -extras = - h5 - test [testenv:egg_info] description = ensure egg_info works without dependencies From c0f099c62f99fb73839ff87cca97037f08a8919d Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 30 Mar 2022 10:41:36 -0700 Subject: [PATCH 17/38] CADC-10809 - tox coverage reporting. 
--- caom2utils/setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index ca5f3f60..6d5a93c0 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg @@ -51,6 +51,7 @@ test = pytest-cov>=2.12 flake8>=3.9 mock>=4.0 + xml-compare>=1.0.5 [entry_points] fits2caom2 = caom2utils.legacy:main_app From 5baf58024d254bfdc2bb0b846fa710d3b816b8f6 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 30 Mar 2022 10:53:31 -0700 Subject: [PATCH 18/38] CADC-10809 - add tox testenv:cov dependencies. --- caom2utils/tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/caom2utils/tox.ini b/caom2utils/tox.ini index 5eff62df..b9e75abf 100644 --- a/caom2utils/tox.ini +++ b/caom2utils/tox.ini @@ -29,6 +29,8 @@ commands = description = determine the code coverage deps: coverage>=4.5.4 + .[h5] + .[test] commands = pytest {[package]name} --cov {[package]name} --cov-report xml --cov-config={toxinidir}/setup.cfg From 3eb8d8e10d7a68e70a2ae7c7d88699c56f3f5b94 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 30 Mar 2022 18:01:38 -0700 Subject: [PATCH 19/38] CADC-10715 - refactor to use the BlueprintParser/ContentParser/FitsParser hierarchy from caom2utils (CADC-10808). Also use the latest caom2pipe.manage_composable.StorageName for BriteName (CADC-10275). --- caom2utils/caom2utils/data_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index 8a33690b..76b221aa 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -420,7 +420,7 @@ def get_file_encoding(fqn): def get_file_type(fqn): """Basic header extension to content_type lookup.""" if (fqn.endswith('.header') or fqn.endswith('.txt') or - fqn.endswith('.cat')): + fqn.endswith('.cat') or fqn.endswith('.dat')): return 'text/plain' elif fqn.endswith('.gif'): return 'image/gif' From f7a52c536f99fb521a660d1742f545659ee8d64d Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 31 Mar 2022 08:53:05 -0700 Subject: [PATCH 20/38] CADC-10809 - no md5sum schemes. --- caom2utils/caom2utils/data_util.py | 9 +++++---- caom2utils/caom2utils/tests/test_data_util.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index 76b221aa..3c8bd234 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -210,7 +210,7 @@ def info(self, uri): Retrieve the descriptive metdata associated with a file. :param uri: str that is an Artifact URI, representing the file for which to retrieve metadata - :return: cadcdata.FileInfo instance + :return: cadcdata.FileInfo instance, no scheme for md5sum """ self._logger.debug(f'Begin info for {uri}') try: @@ -218,6 +218,7 @@ def info(self, uri): result = self._cadc_client.cadcinfo(uri) # make the result look like the other possible ways to # obtain metadata + result.md5sum = result.md5sum.replace('md5:', '') else: archive, f_name = StorageClientWrapper._decompose(uri) temp = self._cadc_client.get_file_info(archive, f_name) @@ -225,7 +226,7 @@ def info(self, uri): id=uri, size=temp.get('size'), file_type=temp.get('type'), - md5sum=temp.get('md5sum') + md5sum=temp.get('md5sum').replace('md5:', '') ) except exceptions.NotFoundException: self._logger.info(f'cadcinfo:: {uri} not found') @@ -390,7 +391,7 @@ def get_local_file_info(fqn): """ Gets descriptive metadata for a file on disk. :param fqn: Fully-qualified name of the file on disk. 
- :return: FileInfo + :return: FileInfo, no scheme on the md5sum value. """ s = stat(fqn) # copy and paste from cadcdata/storageinventory.py @@ -401,7 +402,7 @@ def get_local_file_info(fqn): meta = FileInfo( id=path.basename(fqn), size=s.st_size, - md5sum=f'md5:{hash_md5.hexdigest()}', + md5sum=hash_md5.hexdigest(), file_type=get_file_type(fqn), ) return meta diff --git a/caom2utils/caom2utils/tests/test_data_util.py b/caom2utils/caom2utils/tests/test_data_util.py index 1c936285..886fcbab 100644 --- a/caom2utils/caom2utils/tests/test_data_util.py +++ b/caom2utils/caom2utils/tests/test_data_util.py @@ -369,7 +369,7 @@ def _check_put_result(client_mock): replace=True, file_type='application/fits', file_encoding=None, - md5_checksum='md5:3c66ee2cb6e0c2cfb5cd6824d353dc11', + md5_checksum='3c66ee2cb6e0c2cfb5cd6824d353dc11', ) From 44988e0c0ed9653c7703a1fd3bd1d054a38ae7c2 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Tue, 12 Apr 2022 11:30:23 -0700 Subject: [PATCH 21/38] CADC-10809 - merge CADC-10275. --- caom2utils/caom2utils/data_util.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index 3c8bd234..aa80da6b 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -259,6 +259,11 @@ def put(self, working_directory, uri, stream='default'): cadc_meta = self.info(uri) if cadc_meta is None: replace = False + self._logger.debug( + f'uri {uri} src {fqn} replace {replace} file_type ' + f'{local_meta.file_type} encoding {encoding} md5_checksum ' + f'{local_meta.md5sum}' + ) self._cadc_client.cadcput( uri, src=fqn, @@ -271,6 +276,11 @@ def put(self, working_directory, uri, stream='default'): archive, f_name = self._decompose(uri) # libmagic does a worse job with guessing file types # than ad for .fits.gz => it will say 'binary' + self._logger.debug( + f'archive {archive} f_name {f_name} archive_stream ' + f'{stream} mime_type {local_meta.file_type} ' + f'mime_encoding {encoding} md5_check True ' + ) self._cadc_client.put_file( archive, f_name, From 9ccac8cb99e9446d207d2579dd72d2f8bd9c64ce Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 13 Apr 2022 09:07:29 -0700 Subject: [PATCH 22/38] CADC-10809 - address code review comments. --- caom2utils/caom2utils/caom2blueprint.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index d64fa667..3763fd6e 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -1460,8 +1460,9 @@ def is_table(value): @staticmethod def is_function(value): """ - Check if a blueprint value has Python 'function' syntax. Exclude - strings with syntax that enable addressing HDF5 arrays. + Check if a blueprint value has Python 'function' syntax. The + "'/' not in value" clause excludes strings with syntax that enables + addressing HDF5 arrays. :return: True if the value is the name of a function to be executed, False, otherwise @@ -2138,6 +2139,11 @@ def _execute_external_instance(self, value, key, extension): :param value the name of the function to apply. :param key: :param extension: the current extension name or number. + :raise Caom2Exception exception raised when there is a recognizable + error in the information being used to create a CAOM2 record. A + correct and consistent CAOM2 record cannot be created from the + input metadata. 
The client should treat the Observation instance + under construction as invalid. """ result = '' try: From 1c0fcd580997acd666844f62fdaa801e25789b00 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 13 Apr 2022 09:17:23 -0700 Subject: [PATCH 23/38] CADC-10809 - address code review comments. --- caom2utils/caom2utils/caom2blueprint.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 3763fd6e..f606f9a1 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -3684,6 +3684,8 @@ def __init__( # and the values are HDF5 lookup path names. self._extension_names = [] super().__init__(obs_blueprint, uri) + # used to set the astropy wcs info, resulting in a validated wcs + # that can be used to construct a valid CAOM2 record self._wcs_parser = None def apply_blueprint_from_file(self): From c7481ee99f6476b160d357a3b3f3fa036b7e4d27 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 21 Apr 2022 08:23:49 -0700 Subject: [PATCH 24/38] CADC-10809 - fix a logging message. --- caom2utils/caom2utils/caom2blueprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index f606f9a1..09ccfd46 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -2071,7 +2071,7 @@ def _to_enum_type(self, value, to_enum_type): if value is None: raise ValueError( f'Must set a value of {to_enum_type.__name__} for ' - f'{self._uri}.') + f'{self.uri}.') elif isinstance(value, to_enum_type): return value else: From a7e7f6f4524a513d61aeceb07166746a405c3f19 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Tue, 26 Apr 2022 17:29:04 -0700 Subject: [PATCH 25/38] CADC-10275 - fix a typo. --- caom2utils/caom2utils/caom2blueprint.py | 2 +- caom2utils/caom2utils/data_util.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index f606f9a1..09ccfd46 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -2071,7 +2071,7 @@ def _to_enum_type(self, value, to_enum_type): if value is None: raise ValueError( f'Must set a value of {to_enum_type.__name__} for ' - f'{self._uri}.') + f'{self.uri}.') elif isinstance(value, to_enum_type): return value else: diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index aa80da6b..b73a862a 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -226,7 +226,8 @@ def info(self, uri): id=uri, size=temp.get('size'), file_type=temp.get('type'), - md5sum=temp.get('md5sum').replace('md5:', '') + md5sum=temp.get('md5sum').replace('md5:', ''), + encoding=temp.get('encoding'), ) except exceptions.NotFoundException: self._logger.info(f'cadcinfo:: {uri} not found') From 131caa199ac6622156a20619edb0398454786cc6 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 5 May 2022 09:30:17 -0700 Subject: [PATCH 26/38] CADC-10275 - increase the sensitivity of the 'isclose' implementation in get_differences. --- caom2/caom2/diff.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/caom2/caom2/diff.py b/caom2/caom2/diff.py index 62581210..51876d36 100644 --- a/caom2/caom2/diff.py +++ b/caom2/caom2/diff.py @@ -82,6 +82,7 @@ from caom2 import Chunk from . 
import caom_util +import logging __all__ = ['get_differences'] @@ -413,7 +414,7 @@ def _not_equal(rhs, lhs): else: # if only using python 3.5+, use math.isclose, instead of this # description of math.isclose from the python documentation - result = abs(rhs-lhs) <= max(1e-10 * max(abs(rhs), abs(lhs)), 1e-9) + result = abs(rhs-lhs) <= max(1e-12 * max(abs(rhs), abs(lhs)), 1e-11) else: result = rhs == lhs return not result From 6b496661d963932b0e77711450b3cf030879edf4 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 5 May 2022 14:53:20 -0700 Subject: [PATCH 27/38] CADC-10809 - for when values come back from the HDF5 file as bytes. --- caom2utils/caom2utils/caom2blueprint.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 09ccfd46..75164048 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -2009,7 +2009,10 @@ def _get_from_list(self, lookup, index, current=None): return value if (keywords and not ObsBlueprint.needs_lookup(keywords) and not ObsBlueprint.is_function(keywords)): - value = keywords + if isinstance(keywords, numpy.bytes_): + value = keywords.decode('utf-8') + else: + value = keywords elif self._blueprint.update: # The first clause: boolean attributes are used to represent # three different values: True, False, and unknown. For boolean From 8724e35413a52adb2fe3629f64e8cc6c04f1d98c Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Mon, 9 May 2022 17:50:35 -0700 Subject: [PATCH 28/38] CADC-10809 - integration with taosii2caom2 pipeline resulted in a change to the Hdf5Parser API. --- caom2utils/caom2utils/caom2blueprint.py | 33 ++++++++++++------- .../dao_c122_2016_007777.xml | 2 +- .../dao_c122_2016_007830.xml | 2 +- .../dao_c122_2016_007939.xml | 2 +- .../dao_c122_2016_007942.xml | 2 +- .../20220201T200117/20220201T200117.xml | 14 ++++---- .../caom2utils/tests/test_fits2caom2.py | 5 ++- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 75164048..81a1ed73 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -1835,6 +1835,19 @@ def configure_time_axis(self, axis, override=True): self._time_axis_configed = True + def set(self, caom2_element, value, extension=0): + """ + Sets the value associated with an element in the CAOM2 model. Value + cannot be a tuple. 
+ :param caom2_element: name CAOM2 element (as in + ObsBlueprint.CAOM2_ELEMEMTS) + :param value: new value of the CAOM2 element + :param extension: extension number (used only for Chunk elements) + """ + if isinstance(value, numpy.bytes_): + value = value.decode('utf-8') + super().set(caom2_element, value, extension) + def _guess_axis_info(self): self._guess_axis_info_from_plan() @@ -2009,9 +2022,6 @@ def _get_from_list(self, lookup, index, current=None): return value if (keywords and not ObsBlueprint.needs_lookup(keywords) and not ObsBlueprint.is_function(keywords)): - if isinstance(keywords, numpy.bytes_): - value = keywords.decode('utf-8') - else: value = keywords elif self._blueprint.update: # The first clause: boolean attributes are used to represent @@ -3669,18 +3679,15 @@ class Hdf5Parser(ContentParser): """ def __init__( - self, obs_blueprint, uri, local_f_name, find_roots_here='sitedata' + self, obs_blueprint, uri, h5_file, find_roots_here='sitedata' ): """ :param obs_blueprint: Hdf5ObsBlueprint instance :param uri: which artifact augmentation is based on - :param local_f_name: str file name on disk + :param h5_file: h5py file handle :param find_roots_here: str location where Chunk metadata starts """ - # h5py is an extra in this package since most collections do not - # require it - import h5py - self._file = h5py.File(local_f_name) + self._file = h5_file # where N Chunk metadata starts self._find_roots_here = find_roots_here # the length of the array is the number of Parts in an HDF5 file, @@ -4240,8 +4247,6 @@ def _sanitize(self, value): return None elif not str(value): return None # empty string - elif isinstance(value, numpy.bytes_): - return value.decode('utf-8') else: return value @@ -4907,7 +4912,11 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, elif '.h5' in local: logging.debug( f'Using an Hdf5Parser for local file {local}') - parser = Hdf5Parser(blueprint, uri, local) + # h5py is an extra in this package since most collections do + # not require it + import h5py + temp = h5py.File(local) + parser = Hdf5Parser(blueprint, uri, temp) else: # explicitly ignore headers for txt and image files logging.debug(f'Using a BlueprintParser for {local}') diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml index f2736fd6..a1759979 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml +++ b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007777/dao_c122_2016_007777.xml @@ -20,7 +20,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml index 945b61d6..a48babf3 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml +++ b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007830/dao_c122_2016_007830.xml @@ -20,7 +20,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml index 0b0a1152..7544e18a 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml +++ 
b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007939/dao_c122_2016_007939.xml @@ -19,7 +19,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml index 8de706e7..dafb284c 100644 --- a/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml +++ b/caom2utils/caom2utils/tests/data/dao/dao_c122_2016_007942/dao_c122_2016_007942.xml @@ -19,7 +19,7 @@ DAO 1.2-m - -2331226.78834 + -2331226.7883358444 -3532798.9829 4755607.32183 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml index bb179533..c1cb7bed 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml +++ b/caom2utils/caom2utils/tests/data/taos_h5file/20220201T200117/20220201T200117.xml @@ -66,9 +66,9 @@ 0.36083190511928515 - -3.056099929280465e-06 - 5.26221921634137e-09 - -8.906590162255861e-09 + -3.055789796926023e-06 + 5.17684229864217e-09 + -8.84247710317511e-09 -3.0546760780702482e-06 @@ -128,10 +128,10 @@ 0.3608318611649132 - -3.056099929280465e-06 - 5.26221921634137e-09 - -8.906590162255861e-09 - -3.0546760780702482e-06 + -3.056038798826708e-06 + 5.311589798573419e-09 + -8.530093204777965e-09 + -3.054771726598754e-06 2000.0 diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index 3230657c..de4e6329 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -188,7 +188,10 @@ def test_hdf5_wcs_parser_set_wcs(): test_observable_bp, test_custom_bp, ]: - test_subject = Hdf5Parser(bp, test_uri, test_fqn) + # limit the cases where h5py needs to be installed + import h5py + temp = h5py.File(test_fqn) + test_subject = Hdf5Parser(bp, test_uri, temp) assert test_subject is not None, 'expect a result' test_subject.augment_artifact(test_artifact) if bp == test_position_bp: From 39f979cd4b80755ca879cac6ad5ab0be4c6e2aee Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 12 Aug 2022 13:45:39 -0700 Subject: [PATCH 29/38] CADC-11683 - a different HDF5 structure. --- caom2utils/caom2utils/caom2blueprint.py | 138 ++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 67520921..02111adc 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -3888,6 +3888,144 @@ def ignore_chunks(self, artifact, index=0): return False +class Hdf5Parser2(Hdf5Parser): + + def __init__( + self, obs_blueprint, uri, h5_file + ): + """ + :param obs_blueprint: Hdf5ObsBlueprint instance + :param uri: which artifact augmentation is based on + :param h5_file: h5py file handle + :param find_roots_here: str location where Chunk metadata starts + """ + self._file = h5_file + # # the length of the array is the number of Parts in an HDF5 file, + # # and the values are HDF5 lookup path names. 
+ # self._extension_names = [] + # assume no extensions for now + super().__init__(obs_blueprint, uri, h5_file, find_roots_here=None) + # used to set the astropy wcs info, resulting in a validated wcs + # that can be used to construct a valid CAOM2 record + self._wcs_parser = None + + def apply_blueprint_from_file(self): + """ + Retrieve metadata from file, cache in the blueprint. + """ + self.logger.debug('Begin apply_blueprint_from_file') + # h5py is an extra in this package since most collections do not + # require it + import h5py + individual, multi, attributes = self._extract_path_names_from_blueprint() + + def _extract_from_item(name, object): + """ + Function signature dictated by h5py visititems implementation. + Executed for each dataset/group in an HDF5 file. + + :param name: fully-qualified HDF5 path name + :param object: what the HDF5 path name points to + """ + if name == self._find_roots_here: + for ii, path_name in enumerate(object.keys()): + # store the names and locations of the Part/Chunk metadata + temp = f'{name}/{path_name}' + self.logger.debug(f'Adding extension {temp}') + self._extension_names.append(temp) + self._blueprint._extensions[ii] = {} + + # If it's the Part/Chunk metadata, capture it to extensions. + # Syntax of the keys described in Hdf5ObsBlueprint class. + for part_index, part_name in enumerate(self._extension_names): + if ( + name.startswith(part_name) + and isinstance(object, h5py.Dataset) + and object.dtype.names is not None + ): + for d_name in object.dtype.names: + temp_path = f'{name.replace(part_name, "")}/{d_name}' + for path_name in multi.keys(): + if path_name == temp_path: + for jj in multi.get(path_name): + self._blueprint.set( + jj, object[d_name], part_index + ) + elif (path_name.startswith(temp_path) + and '(' in path_name): + z = path_name.split('(') + if ':' in z[1]: + a = z[1].split(')')[0].split(':') + if len(a) > 2: + raise NotImplementedError + for jj in multi.get(path_name): + self._blueprint.set( + jj, + object[d_name][int(a[0])][ + int(a[1])], + part_index, + ) + else: + index = int(z[1].split(')')[0]) + for jj in multi.get(path_name): + self._blueprint.set( + jj, + object[d_name][index], + part_index, + ) + + # if it's Observation/Plane/Artifact metadata, capture it to + # the base blueprint + if isinstance(object, h5py.Dataset): + if object.dtype.names is not None: + for d_name in object.dtype.names: + temp = f'//{name}/{d_name}' + if temp in individual.keys(): + for jj in individual.get(temp): + self._blueprint.set(jj, object[d_name], 0) + + if len(attributes) == 0: + self._file.visititems(_extract_from_item) + else: + self._extract_from_attrs(attributes) + self.logger.debug('Done apply_blueprint_from_file') + + def _extract_from_attrs(self, attributes): + # logging.error(self._blueprint) + # I don't currently see any way to have more than one Part + part_index = 0 + # v == list of blueprint keys + for k, v in attributes.items(): + # logging.error(f'k {k} v {v}') + if k in self._file.attrs: + value = self._file.attrs[k] + for entry in v: + self._blueprint.set(entry, value, part_index) + + def _extract_path_names_from_blueprint(self): + """ + :return: individual - a dictionary of lists, keys are unique path + names for finding metadata once per file. Values are + _CAOM2_ELEMENT strings. + multiple - a dictionary of lists, keys are unique path names for + finding metadata N times per file. Values are _CAOM2_ELEMENT + strings. 
+ """ + individual = defaultdict(list) + multi = defaultdict(list) + attributes = defaultdict(list) + for key, value in self._blueprint._plan.items(): + if ObsBlueprint.needs_lookup(value): + for ii in value[0]: + if ii.startswith('//'): + individual[ii].append(key) + elif ii.startswith('/'): + multi[ii].append(key) + else: + attributes[ii].append(key) + return individual, multi, attributes + + class WcsParser: """ WCS axes methods. From b3f9d970ddd8888039fdff414ce71b41bf4ccfcc Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 25 Aug 2022 15:52:16 -0700 Subject: [PATCH 30/38] CADC-11683 - integrate the '.attrs' handling to the Hdf5Parser class. --- caom2utils/caom2utils/caom2blueprint.py | 163 +++--------------------- 1 file changed, 21 insertions(+), 142 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 02111adc..812c1a89 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -3708,7 +3708,7 @@ def apply_blueprint_from_file(self): # h5py is an extra in this package since most collections do not # require it import h5py - individual, multi = self._extract_path_names_from_blueprint() + individual, multi, attributes = self._extract_path_names_from_blueprint() def _extract_from_item(name, object): """ @@ -3775,9 +3775,23 @@ def _extract_from_item(name, object): for jj in individual.get(temp): self._blueprint.set(jj, object[d_name], 0) - self._file.visititems(_extract_from_item) + if len(attributes) == 0: + self._file.visititems(_extract_from_item) + else: + self._extract_from_attrs(attributes) self.logger.debug('Done apply_blueprint_from_file') + def _extract_from_attrs(self, attributes): + # I don't currently see any way to have more than one Part, if relying on + # attrs for metadata + part_index = 0 + # v == list of blueprint keys + for k, v in attributes.items(): + if k in self._file.attrs: + value = self._file.attrs[k] + for entry in v: + self._blueprint.set(entry, value, part_index) + def _extract_path_names_from_blueprint(self): """ :return: individual - a dictionary of lists, keys are unique path @@ -3789,14 +3803,17 @@ def _extract_path_names_from_blueprint(self): """ individual = defaultdict(list) multi = defaultdict(list) + attributes = defaultdict(list) for key, value in self._blueprint._plan.items(): if ObsBlueprint.needs_lookup(value): for ii in value[0]: if ii.startswith('//'): individual[ii].append(key) - else: + elif ii.startswith('/'): multi[ii].append(key) - return individual, multi + else: + attributes[ii].append(key) + return individual, multi, attributes def apply_blueprint(self): self.logger.debug('Begin apply_blueprint') @@ -3888,144 +3905,6 @@ def ignore_chunks(self, artifact, index=0): return False -class Hdf5Parser2(Hdf5Parser): - - def __init__( - self, obs_blueprint, uri, h5_file - ): - """ - :param obs_blueprint: Hdf5ObsBlueprint instance - :param uri: which artifact augmentation is based on - :param h5_file: h5py file handle - :param find_roots_here: str location where Chunk metadata starts - """ - self._file = h5_file - # # the length of the array is the number of Parts in an HDF5 file, - # # and the values are HDF5 lookup path names. 
- # self._extension_names = [] - # assume no extensions for now - super().__init__(obs_blueprint, uri, h5_file, find_roots_here=None) - # used to set the astropy wcs info, resulting in a validated wcs - # that can be used to construct a valid CAOM2 record - self._wcs_parser = None - - def apply_blueprint_from_file(self): - """ - Retrieve metadata from file, cache in the blueprint. - """ - self.logger.debug('Begin apply_blueprint_from_file') - # h5py is an extra in this package since most collections do not - # require it - import h5py - individual, multi, attributes = self._extract_path_names_from_blueprint() - - def _extract_from_item(name, object): - """ - Function signature dictated by h5py visititems implementation. - Executed for each dataset/group in an HDF5 file. - - :param name: fully-qualified HDF5 path name - :param object: what the HDF5 path name points to - """ - if name == self._find_roots_here: - for ii, path_name in enumerate(object.keys()): - # store the names and locations of the Part/Chunk metadata - temp = f'{name}/{path_name}' - self.logger.debug(f'Adding extension {temp}') - self._extension_names.append(temp) - self._blueprint._extensions[ii] = {} - - # If it's the Part/Chunk metadata, capture it to extensions. - # Syntax of the keys described in Hdf5ObsBlueprint class. - for part_index, part_name in enumerate(self._extension_names): - if ( - name.startswith(part_name) - and isinstance(object, h5py.Dataset) - and object.dtype.names is not None - ): - for d_name in object.dtype.names: - temp_path = f'{name.replace(part_name, "")}/{d_name}' - for path_name in multi.keys(): - if path_name == temp_path: - for jj in multi.get(path_name): - self._blueprint.set( - jj, object[d_name], part_index - ) - elif (path_name.startswith(temp_path) - and '(' in path_name): - z = path_name.split('(') - if ':' in z[1]: - a = z[1].split(')')[0].split(':') - if len(a) > 2: - raise NotImplementedError - for jj in multi.get(path_name): - self._blueprint.set( - jj, - object[d_name][int(a[0])][ - int(a[1])], - part_index, - ) - else: - index = int(z[1].split(')')[0]) - for jj in multi.get(path_name): - self._blueprint.set( - jj, - object[d_name][index], - part_index, - ) - - # if it's Observation/Plane/Artifact metadata, capture it to - # the base blueprint - if isinstance(object, h5py.Dataset): - if object.dtype.names is not None: - for d_name in object.dtype.names: - temp = f'//{name}/{d_name}' - if temp in individual.keys(): - for jj in individual.get(temp): - self._blueprint.set(jj, object[d_name], 0) - - if len(attributes) == 0: - self._file.visititems(_extract_from_item) - else: - self._extract_from_attrs(attributes) - self.logger.debug('Done apply_blueprint_from_file') - - def _extract_from_attrs(self, attributes): - # logging.error(self._blueprint) - # I don't currently see any way to have more than one Part - part_index = 0 - # v == list of blueprint keys - for k, v in attributes.items(): - # logging.error(f'k {k} v {v}') - if k in self._file.attrs: - value = self._file.attrs[k] - for entry in v: - self._blueprint.set(entry, value, part_index) - - def _extract_path_names_from_blueprint(self): - """ - :return: individual - a dictionary of lists, keys are unique path - names for finding metadata once per file. Values are - _CAOM2_ELEMENT strings. - multiple - a dictionary of lists, keys are unique path names for - finding metadata N times per file. Values are _CAOM2_ELEMENT - strings. 
- """ - individual = defaultdict(list) - multi = defaultdict(list) - attributes = defaultdict(list) - for key, value in self._blueprint._plan.items(): - if ObsBlueprint.needs_lookup(value): - for ii in value[0]: - if ii.startswith('//'): - individual[ii].append(key) - elif ii.startswith('/'): - multi[ii].append(key) - else: - attributes[ii].append(key) - return individual, multi, attributes - - class WcsParser: """ WCS axes methods. From 51eb23c7746ceab558f80b2b97ae8de96cf6c92b Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 20 Oct 2022 08:32:18 -0700 Subject: [PATCH 31/38] CADC-11894 - fix trefpos null value handling. --- caom2utils/caom2utils/caom2blueprint.py | 3 ++- caom2utils/setup.cfg | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 812c1a89..479e58d7 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -4587,7 +4587,8 @@ def _finish_chunk_time(self, chunk): if not math.isnan(self._wcs.wcs.xposure): chunk.time.exposure = self._wcs.wcs.xposure chunk.time.timesys = self._wcs.wcs.timesys - chunk.time.trefpos = self._wcs.wcs.trefpos + if self._wcs.wcs.trefpos is not None and self._wcs.wcs.trefpos != '': + chunk.time.trefpos = self._wcs.wcs.trefpos # convert from the numpy array length 2 of self._wcs.wcs.mjdref # to a single value # TODO chunk.time.mjdref = self._wcs.to_header().get('MJDREF') diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index 4b70fefe..b5b84abd 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg @@ -33,7 +33,7 @@ url = https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 edit_on_github = False github_project = opencadc/caom2tools # version should be PEP386 compatible (http://www.python.org/dev/peps/pep-0386) -version = 1.6.5 +version = 1.6.6 [options] install_requires = From 3a443bea5497545aa03dd6122b2b1ea2e433c08e Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Thu, 27 Oct 2022 16:41:01 -0700 Subject: [PATCH 32/38] CADC-11894 - code review comments. --- caom2/caom2/diff.py | 1 - .../tests/data/diff-expected-CAOM-2.3.xml | 4 ++-- caom2utils/caom2utils/caom2blueprint.py | 23 +++++++++---------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/caom2/caom2/diff.py b/caom2/caom2/diff.py index 51876d36..72cd01af 100644 --- a/caom2/caom2/diff.py +++ b/caom2/caom2/diff.py @@ -82,7 +82,6 @@ from caom2 import Chunk from . 
import caom_util -import logging __all__ = ['get_differences'] diff --git a/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml b/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml index 046d468a..19d65158 100644 --- a/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml +++ b/caom2/caom2/tests/data/diff-expected-CAOM-2.3.xml @@ -119,11 +119,11 @@ 58979.5015625 - 58979.5017365 + 58979.50173611111 58979.5015625 - 58979.5017365 + 58979.50173611111 diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 479e58d7..2a151f0a 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -71,7 +71,6 @@ from logging.handlers import TimedRotatingFileHandler import math -import numpy from astropy.wcs import Wcsprm, WCS from astropy.io import fits from astropy.time import Time @@ -1844,7 +1843,7 @@ def set(self, caom2_element, value, extension=0): :param value: new value of the CAOM2 element :param extension: extension number (used only for Chunk elements) """ - if isinstance(value, numpy.bytes_): + if hasattr(value, 'decode'): value = value.decode('utf-8') super().set(caom2_element, value, extension) @@ -2022,7 +2021,7 @@ def _get_from_list(self, lookup, index, current=None): return value if (keywords and not ObsBlueprint.needs_lookup(keywords) and not ObsBlueprint.is_function(keywords)): - value = keywords + value = keywords elif self._blueprint.update: # The first clause: boolean attributes are used to represent # three different values: True, False, and unknown. For boolean @@ -3775,10 +3774,10 @@ def _extract_from_item(name, object): for jj in individual.get(temp): self._blueprint.set(jj, object[d_name], 0) - if len(attributes) == 0: - self._file.visititems(_extract_from_item) - else: + if len(individual) == 0 and len(multi) == 0: self._extract_from_attrs(attributes) + else: + self._file.visititems(_extract_from_item) self.logger.debug('Done apply_blueprint_from_file') def _extract_from_attrs(self, attributes): @@ -3794,12 +3793,12 @@ def _extract_from_attrs(self, attributes): def _extract_path_names_from_blueprint(self): """ - :return: individual - a dictionary of lists, keys are unique path - names for finding metadata once per file. Values are - _CAOM2_ELEMENT strings. - multiple - a dictionary of lists, keys are unique path names for - finding metadata N times per file. Values are _CAOM2_ELEMENT - strings. + :return: individual - a dictionary of lists, keys are unique path names for finding metadata once per file. + Values are _CAOM2_ELEMENT strings. + multiple - a dictionary of lists, keys are unique path names for finding metadata N times per file. Values + are _CAOM2_ELEMENT strings. + attributes - a dictionary of lists, keys reference expected content from the h5py.File().attrs data + structure and its keys. """ individual = defaultdict(list) multi = defaultdict(list) From b4dd5b383326b0d50c8cfda63481499f2ea7ee23 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 28 Oct 2022 11:49:58 -0700 Subject: [PATCH 33/38] CADC-11894 - replace fits2caom2 with caom2blueprint in setup.cfg entry_points. 
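
The console script target moves with the module rename. A quick sanity check of the new target, assuming the package from this branch is installed; the import path and function name below are taken from the entry_points section in the diff that follows:

    from importlib import import_module

    # 'caom2utils.caom2blueprint:caom2gen' is the new console_scripts target
    module_name, func_name = 'caom2utils.caom2blueprint:caom2gen'.split(':')
    caom2gen = getattr(import_module(module_name), func_name)
    print(caom2gen)  # the same callable that the caom2gen console script invokes
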
--- caom2utils/setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index b5b84abd..462b5417 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg @@ -55,4 +55,4 @@ test = [entry_points] fits2caom2 = caom2utils.legacy:main_app -caom2gen = caom2utils.fits2caom2:caom2gen +caom2gen = caom2utils.caom2blueprint:caom2gen From 10bad15de9fdc26615b2e054dd34221127a11846 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 28 Oct 2022 15:11:43 -0700 Subject: [PATCH 34/38] CADC-11894 - increase test coverage. --- .../data/taos_h5file/def/attrs.blueprint | 51 ++++++++ .../tests/data/taos_h5file/def/def.h5 | Bin 0 -> 32784 bytes .../tests/data/taos_h5file/def/def.module | 53 ++++++++ .../tests/data/taos_h5file/def/def.py | 53 ++++++++ .../tests/data/taos_h5file/def/def.xml | 114 ++++++++++++++++++ .../caom2utils/tests/test_collections.py | 8 +- 6 files changed, 276 insertions(+), 3 deletions(-) create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/def/def.h5 create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/def/def.module create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/def/def.py create mode 100644 caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint b/caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint new file mode 100644 index 00000000..6089b467 --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/attrs.blueprint @@ -0,0 +1,51 @@ +Observation.algorithm.name = ['PROGRAM'] +Observation.metaRelease = 2018-05-21T02:07:22.0 +Observation.type = OBJECT +Observation.target.name = ['object_name'] +Plane.calibrationLevel = 2 +Plane.dataProductType = timeseries + +Chunk.position.axis.function.dimension.naxis1 = ['NAXIS1'] +Chunk.position.axis.function.dimension.naxis2 = ['NAXIS2'] +Chunk.position.axis.function.refCoord.coord1.pix = ['CRPIX1'] +Chunk.position.axis.function.refCoord.coord1.val = ['CRVAL1'] +Chunk.position.axis.function.refCoord.coord2.pix = ['CRPIX2'] +Chunk.position.axis.function.refCoord.coord2.val = ['CRVAL2'] +Chunk.position.axis.axis1.ctype = ['CTYPE1'] +Chunk.position.axis.axis1.cunit = ['CUNIT1'] +Chunk.position.axis.axis2.ctype = ['CTYPE2'] +Chunk.position.axis.axis2.cunit = ['CUNIT2'] +Chunk.position.axis.function.cd11 = ['PC1_1'] +Chunk.position.axis.function.cd12 = ['PC1_2'] +Chunk.position.axis.function.cd21 = ['PC2_1'] +Chunk.position.axis.function.cd22 = ['PC2_2'] +Chunk.position.equinox = ['EQUINOX'] +Chunk.position.axis.error1.syser = None +Chunk.position.axis.error1.rnder= None +Chunk.position.axis.error2.syser = None +Chunk.position.axis.error2.rnder = None +Chunk.position.coordsys = None + +Chunk.energyAxis = 4 +Chunk.energy.bandpassName = ['filter_name'] +Chunk.energy.resolvingPower = _get_energy_resolving_power() +Chunk.energy.specsys = TOPOCENT +Chunk.energy.axis.function.naxis = 1 +Chunk.energy.axis.axis.ctype = WAVE +Chunk.energy.axis.axis.cunit = nm +Chunk.energy.axis.function.delta = ['exposure_time'] +Chunk.energy.axis.function.refCoord.pix = 0.5 +Chunk.energy.axis.function.refCoord.val = _get_fwhm() + +Chunk.timeAxis = 3 +Chunk.time.axis.axis.ctype = TIME +Chunk.time.axis.axis.cunit = s +Chunk.time.axis.function.naxis = 1 +Chunk.time.axis.function.delta = ['exposure_time'] +Chunk.time.axis.function.refCoord.pix = 0.5 +Chunk.time.axis.function.refCoord.val = 
_get_datetime() +Chunk.time.axis.error.rnder = None +Chunk.time.axis.error.syser = None +Chunk.time.exposure = _get_exposure() +Chunk.time.timesys = MJD +Chunk.time.mjdref = None diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.h5 b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.h5 new file mode 100644 index 0000000000000000000000000000000000000000..937a2aea1260acff615176376c5a329324da85be GIT binary patch literal 32784 zcmeHwcU%+Qvv;syELgCM1q*fvAgJi-4n$OxfKu%gDS|X12sZ4BH4(em5xZhn?245= zXYF7YyJD9FJHBT(yH9*jZhU^f-@Whsys!I5zS-S#W@qNi%qg>FM$c~DO)Au@AmKKt zR8mG#mH#XLd>6K=6Ld$!eet}%@W)8NWC1sp()}@#43}`{>IlcH3iQoO>keEjqeCwb zXJ_u8NMA(V*+0MkziXhUvtu7l2mJ&^NCiA6*qN})+Zy+uFDxGXH&C=!(SWNC(-EsA zvF866NyRd-v$2={YppWE&a>-w*N+b55o5EUjK|5e!4za|97w7@K^PH99@6o`qLEB_tpHu ze{p?M|2MARw@TE;Ax(Od`tW!Cd-d<*Os-$o$cT?jT=diFT-S=C{wr~{bcgu!zobL> z_$PEY)BTB#x0?%R4~FRQ<{|LI{r@GM-{9v*I$S8$(?ry(TpOZl{A%w^S{9hx9POl0_ z&)(btLwe@02VbteQzrw$`2O?K|1qT3Oq6w6k{56{sSV z6Xo)BYsViE%z=#FbXy60+E{b!_OY{(DQv85{JMt)M+DjZ_}`BZEWYojtK#S0!^YYw zJUY;-JMl)HcoAW7O8$b?gzJKUR>g@F-~TU&4hssgHx=mokIG|f^_M)=|6}slT5J6;T%ar3`H$D<17WPLV`=zn#@0o}Yi*ktugc^~ALe{q=O~seC=MepOOE z?l`yoRL|Ydvv>cJ>*;E7^?LGujmSy>x6VaoBYxsq7k9$Yx(q*#6nNm)B7Q~B%SGYL zU*tFF@#A@c9tZk$9%CJU9^Lq%>hG-+n?v#XedIQN+{%Zc`o`A5Ce2FKKK85nq&W=M zU$x=1R8@Mz(Vy$v{?_&5rKL^M%Kf_j?_7WLBKK0&ifWN{av
sh+>zCIrAJvE+C?&lzgp z@6S^0Op4YM`w;z9N_dk>xcYu>P9^$VFJXa5uMeHycsY9XaQ5=+$LX5e3|*fu^i@T! z5By)z{{%Y3b9LKq(c$w6f0x5o?&;Xi*|S7B^l?hfPqlUSQAZLG7E9Lc;!5V99uaK1$1#-5@kC1cy<9@5?bzB@n*xB0=gv3(zYC=I$1y$$9@ib z&W$`;K$nEC$I|vg4i?ZQAz^Bb^S%PQB&=J;r?Ta)0=gvJ-sAr08ruu#lCT?ToiCdT z=#r3cuJi)BB#h?Nwx)nKggl!o3h0t>*-~yRpbeqyPBY_)3kzsNc-nkN?67$S^zR~P zeeQD#XhZmR!IKv zmRPr=e*yiwSadb-!A{VbpMq5XYKI6#eRzO@!#!7|0%yOPUqi^OJeJYLdxt@!bh#(gD?7scZxjW@;PC5>0b<0XxE#pA`t$;J0e8fTX@J{OM{AJ-S( zFFqd-_lwUT#Ql=y9mVG<;(qaYP4Rtk9#kA(a>K+I1$1%bM)xb;6wt--1V1e(phe`T zH6IG-l5i0>4f|3+i>RALYJ~Lk&qxM$^Xrt+&>~*rx_POFE{@#n#if#lE{>buT~-8E z)zBjH!`d1ex;T#KFQB1|BX_l}^)sGoTEtdl7N%{fp^GCC zhNw_Oi>Mp7Xz1d|jiPCL4K3moB1Jz34P6}h;cFKSE#gLQTHvUmMdYVN&KkNnj^h=j zp+)4UsR|7(q6cRNeKmA((fW>94J{&32AR-mXc5Kviiq57{Zo>L7O@YZW;$6zi%5)x zxTJ;_(Sq-IHMEFa>kU$AXc0ei(~S8VT10-Dwpc@pc!8`Yq^D|V5&7x%Dh(~-I&OI& zO+$;wyVC{@E#fHNz%;aoyqWFL&?35V&A&%Oi^w<60S(=h@1TE1Ufkmv`Xk5IDLwLJ z&S_{7G2%82&5iONU)1ATGTHxjRYQx&X{*)^4J~2}YGJ9rH(B0uear=bhEWecqy`Q?^R8k*r&9lq(2ck@zOotqO}&AFYi9=X|EmkL@s zo_8=k^5$f!rA6$;3!`%= zJATvSO0r7RyMvY%k)K|5*3xmjL+g=WMv!aip1dOU$S?Es*3u350fip9Rih7{TKYW~ z*81s@4>yCf!u&^%x^b{pnAd5A`J`5ur)q_HuO4;dJ*_Yw*9!A^J?h3|T46mvE37~0 zQ8)h43hNzu)Qw-X!n%qcb>k7Oux_Ij)_3%%8%Jn`bt66M#s^wqJxVLATj^2PziWkc zFs-m&rbk^rtrgbaw8HwFR#?B&qpsi83hRMdVZBf*tRw1C*I#Od{!uIRhkDfcb)n$T zder%IA+7W0LcyO4X`TNT3jSLt_-i4p^VdSbUke5QEEN2*kk;F>5Kr!Sk2Oq(zwcDYt8(|u>*-X`@s}7D=QlJumXP8%Lvmi7Vz;WNlKgboQ42}^PGDJrb zvugSE6B0saiIAV<=+uz}lBWUuIHnBIDIrfO@;6gj-}jn|fk~IsmG{*Qj9M;Bc{PuL z5h0lwpHdhYW-Z5M7ck)e8Rb`7$bj!DA8Ery47knBvuM4Tf$sgn#C2Hti?;TdZgcvjXY`A`}I_b%K^Kf8{B+@)bzSJD|^hZf}BThG90%W*|7 zHZXAHgK_11Zngee9LMCl?qDFXhqtu}x213uAXlVgIJ~xP%FuHR?2g!%5q+M4E$so1$!1`!Bs+gP!@#m^ zPw4{0z`O-_ELLL%l81!Kx3UaOX!*|nzy$^p-j7Z?b&-MS!)2CVyu?7rw7HqLFEcQt zPo3OnR~YcFw^pOMO5}ai#?&Z>fo|LP%1m+@=$uF^>RxA{ozwYniyI8IuH>0)bCZE4 zIk(fh++v{a>X0m#+YD3-f1BrhhXJYG=%RqT41E7qx_;C>20omf<1jjpf!FhD`%b&h zz>|S%lqnAwxMR^KW#vN#uD#ilvH1}LD6=cx_n3jK$>;JZ(m z>@5RPp{-4+cMJsE?2+|+&%od>T@+pg40xSB8}6@Rz*W^ZIYP^Ty#KBA#6ku-H4V<1 z@__-{SFiKb9~o%1Ke1@}Ck7f%ELDHgX9nu@p6#&r3jY z6y?@H;8ez_{-c@*9N3u-Z)+j2SyIFIYi$HA16L}gbrG1iZJAP|9)jA1J2TA95L7wd z8Mm#EzCF-Bh-_iHs|A9M9d^o&wnVVXsIwxw6@tag zsqmcE2<9$yOMcJ>!8E_?>95-&7}qK=>$4>SWxoBk@J1)(D19|8CaE2Eo9- zGaTC3BJeP)?%Saqf?ki8E9G_w95b4yxZ5M>IBG}6!1f5N<(+V72LvrlPUXjSM9?T# zA)VlWpw^mPi`ktJR3086U(y*tS^F3M>nH?Yi|GEn8kwKx*bBiNP!=hABbZvD$ZUWsg0WYpJA^0@#I3C68|#K(_^?!E zk~@MxTZ@#LeGm-zx;K7m-%JI1R>b}=~>>` zUp#O%e7NtfY=rdx7ymBmpo}Hih*Na0ymyJzo zu0+uO!k1Lrcm!5Ur)G8?iNIoLmE2wl2pU)})%YYLsPUnR>CjOKDxKIWiyn=j^sM%Z zF=G&X@j4zpeJq0a4SOZeABW)ivn%PVk`UbAH8^X_cm&tSKF!-d0l~%YBZ{&nA~;+1 zi`m6V2#(#H;&6L1f_>{M`#zh3U|ZA@rDiIE^o~tZB-0TrFSRwJQZj-C$R5|3fne66 zWBJWyBAD#&BDI-?V07y%7F}i|7@-*?cbS7A?C2AJ@3{#4lVg$sR0#Tcd`^v2BXF-j zC3Dm~1U;UZt#^Fjp8D%&geEkaQL`qA){ixE^` z+cTM6f}mo=<@B3N5tM2_DC@~G1fRb@&U>GV;NAJ?qN3#pp3VPcR&gbQyup(lYOg|& z+rq@R>1qTQ-Yrtf)*v`@$ULR=8yN`P&Tf)D-hrU|ympFrI}vmlcqIJWE(G=#J(A1sLD2Ti#q^qc z5j4#lkkxn}0<+1F@+|iwFjYhqIUGPxq4r0!?wJUT?oV{^IEdiW=8C>U4k37}T&N5` zjNob4MkynYAh;*plriZjf}G2?cccY#QkqQ6d~+7TjF%O1znnubao++>xoiZ96B?S< zV2HfEHpH;KjzBVdL&lLC z2tIn-;PW>TylHeO|Jp4CPo8&^KDdqG?rzrN^&JG)#`TkbzKa0&c;IiGhu~DTh$Ped z2oB%UrZ#+lV0ZfX%+?PPY>qCU+x`)PwGQ(&PLB~RE#1J>{Rx73c)e`kQv}J2trelq z5KIU-7#{Z=K|-5u$>Z}8L~C*StQQDEj`?OSeu-f4jQe?MuMl{94ll}hjli{m*6i>b z1l^t{Ih=copz}_t@6~q*+Kow3=DkPI+PQwp%K`+AtE|uXs6kNoh76Y}L{K#?Gr#Hw z1ky;kw82LN-`iu0R-X`jkod~&KO=aZo#*fPg~+=gJW25t!JQ$R)c)T{zuPh?Gq?x= zdS5Pg#CHT)hv#X=NiZCmW@b9m2*a*E>9R$oFl??TQ>-nGVa=lh;oHk#ShC$I`CwTL z>O_=&#u&qNC!eg#!n-gY3%rRx!cg zRisv$RmRZ!T)mVQRWQg?)@8J-ilNgWD@<3zVB7paes5C@t=>9H`&Gwaevq*Us)3=- 
z6mNMe${PhC(4L*bUOnXBt!colD)yR{yMN3>dVzzoCf z^7Twl*T-<>N}B9a0}RXxOU0dr7*2-n4}abW!$BLzWUV=donNxkjhbNCblNM+q$!5g zs=Il0n_*boe^`-)1qM~q_hz=uF-&_k#-VEq3`zUT`u1vxA%3Dt>C*~BWbe8ufvqtF z)lAEXYJ*|my|#FCTMV9?_T^8r#Lz1ar1PyXICaUkSS7>YP|i!f#TtXnr91xnZ7{S< z4Nc0jCGrNpOTEwzLv7iZ%v*LCDt{`I`_vwTan@W-L3<3}X4f&5bi|Qu&esvN_jGIN#lx?z~h)=pXBjA81MH5r?_V;C3M2Jh{GLD_b1 z{)wI#A_}`oSr-gLkDs%+*$cyfnI7^dy)k%r-S&U)iovB}NK%mkgX6O|sTJHYblf#6 zvsNDr)?-WOHtCC@MfW)xD-R3}tJXH{?1`b)&DF9VUKmW)wN`j~V<;Q7C*02m!`BX7 zlZX3a&={RdPw0o?1=BZca(@gD7v9QK4Zv{IFSsamAcjk=-k5D1gyDQaqQjoS7>*w) z<$HVxh6B@QE0G_W>h!IZa@`-p2D8-}j{-2PeB27Z4J7hr?9Tr>6vLcRU8LoMF-(!4 zwWt|_VXR4CdE-zFak)4BEr($kz9uNiAq+#{@Yku%;Y42h#LT|KG4%azlsh;AL(lWG zG+~h#V17;0kx>{r3|=Lh6pcaFqLpH942I_Kc7-n+LF7F|C9jXgP;=^;^j&cnD!KQ` zI;zA_rtZzW?05`c9tnpQ|2^jK6COEuF#P9%&e7}ssaHGO3Ww|jJE?%vX zQhh9jb1PS7G#ZED*szwkO%jIvw!88>jK{G3YiDV<2^iL&Ic?ErB8C-ecln@67#0k; z;XiCLhS|-ACMl<2nEd)>>V&Bn#vB-#IeQw0*h!MyCDSp4yUx_COU4jTtA^>$85sKI zt&km=iJ{Nt7K-z;F!WIF48JxT1KqWA^8Gm&+DlKTznY7|>attbClv;Z<=6Ad&co0k zB(SJj3WgfiFU=ax$58QeyhG~+7)qb|?$drDhR<_mD4iBzc<)<1#ceT$=Z#ln3|NBU z{)^@~WGRO0dw1l=E+g`e?<5_cis7uwDT`UlF&wS#CSSY)!@fJY{%I>QY}*i!l(7m! z`iK{)hgM@)-Z?(=>>46(+3z`5)?%1-Az5=T4a20Rrlv2~VHiDhx$I*)h7p#{6=gPH z2>Xx`UUee|{}T?$^*0fDXJw_g+>F89TajhA1w;2nxp}Y^L)Yj2MXuX0*ze9a>%Sd? zc+a7ObyweS#zBS+&hOC75AU6YVPdzA$-bvB zjH-MxeduWnG1q%%MW4YiZ0)tYF=sLOMfep>KZn7${d2Pg=P|fR;v80I6M3`0`fOz| zbXhP>c>rOs8&Wmp6voiD<+6;6EQY4Nq+mIxS91Kqro~7F4Vz}oRo7v?$hMbCDb6jo^d9O{?c;Ccu zdQ}zEz*`s&hb@&w-Nvw|T~o#AI~cZn+ZsOYE{3$T?UPgPVOTcrc>2mbBJaRnS)1=; zm|<}>Z{Gt96W$CiI{6SoV&+q`3y&~FPaffL>oJB<#TTEaPcRIrJyluo6ob$G$|>KU zVNh&Yl2I`qL$~-Qxb_PS6umXS=}Qdl%D0zVzrxVw$}tP-HHIcDT;x69V5k>*#oy~K zhH5s0ll3OfS|pTbp__S6`Q!>Lbk4uzud2v`?Vf>2sg+o0o%A?$ zg9!_bdq!vOuFOJR(@#0as<2S?_9P8cl?CbgN~YJVvG6^5k?fHv3xy8minrBScwKsP z_}3aNJjU&krL|bNv-n7QjoK_+4d{_&UWWzL=2BkUx-4XA2NZRz$HKv5kIbCSSlBfq z%As$47B+c)^cmcMg*6Q(DZ?7Fu;gjQl=wz0sCO>Rm}t(z^f8U_oW?99Id95e+JuFX zRoY3{H)SF6#$k(H%~%Ld>n=ZP!NQ=(i~iZoS@7yGAStH>3%!jVr9NoMf)f*&`MMPg zofdw``P`ZX8^4JfkxX2H0xZL-{k zg`#?g)7@=Z&^~g`8rY77SKBY-h1#+3D6xN0oIMM-ogSJ^XwSlxN|6q;JFvjy6#6Xb z$im6h6O`*5h`iwyQg(D=VW-`KjKiH-*jUs6pQBh@Rg zGz-(3JIg--k@qd@U)G6*_=EkDs>xZ1nDQXCK{pnH+#)htIkPaZPGOFHcNRP!jMq5! 
zV4>I6@}_P*iM%7{%Lcfx-~bI2A-!0zuCO6Iwl@ncuUaQ3xw6n`<-zos3KnV)>z1|1 zjfKj#Y~EUTBJWq<(?L~Qa!dYlB-lypK za26W6!uhsN^5_H>ju$fiV-ktH$GwxLk76NX=DpPU zqghz*6_&Yb3=1n87UXOh%fiBEV>SE7u`p+svFWKK7N(3<%Px*5@^-JMxIKY|xT@>I zpG{;T{HA5HW)cg5>-MKhrm)aI$}y|bR2KSnWb*1vW1(j$@1kbYiM;5pnN2ba9Tp99 z=rV%^EC2UCE;CtZ-g>Okdlm}~G-Xo)X0uT9s463J4q3-fu8T*_Wuc5mTK-fO3t!AE zr73C_-ap=Fv2q>@`57SJoWjEWQQ7|c=Cg1^?v-?M0SgyR?xwN}SvZ#)nt5{(3&+;H z%XzYxg?+=vXx=YjVY_`<)9*`JNdG=pRxy=@73b?HYA}^it-7H*xJxXJ_ zhlL9VN}G1t%fi`7b7bB3v2fJ2w!&jSk+;^G@F53S*p}BiIXshvb({C3k37i2a%I=7 zNrzaN-}PMH+`}x)l6n*^JHnDY!Y#86M~S@4gB^AsV@V$2jnA>;EXgB`QZgr4l1C_& zl6#URd4$;+53^WspIZyRImMDZ!s`4lr&;LQq_tFfmL++F-4->@u_TYsMQ(ncC3%Fi z{%x~al1J#9)RAFH9^qDMH^h=WLQrNO%tEP+uX6^mEXgAzYKC24Ngkn;Y5YZ&Da;T^2_5n3d7+9!v5F zHF4`aBJZu0`R(tsB#+QiDu2L|Ji;yu_lGRFIZ*O}k64mNIO8Asn1wFbJt^)9OY#Ue zQpZ0f@&*jeob`+)d4yLvi=VS3k1$e`md}zrg2Z&k3zp;&X37q~WWmU@hT_~SmgEsu zhF^V6}cRZ~Aul`}4XX5do2bK>;Q6 zS3(|q!Z*K3j}ZJKxmR zPelcdNMG#}s-li|K7Qa>w2I1on1##4tEf}^CS)ugrJ}mHR~}O%Nkxs+97&lyNkz?B z`=QI{X+-`pO?oeyrK0GBrIL&k6*bmtpw+o$D$2&MIyGoD;dA^}uY`3f>cWz`*3&nt zs4s?Nn%_Qgmhk0$ z;Y0wVqP{maZa)YU{U1CXnuAo-s&NgJbuXx>ngP2{w8>FXiOWrcdfrx17q6AhV(+Oa z&)g%H)gG#-CN>Rf_kNqj2ny;e3BvsVHVs)|mERNqeEo^=^>;upX;+Pb1~3E`GYJ%^MZfF||#jtmi7K($}I17s>UK zEGN|LLg-~L8vI;aK=kB*^0vQMQL1GLtGAQ#r|x?nXiLgvYoBhq|5!yO$kcP%KT=U= zY;UgL^gu;9PFXXv@qL0HUUXhc0u_ zL{hR-**q1sdfb9Fr%1UT6+bTqvOj-Jw{gxqy&mmPl5!@&H{Als@yDf~+9IMq|I=bw6e@)fberY1`1TO!xA>scKLzoni8?hheyhMGwfONc%y+uNVL@KHrIG71Y( z5Pf7$keGKcQd6(x3A+xJR#P+Itey6$jG8)N*T}27v6`AP{6!n9a%!r&{Y=_Ls;1=I z@wlPo)l__yt*19vP*XA6E4J`6QB%`i9O)TRm6SX7Xmvt$HKnX3bq=egrjDKN=-8-^ zni{=%gJ;{iYRYnh|4Zw7YHCQglV9x2)Rf=deX|_utErpH(|KJRs3|t3zngPIHMKh3 z^pa~MH8nQGNA|{CP1&@Qe$Q{Jrt-=tZEjhpsjru=n|31De_0y4rkR@Rg50uW$^OXu z-KQirS5uKiFA~SLAlK~}`*>_iHTChBxz*TKYHIe<=(YZ>)ztn@cLqray1RJwuT75c z2sqPWB-vkca^rVDOEvYvuEzneQd5rQWZ_O^|K9#C7do_6QypI&dt%*2O$C?^zjT6> zzdEm0>JqZ=_9*7V6mow0on48sq|%rfS-5>F}K3zy;x6H_7o4A@iqXw^vhf^WD#7c2HBB zUK@8<-%(Ae2TfTp-$6}{pV)C-?ape-BeqK?Z5K5K6*}#>18S;PjTtvi%GK1QqT5r; zbXQaURpcQPoC%*_Z=0{^rlyiwE{U&1_8X30bm=9*9`Pn+vq`yDl}9~T+e1y&J-rvp z37!oq{k9o7?ijwr?gc4l^l-fINwU9TC_Qo+Ie($;#Icb*)l~KylWz_#YHDgT8%4QZ z7@*sSd34HD4rKa%s zdZ)eN-Zn>>r?}`mW9@+BZ;5Rq{TO+;{$^4 z=1vcqF<4E#oIJN3<)@~MdtP{y9YFNN>}{GbR86_Ad@!Ya5TXB(HknUwf-+}F{a_+z zQegjdg2z{H_HG+O{AOIkx0?tiJf6_AaVRNQF}qs|!85X|BaDU-f5`jPKZanoeZw1f z3Fhp+V5bOEQ_oWz9&IQ1@@a$qmBZDPW=r>?^23RIy>4|1h*VQ4BVP196s@MN_sh?( z6RV~gAC4b&KTb_a{ksG{CHsEz_Q~KA1V`IBhI}X3`0~6;oe8}rmnSk$2ujujSU!(eQ(p$wN_&t%{NmiY zTY-sc>O^;^jWY;7(T1ZP1bbWg)V@w|cF&}KMx#i)Xx;d2Yk~!~UC#9-D4DgW;Yfm| z<>TJWBY48n`&$OVkJ&XEvjnSc3hnrc;D%27D|Q;KrgjcH6?%7!ni{2@^Y+R(HMMT( z(w;Mth#ZMirY!=rbMEk*EFeklEdTz}zihaQ&+7FKW8;UGcl{?hgS zaq*q93en(vhZk*U@!~=4E+_Y_N zJV<$MO28ZrEpnE{8xyn}WWC}5X~(oG*Y1ZBTzIERVqJopT`TWGq@GplM9l<(11uVw zcOW?L{qt(i3Ek+gmm8-N43Q7o=t^+%(++#TMG-lCQb#Nzc&FZ#sreCVN~W@R+(PUw z<7}Qo02`P<5J8ZwGog76f_Im_?tO^Zb^U6vK9JzX{B5Jk z5nQFvj^9e`e~NLxrJe-q&Ym!%6v6zXrP5~*e6b+gsR^n7^1$A(Lxg_*fWX~e#Ln4% zcNH56ADQoK)<_|k^X56zl;FLZ&0dBQzV`OKS(7659$F62wjtQ^V_4KVQvYhxDy`DU z`Syz%o(&_o=lCVB1_T58jvarAoL5yZc)6FLe7e#5kL38N{?A5DBXn)&z48AFZ=E%dbA(Y{CEt}e^&M9?>-Q}kZoEq(1f(>OqhE1 z5!rX^S@yz7g4fT|ZPpR&++pGQ83cV>Bp-_&hOAh`aF{aO{lymu?k z*b)1gwrl)=QpCSJ^YY@83Eofkc3MENQtq6o+ligIkNdoH3$epW=KbzgC-&FVu0{b( z(9_6nMkK)@pO4gELvUYiHIEYnJ)E}3q!9lxt!uwULh60Ke$;a!v1e*n`ww{prS#&` zU&-+<8HH}WiGNL|U-z#yfyAF3ZHL=WQB!^HGPX&{YU+LLLOf=cnsQk@dAoEj>CcPi zu3M!hes*6zs_T5>e^16d%3H|A;Vz?8OVm`*xEFK0ml1znf3@)XVl_3)cWb$RZJXVnQu6k7-Map0OqPWqXoR^+_F|IACH+_AYpUWcBenC-< zBS`(PzLURhT|mlNH*^8Ae>gSlStvPwvW8SPozycqbiLD>RHCQ0wi%sPs;L>%vzpCb 
zMaBvD3a!elQBzA@j?{gZrlzhvI6k@M1`_8|!*TFtQf_^lO6#|gc%gRE?B7NDx7Efw zvNF|F)3Mced7n^Iv5k+nQf8C>CVLCD77`JkNNz0n>!Vj(ac=vk-+|NB`Y~^+dw+3A!mvocfY_np z`4aP#LV+6NQ@Spm{GQ~w#e651C$A$J79Ah!7q2v!kL2^XgqDcnb&I3k4orDF2#X8| z4ff+cC-tLs8P;z&M_AA=_bnYg-P*sVD_EJKcHnL5y2ya zBij9QJJsJG0~{OV`F-7U2<}}3qFr@m%r-aJtz(Ad?#(rg4J=-|J;M+|&<@%NZ3C@TK_g zA&7QVnYVjo>?mUL%7BRI(0J}ftbc0n!sUryigYXTbQ1zb2LE+);=e#w??xAarA+m4ChX$58tpv+ull&eq#M+<&RNFL!iIkY89-P_Sr_CEZ85Nnqsv0M0U{ Aa{vGU literal 0 HcmV?d00001 diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module new file mode 100644 index 00000000..f5d27e69 --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module @@ -0,0 +1,53 @@ +from caom2pipe import astro_composable as ac + + +def _get_datetime(base): + b = base.get('base').attrs + result = None + d = b.get('OBS_DATE') + t = b.get('OBS_TIME') + if d is not None and t is not None: + dt = f'{d} {t}' + result = ac.get_datetime(dt).value + return result + + +def _get_energy_resolving_power(base): + b = base.get('base').attrs + result = None + # Laurie Rousseau-Nepton - 11-08-22 + # Resolving Power could be given at the central wavelength of the filter. + # The formula is R = 1/lambda[nm]* (2*(STEP[nm]*(NAXIS3-zpd_index))/1.2067 + step = b.get('STEP') + zpd_index = b.get('zpd_index') + naxis_3 = b.get('step_nb') + filter_max = b.get('filter_nm_max') + filter_min = b.get('filter_nm_min') + wl = None + if filter_max is not None and filter_min is not None: + wl = (filter_min + filter_max) / 2 + if step is not None and zpd_index is not None and naxis_3 is not None and wl is not None: + result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067 + return result + + +def _get_exposure(base): + b = base.get('base').attrs + # Laurie Rousseau-Nepton - 11-08-22 + # Int. Time could be the total (multiplied by the cube spectral dimension f.attrs.get(‘NAXIS3’) + result = None + exposure = b.get('exposure_time') + naxis_3 = b.get('step_nb') + if exposure is not None and naxis_3 is not None: + result = exposure * naxis_3 + return result + + +def _get_fwhm(base): + b = base.get('base').attrs + minimum = b.get('filter_nm_min') + maximum = b.get('filter_nm_max') + result = None + if minimum is not None and maximum is not None: + result = (maximum - minimum) / 2 + return result diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py new file mode 100644 index 00000000..f5d27e69 --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py @@ -0,0 +1,53 @@ +from caom2pipe import astro_composable as ac + + +def _get_datetime(base): + b = base.get('base').attrs + result = None + d = b.get('OBS_DATE') + t = b.get('OBS_TIME') + if d is not None and t is not None: + dt = f'{d} {t}' + result = ac.get_datetime(dt).value + return result + + +def _get_energy_resolving_power(base): + b = base.get('base').attrs + result = None + # Laurie Rousseau-Nepton - 11-08-22 + # Resolving Power could be given at the central wavelength of the filter. 
+ # The formula is R = 1/lambda[nm]* (2*(STEP[nm]*(NAXIS3-zpd_index))/1.2067 + step = b.get('STEP') + zpd_index = b.get('zpd_index') + naxis_3 = b.get('step_nb') + filter_max = b.get('filter_nm_max') + filter_min = b.get('filter_nm_min') + wl = None + if filter_max is not None and filter_min is not None: + wl = (filter_min + filter_max) / 2 + if step is not None and zpd_index is not None and naxis_3 is not None and wl is not None: + result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067 + return result + + +def _get_exposure(base): + b = base.get('base').attrs + # Laurie Rousseau-Nepton - 11-08-22 + # Int. Time could be the total (multiplied by the cube spectral dimension f.attrs.get(‘NAXIS3’) + result = None + exposure = b.get('exposure_time') + naxis_3 = b.get('step_nb') + if exposure is not None and naxis_3 is not None: + result = exposure * naxis_3 + return result + + +def _get_fwhm(base): + b = base.get('base').attrs + minimum = b.get('filter_nm_min') + maximum = b.get('filter_nm_max') + result = None + if minimum is not None and maximum is not None: + result = (maximum - minimum) / 2 + return result diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml new file mode 100644 index 00000000..e2ca5eaf --- /dev/null +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.xml @@ -0,0 +1,114 @@ + + + def + def + 2018-05-21T02:07:22.0 + + ORB + + OBJECT + science + + M101_Field4 + + + + def + timeseries + 2 + + + cadc:def/def.h5 + science + data + application/x-hdf5 + 32784 + md5:5a28f24e62324c1a12ff76a46c59bc54 + + + 0 + + + 1 + 2 + 4 + 3 + + + + RA---TAN-SIP + deg + + + DEC--TAN-SIP + deg + + + + 2048 + 2064 + + + + 1073.5 + 210.97094060537 + + + 1031.5 + 54.268513730755 + + + -8.7769937953286e-05 + -6.8586770533601e-07 + -6.8634246910166e-07 + 8.7709224842659e-05 + + + 2000.0 + + + + + WAVE + nm + + + 1 + 0.0 + + 0.5 + 18.649999999999977 + + + + TOPOCENT + SN3 + 4926.487818556442 + + + + + TIME + s + + + 1 + 0.0 + + 0.5 + 59694.3770853125 + + + + MJD + 11198.6 + + + + + + + + + + diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index 59964286..149ef920 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -197,7 +197,7 @@ def _header(fqn): header_mock.side_effect = _header temp = tempfile.NamedTemporaryFile() - sys.argv = ('{} -o {} --no_validate --observation {} {} {} {} ' + sys.argv = ('{} --debug -o {} --no_validate --observation {} {} {} {} ' '--resource-id ivo://cadc.nrc.ca/test'.format( application, temp.name, expected.collection, expected.observation_id, @@ -232,8 +232,10 @@ def _get_cardinality(directory): return '--lineage catalog/vos://cadc.nrc.ca!vospace/CAOMworkshop/' \ 'Examples/DAO/dao_c122_2016_012725.fits' elif 'taos_' in directory: - return '--lineage star04239531/' \ - 'cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5' + if 'def' in directory: + return '--lineage def/cadc:def/def.h5' + else: + return '--lineage star04239531/cadc:TAOSII/taos2_20220201T201317Z_star04239531.h5' else: return '' From 028dceec654badbcadf2f0910957956246fb66d7 Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 28 Oct 2022 15:28:36 -0700 Subject: [PATCH 35/38] CADC-11894 - remove external dependency. 
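
The caom2pipe helper was used here only to turn the OBS_DATE/OBS_TIME attribute pair into an MJD value, and astropy, already a dependency of this package, does the same job. A standalone sketch of the replacement conversion, with made-up attribute values standing in for what the HDF5 file provides:

    from datetime import datetime
    from astropy.time import Time

    d, t = '2022-02-01', '20:13:17.000000'   # hypothetical OBS_DATE / OBS_TIME values
    result = Time(datetime.strptime(f'{d} {t}', '%Y-%m-%d %H:%M:%S.%f'))
    result.format = 'mjd'
    print(result.value)   # a float MJD, approximately 59611.84 for these inputs

The blueprint maps this value to Chunk.time.axis.function.refCoord.val.
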
--- .../caom2utils/tests/data/taos_h5file/def/def.module | 8 +++++--- caom2utils/caom2utils/tests/data/taos_h5file/def/def.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module index f5d27e69..0902ad19 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module @@ -1,5 +1,5 @@ -from caom2pipe import astro_composable as ac - +from astropy.time import Time +from datetime import datetime def _get_datetime(base): b = base.get('base').attrs @@ -8,7 +8,9 @@ def _get_datetime(base): t = b.get('OBS_TIME') if d is not None and t is not None: dt = f'{d} {t}' - result = ac.get_datetime(dt).value + result = Time(datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f')) + result.format = 'mjd' + result = result.value return result diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py index f5d27e69..0902ad19 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py @@ -1,5 +1,5 @@ -from caom2pipe import astro_composable as ac - +from astropy.time import Time +from datetime import datetime def _get_datetime(base): b = base.get('base').attrs @@ -8,7 +8,9 @@ def _get_datetime(base): t = b.get('OBS_TIME') if d is not None and t is not None: dt = f'{d} {t}' - result = ac.get_datetime(dt).value + result = Time(datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f')) + result.format = 'mjd' + result = result.value return result From 2a2ace695d71c37b5b5b52b566f921c5257f6d9d Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 28 Oct 2022 15:35:27 -0700 Subject: [PATCH 36/38] CADC-11894 - flake8. --- .../caom2utils/tests/data/taos_h5file/def/def.module | 11 +++++++++-- .../caom2utils/tests/data/taos_h5file/def/def.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module index 0902ad19..2bf6d8ef 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.module @@ -1,6 +1,7 @@ from astropy.time import Time from datetime import datetime + def _get_datetime(base): b = base.get('base').attrs result = None @@ -28,7 +29,12 @@ def _get_energy_resolving_power(base): wl = None if filter_max is not None and filter_min is not None: wl = (filter_min + filter_max) / 2 - if step is not None and zpd_index is not None and naxis_3 is not None and wl is not None: + if ( + step is not None + and zpd_index is not None + and naxis_3 is not None + and wl is not None + ): result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067 return result @@ -36,7 +42,8 @@ def _get_energy_resolving_power(base): def _get_exposure(base): b = base.get('base').attrs # Laurie Rousseau-Nepton - 11-08-22 - # Int. Time could be the total (multiplied by the cube spectral dimension f.attrs.get(‘NAXIS3’) + # Int. 
Time could be the total (multiplied by the cube spectral dimension + # f.attrs.get(‘NAXIS3’) result = None exposure = b.get('exposure_time') naxis_3 = b.get('step_nb') diff --git a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py index 0902ad19..2bf6d8ef 100644 --- a/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py +++ b/caom2utils/caom2utils/tests/data/taos_h5file/def/def.py @@ -1,6 +1,7 @@ from astropy.time import Time from datetime import datetime + def _get_datetime(base): b = base.get('base').attrs result = None @@ -28,7 +29,12 @@ def _get_energy_resolving_power(base): wl = None if filter_max is not None and filter_min is not None: wl = (filter_min + filter_max) / 2 - if step is not None and zpd_index is not None and naxis_3 is not None and wl is not None: + if ( + step is not None + and zpd_index is not None + and naxis_3 is not None + and wl is not None + ): result = 1 / wl * 2 * (step * (naxis_3 - zpd_index)) / 1.2067 return result @@ -36,7 +42,8 @@ def _get_energy_resolving_power(base): def _get_exposure(base): b = base.get('base').attrs # Laurie Rousseau-Nepton - 11-08-22 - # Int. Time could be the total (multiplied by the cube spectral dimension f.attrs.get(‘NAXIS3’) + # Int. Time could be the total (multiplied by the cube spectral dimension + # f.attrs.get(‘NAXIS3’) result = None exposure = b.get('exposure_time') naxis_3 = b.get('step_nb') From faf5bb0c1c333977385cd2167e039bb790a1b0af Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Fri, 28 Oct 2022 16:52:21 -0700 Subject: [PATCH 37/38] CADC-11894 - remove the debug from the caom2gen call. --- caom2utils/caom2utils/tests/test_collections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index 149ef920..802c9c06 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -197,7 +197,7 @@ def _header(fqn): header_mock.side_effect = _header temp = tempfile.NamedTemporaryFile() - sys.argv = ('{} --debug -o {} --no_validate --observation {} {} {} {} ' + sys.argv = ('{} -o {} --no_validate --observation {} {} {} {} ' '--resource-id ivo://cadc.nrc.ca/test'.format( application, temp.name, expected.collection, expected.observation_id, From 49e3d014349d7c6afb52e30011c77f6ff3e04f5c Mon Sep 17 00:00:00 2001 From: Sharon Goliath Date: Wed, 16 Nov 2022 12:31:04 -0800 Subject: [PATCH 38/38] CADC-10715 - code review comments - remove CadcDataClient support from the StorageClientWrapper class. 
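
With the CadcDataClient branch gone, the wrapper always talks to a Storage Inventory service. A minimal usage sketch of the surviving interface; the Artifact URI and the anonymous Subject below are illustrative assumptions, and the resource_id is the constructor default:

    from cadcutils import net
    from caom2utils import data_util

    subject = net.Subject()   # anonymous; certificates or netrc are configured through net.Subject as needed
    client = data_util.StorageClientWrapper(subject, resource_id='ivo://cadc.nrc.ca/uvic/minoc')

    uri = 'cadc:TEST/test_file.fits'      # made-up Artifact URI
    meta = client.info(uri)               # FileInfo with an un-prefixed md5sum, or None if the file is unknown
    if meta is not None:
        client.get('/tmp', uri)           # writes /tmp/test_file.fits

put() and remove() keep the same URI-based signatures, minus the ad stream argument that only CadcDataClient needed.
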
--- caom2utils/caom2utils/caom2blueprint.py | 10 +- caom2utils/caom2utils/data_util.py | 175 +++++------------- caom2utils/caom2utils/tests/test_data_util.py | 82 +------- 3 files changed, 52 insertions(+), 215 deletions(-) diff --git a/caom2utils/caom2utils/caom2blueprint.py b/caom2utils/caom2utils/caom2blueprint.py index 2a151f0a..136c726f 100755 --- a/caom2utils/caom2utils/caom2blueprint.py +++ b/caom2utils/caom2utils/caom2blueprint.py @@ -5271,15 +5271,7 @@ def proc(args, obs_blueprints): raise RuntimeError(msg) subject = net.Subject.from_cmd_line_args(args) - if args.resource_id == 'ivo://cadc.nrc.ca/fits2caom2': - # if the resource_id is the default value, using CadcDataClient - client = data_util.StorageClientWrapper( - subject, using_storage_inventory=False) - else: - # using the new Storage Inventory system, since it's the one that - # depends on a resource_id - client = data_util.StorageClientWrapper( - subject, resource_id=args.resource_id) + client = data_util.StorageClientWrapper(subject, resource_id=args.resource_id) validate_wcs = True if args.no_validate: validate_wcs = False diff --git a/caom2utils/caom2utils/data_util.py b/caom2utils/caom2utils/data_util.py index b73a862a..171fb504 100644 --- a/caom2utils/caom2utils/data_util.py +++ b/caom2utils/caom2utils/data_util.py @@ -75,7 +75,7 @@ from astropy.io import fits from urllib.parse import urlparse -from cadcdata import FileInfo, CadcDataClient, StorageInventoryClient +from cadcdata import FileInfo, StorageInventoryClient from cadcutils import exceptions @@ -92,61 +92,31 @@ class StorageClientWrapper: """ - Wrap the choice between CadcDataClient and StorageInventoryClient. + Wrap the metrics collection with StorageInventoryClient. """ - def __init__( - self, - subject, - using_storage_inventory=True, - resource_id='ivo://cadc.nrc.ca/uvic/minoc', - metrics=None, - ): + def __init__(self, subject, resource_id='ivo://cadc.nrc.ca/uvic/minoc', metrics=None): """ - :param subject: net.Subject instance for authentication and - authorization - :param using_storage_inventory: if True will use - StorageInventoryClient for file operations at CADC. If False will - use CadcDataClient. - :param resource_id: str identifies the StorageInventoryClient - endpoint. If using_storage_inventory is set to False, it's - un-necessary. - :param metrics: caom2pipe.manaage_composable.Metrics instance. If set, - will track execution times, by action, from the beginning of - the method invocation to the end of the method invocation, - success or failure. Defaults to None, because fits2caom2 is - a stand-alone application. + :param subject: net.Subject instance for authentication and authorization + :param resource_id: str identifies the StorageInventoryClient endpoint. Defaults to the installation closest to + most of the current invocations. + :param metrics: caom2pipe.manaage_composable.Metrics instance. If set, will track execution times, by action, + from the beginning of the method invocation to the end of the method invocation, success or failure. + Defaults to None, because fits2caom2 is a stand-alone application. 
""" - if using_storage_inventory: - self._cadc_client = StorageInventoryClient( - subject=subject, resource_id=resource_id - ) - else: - self._cadc_client = CadcDataClient(subject=subject) - self._use_si = using_storage_inventory + self._cadc_client = StorageInventoryClient(subject=subject, resource_id=resource_id) self._metrics = metrics self._logger = logging.getLogger(self.__class__.__name__) def _add_fail_metric(self, action, name): - """Single location for the check for a self._metrics member in the - failure case.""" + """Single location for the check for a self._metrics member in the failure case.""" if self._metrics is not None: - client_name = 'si' if self._use_si else 'data' - self._metrics.observe_failure(action, client_name, name) + self._metrics.observe_failure(action, 'si', name) def _add_metric(self, action, name, start, value): - """Single location for the check for a self._metrics member in the - success case.""" + """Single location for the check for a self._metrics member in the success case.""" if self._metrics is not None: - client_name = 'si' if self._use_si else 'data' - self._metrics.observe( - start, - StorageClientWrapper._current(), - value, - action, - client_name, - name, - ) + self._metrics.observe(start, StorageClientWrapper._current(), value, action, 'si', name) def get(self, working_directory, uri): """ @@ -156,15 +126,12 @@ def get(self, working_directory, uri): :param uri: str this is an Artifact URI, representing the file to be retrieved. """ - self._logger.debug(f'Being get for {uri} in {working_directory}') + self._logger.debug(f'Begin get for {uri} in {working_directory}') start = StorageClientWrapper._current() try: archive, f_name = self._decompose(uri) fqn = path.join(working_directory, f_name) - if self._use_si: - self._cadc_client.cadcget(uri, dest=fqn) - else: - self._cadc_client.get_file(archive, f_name, destination=fqn) + self._cadc_client.cadcget(uri, dest=fqn) except Exception as e: self._add_fail_metric('get', uri) self._logger.debug(traceback.format_exc()) @@ -186,11 +153,7 @@ def get_head(self, uri): try: b = BytesIO() b.name = uri - if self._use_si: - self._cadc_client.cadcget(uri, b, fhead=True) - else: - archive, f_name = StorageClientWrapper._decompose(uri) - self._cadc_client.get_file(archive, f_name, b, fhead=True) + self._cadc_client.cadcget(uri, b, fhead=True) fits_header = b.getvalue().decode('ascii') b.close() self._add_metric('get_head', uri, start, len(fits_header)) @@ -207,44 +170,28 @@ def get_head(self, uri): def info(self, uri): """ - Retrieve the descriptive metdata associated with a file. + Retrieve the descriptive metadata associated with a file. 
         :param uri: str that is an Artifact URI, representing the file
             for which to retrieve metadata
         :return: cadcdata.FileInfo instance, no scheme for md5sum
         """
         self._logger.debug(f'Begin info for {uri}')
         try:
-            if self._use_si:
-                result = self._cadc_client.cadcinfo(uri)
-                # make the result look like the other possible ways to
-                # obtain metadata
-                result.md5sum = result.md5sum.replace('md5:', '')
-            else:
-                archive, f_name = StorageClientWrapper._decompose(uri)
-                temp = self._cadc_client.get_file_info(archive, f_name)
-                result = FileInfo(
-                    id=uri,
-                    size=temp.get('size'),
-                    file_type=temp.get('type'),
-                    md5sum=temp.get('md5sum').replace('md5:', ''),
-                    encoding=temp.get('encoding'),
-                )
+            result = self._cadc_client.cadcinfo(uri)
+            # make the result look like the other possible ways to
+            # obtain metadata
+            result.md5sum = result.md5sum.replace('md5:', '')
         except exceptions.NotFoundException:
             self._logger.info(f'cadcinfo:: {uri} not found')
             result = None
         self._logger.debug('End info')
         return result

-    def put(self, working_directory, uri, stream='default'):
+    def put(self, working_directory, uri):
         """
         Store a file at CADC.

-        :param working_directory: str fully-qualified name of where to find
-            the file on the local machine
-        :param uri: str that is an Artifact URI, representing the file to
-            be stored at CADC.
-        :param stream: str representing the namespace used by the
-            CadcDataClient. Not required if using the StorageInventoryClient.
-            'default' is default name for a lately-created ad archive.
+        :param working_directory: str fully-qualified name of where to find the file on the local machine
+        :param uri: str that is an Artifact URI, representing the file to be stored at CADC.
         """
         self._logger.debug(f'Begin put for {uri} in {working_directory}')
         start = self._current()
@@ -255,41 +202,22 @@ def put(self, working_directory, uri):
         try:
             local_meta = get_local_file_info(fqn)
             encoding = get_file_encoding(fqn)
-            if self._use_si:
-                replace = True
-                cadc_meta = self.info(uri)
-                if cadc_meta is None:
-                    replace = False
-                self._logger.debug(
-                    f'uri {uri} src {fqn} replace {replace} file_type '
-                    f'{local_meta.file_type} encoding {encoding} md5_checksum '
-                    f'{local_meta.md5sum}'
-                )
-                self._cadc_client.cadcput(
-                    uri,
-                    src=fqn,
-                    replace=replace,
-                    file_type=local_meta.file_type,
-                    file_encoding=encoding,
-                    md5_checksum=local_meta.md5sum,
-                )
-            else:
-                archive, f_name = self._decompose(uri)
-                # libmagic does a worse job with guessing file types
-                # than ad for .fits.gz => it will say 'binary'
-                self._logger.debug(
-                    f'archive {archive} f_name {f_name} archive_stream '
-                    f'{stream} mime_type {local_meta.file_type} '
-                    f'mime_encoding {encoding} md5_check True '
-                )
-                self._cadc_client.put_file(
-                    archive,
-                    f_name,
-                    archive_stream=stream,
-                    mime_type=local_meta.file_type,
-                    mime_encoding=encoding,
-                    md5_check=True,
-                )
+            replace = True
+            cadc_meta = self.info(uri)
+            if cadc_meta is None:
+                replace = False
+            self._logger.debug(
+                f'uri {uri} src {fqn} replace {replace} file_type {local_meta.file_type} encoding {encoding} '
+                f'md5_checksum {local_meta.md5sum}'
+            )
+            self._cadc_client.cadcput(
+                uri,
+                src=fqn,
+                replace=replace,
+                file_type=local_meta.file_type,
+                file_encoding=encoding,
+                md5_checksum=local_meta.md5sum,
+            )
             self._logger.info(f'Stored {fqn} at CADC.')
         except Exception as e:
             self._add_fail_metric('put', uri)
@@ -311,19 +239,14 @@ def remove(self, uri):
         """
         self._logger.debug(f'Begin remove for {uri}')
         start = StorageClientWrapper._current()
-        if self._use_si:
-            try:
-                self._cadc_client.cadcremove(uri)
-            except Exception as e:
-                self._add_fail_metric('remove', uri)
-                self._logger.debug(traceback.format_exc())
-                self._logger.error(e)
-                raise exceptions.UnexpectedException(
-                    f'Did not remove {uri} because {e}'
-                )
-        else:
-            raise NotImplementedError(
-                'No remove functionality for CadcDataClient'
+        try:
+            self._cadc_client.cadcremove(uri)
+        except Exception as e:
+            self._add_fail_metric('remove', uri)
+            self._logger.debug(traceback.format_exc())
+            self._logger.error(e)
+            raise exceptions.UnexpectedException(
+                f'Did not remove {uri} because {e}'
             )
         self._add_metric('remove', uri, start, value=None)
         self._logger.debug('End remove')
diff --git a/caom2utils/caom2utils/tests/test_data_util.py b/caom2utils/caom2utils/tests/test_data_util.py
index 886fcbab..14533d95 100644
--- a/caom2utils/caom2utils/tests/test_data_util.py
+++ b/caom2utils/caom2utils/tests/test_data_util.py
@@ -105,77 +105,6 @@ def test_get_file_type():
         ), f'wrong type {data_util.get_file_type(key)} for {key}'


-@patch('caom2utils.data_util.CadcDataClient', autospec=True)
-def test_cadc_data_client(cadc_client_mock):
-    test_subject = Mock(autospec=True)
-    test_uri = 'ad:TEST/test_file.fits'
-    test_working_directory = Path(test_fits2caom2.TESTDATA_DIR)
-    test_fqn = test_working_directory / 'test_file.fits'
-    if test_fqn.exists():
-        test_fqn.unlink()
-
-    def info_mock(ignore1, ignore2):
-        return {
-            'type': 'application/fits',
-            'md5sum': 'abc',
-            'size': 42,
-        }
-
-    def get_mock(ignore1, ignore2, destination, **kwargs):
-        fhead = kwargs.get('fhead')
-        if fhead:
-            destination.write(TEST_HEADERS)
-        else:
-            test_fqn.write_text('CadcDataClient')
-
-    cadc_client_mock.return_value.get_file_info.side_effect = info_mock
-    cadc_client_mock.return_value.get_file.side_effect = get_mock
-    cadc_client_mock.return_value.put_file = Mock(autospec=True)
-
-    test_wrapper = data_util.StorageClientWrapper(
-        subject=test_subject,
-        using_storage_inventory=False,
-    )
-    assert test_wrapper is not None, 'ctor failure'
-
-    # info
-    test_result = test_wrapper.info(test_uri)
-    _check_info_result(test_result)
-
-    # get_head
-    test_result = test_wrapper.get_head(test_uri)
-    _check_header_result(test_result)
-
-    # get
-    test_wrapper.get(test_working_directory, test_uri)
-    _check_get_result(test_fqn)
-
-    # put
-    test_wrapper.put(test_working_directory, test_uri)
-    _check_put_result(cadc_client_mock.return_value.put_file)
-
-    # delete
-    with pytest.raises(NotImplementedError):
-        test_wrapper.remove(test_uri)
-
-    cadc_client_mock.return_value.get_file_info.side_effect = (
-        exceptions.UnexpectedException('get_file_info')
-    )
-    cadc_client_mock.return_value.get_file.side_effect = (
-        exceptions.UnexpectedException('get_file')
-    )
-    cadc_client_mock.return_value.put_file.side_effect = (
-        exceptions.UnexpectedException('put_file')
-    )
-    _fail_mock(test_wrapper, test_uri, test_working_directory)
-
-    cadc_client_mock.return_value.get_file_info.side_effect = (
-        exceptions.NotFoundException('cadcinfo')
-    )
-    test_result = test_wrapper.info(test_uri)
-    assert test_result is None, 'expected when not found'
-
-
 @patch('caom2utils.data_util.StorageInventoryClient')
 def test_storage_inventory_client(cadc_client_mock):
     test_subject = Mock(autospec=True)
@@ -201,10 +130,7 @@ def get_si_mock(ignore2, dest, **kwargs):
     cadc_client_mock.return_value.cadcput = Mock(autospec=True)
     cadc_client_mock.return_value.cadcremove = Mock(autospec=True)

-    test_wrapper = data_util.StorageClientWrapper(
-        subject=test_subject,
-        using_storage_inventory=True,
-    )
+    test_wrapper = data_util.StorageClientWrapper(subject=test_subject)
     assert test_wrapper is not None, 'ctor failure'

     # info
@@ -264,11 +190,7 @@ def _get(working_directory, uri):
     client_mock.return_value.cadcget.side_effect = _get
     client_mock.return_value.cadcremove.side_effect = Mock()

-    test_wrapper = data_util.StorageClientWrapper(
-        subject=test_subject,
-        using_storage_inventory=True,
-        metrics=test_metrics,
-    )
+    test_wrapper = data_util.StorageClientWrapper(subject=test_subject, metrics=test_metrics)
     assert test_wrapper is not None, 'ctor failure'

     # test metrics failure
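Note on the resulting API: after this change a caller constructs the wrapper with a cadcutils net.Subject and, optionally, a resource_id and a metrics collector, then uses get/get_head/info/put/remove, all backed by StorageInventoryClient. The following is a minimal usage sketch, not part of the patch; the anonymous Subject() and the cadc:TEST/test_file.fits URI are illustrative placeholders.

# Sketch only: exercises the StorageClientWrapper surface that remains after
# this patch; every call below goes through StorageInventoryClient.
from cadcutils import net
from caom2utils import data_util

subject = net.Subject()  # anonymous; substitute a certificate/netrc subject as needed
client = data_util.StorageClientWrapper(
    subject, resource_id='ivo://cadc.nrc.ca/uvic/minoc'
)

test_uri = 'cadc:TEST/test_file.fits'  # hypothetical Artifact URI
file_info = client.info(test_uri)      # cadcdata.FileInfo, md5sum returned without the 'md5:' scheme
headers = client.get_head(test_uri)    # header retrieval via cadcget(..., fhead=True)
client.get('/tmp', test_uri)           # writes /tmp/test_file.fits
client.put('/tmp', test_uri)           # cadcput, replace=True only when the file already exists at CADC
client.remove(test_uri)                # cadcremove; the NotImplementedError branch is gone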
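The metrics hook keeps its shape, but the client_name argument passed by _add_metric and _add_fail_metric is now always 'si'. Any object that provides observe() and observe_failure() with the signatures those helpers call will do; the class below is a hypothetical stand-in for illustration, not caom2pipe's Metrics.

# Hypothetical stand-in matching the calls made by _add_metric (success) and
# _add_fail_metric (failure) after this patch.
import logging


class SketchMetrics:
    def observe(self, start, end, value, action, client_name, name):
        # success path: client_name is always 'si' now
        logging.info(f'{action} {name} via {client_name}: value={value}')

    def observe_failure(self, action, client_name, name):
        logging.warning(f'{action} {name} via {client_name} failed')


# wiring, reusing subject and data_util from the sketch above:
# client = data_util.StorageClientWrapper(subject, metrics=SketchMetrics())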
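On the test side the same simplification applies: there is a single client to patch and no using_storage_inventory flag to exercise. Below is a hedged sketch of the surviving pattern with a hypothetical test name and mock return value; the real test also covers get/get_head/put/remove and the metrics failure path with fixtures not shown here.

# Sketch of the post-patch test pattern: only StorageInventoryClient is mocked.
from unittest.mock import Mock, patch

from cadcdata import FileInfo
from caom2utils import data_util


@patch('caom2utils.data_util.StorageInventoryClient')
def test_wrapper_info_strips_scheme(si_mock):  # hypothetical test name
    si_mock.return_value.cadcinfo.return_value = FileInfo(
        id='cadc:TEST/test_file.fits',
        size=42,
        file_type='application/fits',
        md5sum='md5:abc',
    )
    test_wrapper = data_util.StorageClientWrapper(subject=Mock(autospec=True))
    result = test_wrapper.info('cadc:TEST/test_file.fits')
    assert result.md5sum == 'abc', 'wrapper should strip the md5: scheme'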