diff --git a/src/mintpy/load_data.py b/src/mintpy/load_data.py
index 972e01228..fd41b67ab 100644
--- a/src/mintpy/load_data.py
+++ b/src/mintpy/load_data.py
@@ -676,8 +676,8 @@ def prepare_metadata(iDict):
         ut.print_command_line(script_name, iargs)
         try:
             prep_module.main(iargs)
-        except:
-            warnings.warn('prep_isce.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_isce.py failed: {e}.\nAssuming its result exists and continuing...')
 
     # [optional] for topsStack: SAFE_files.txt --> S1A/B_date.txt
     if os.path.isfile(meta_file) and isce_utils.get_processor(meta_file) == 'topsStack':
@@ -731,8 +731,8 @@ def prepare_metadata(iDict):
             ut.print_command_line(script_name, iargs)
             try:
                 prep_module.main(iargs)
-            except:
-                warnings.warn('prep_aria.py failed. Assuming its result exists and continue...')
+            except Exception as e:
+                warnings.warn(f'prep_aria.py failed: {e}.\nAssuming its result exists and continuing...')
 
     elif processor == 'gmtsar':
         # use the custom template file if exists & input
@@ -746,8 +746,8 @@ def prepare_metadata(iDict):
         ut.print_command_line(script_name, iargs)
         try:
             prep_module.main(iargs)
-        except:
-            warnings.warn('prep_gmtsar.py failed. Assuming its result exists and continue...')
+        except Exception as e:
+            warnings.warn(f'prep_gmtsar.py failed: {e}.\nAssuming its result exists and continuing...')
 
     return
diff --git a/src/mintpy/objects/stack.py b/src/mintpy/objects/stack.py
index 655bf6813..11d1fc13a 100644
--- a/src/mintpy/objects/stack.py
+++ b/src/mintpy/objects/stack.py
@@ -658,8 +658,8 @@ def read(self, datasetName=GEOMETRY_DSET_NAMES[0], box=None, print_msg=True):
             if print_msg:
                 print(f'reading {familyName:<15} data from file: {self.file} ...')
 
-            if len(ds.shape) == 1:
-                data = ds[:]
+            if len(ds.shape) <= 1:
+                data = ds[()]
             elif len(ds.shape) == 2:
                 data = ds[box[1]:box[3], box[0]:box[2]]
             else:
diff --git a/src/mintpy/save_hdfeos5.py b/src/mintpy/save_hdfeos5.py
index 8c05b88c2..1c95a5007 100644
--- a/src/mintpy/save_hdfeos5.py
+++ b/src/mintpy/save_hdfeos5.py
@@ -406,6 +406,8 @@ def write_hdf5_file(metadata, out_file, ts_file, tcoh_file, scoh_file, mask_file
         else:
             raise ValueError(f'Un-recognized dataset name: {dsName}!')
 
+        if data.ndim < 1:  # skip the scalar grid mapping dataset
+            continue
         # write
         dset = create_hdf5_dataset(group, dsName, data)
diff --git a/src/mintpy/utils/prep_utils.py b/src/mintpy/utils/prep_utils.py
new file mode 100644
index 000000000..01afc9107
--- /dev/null
+++ b/src/mintpy/utils/prep_utils.py
@@ -0,0 +1,257 @@
+"""Utilities to make HDF5 files CF-compliant and readable by GDAL."""
+from __future__ import annotations
+
+import datetime
+
+import h5py
+import numpy as np
+import pyproj
+
+from mintpy.utils import readfile
+
+__all__ = ["write_coordinate_system"]
+
+TIME_DSET_NAME = "time"
+
+
+class CoordinateError(ValueError):
+    pass
+
+
+def write_coordinate_system(
+    filename: str,
+    dset_names: list[str],
+    xy_dim_names: tuple[str, str] = ("x", "y"),
+    grid_mapping_dset: str = "spatial_ref",
+) -> None:
+    """Write the coordinate system metadata to datasets within `filename`.
+
+    Parameters
+    ----------
+    filename : str
+        File path.
+    dset_names : list[str]
+        List of dataset names within the HDF5 file.
+    xy_dim_names : tuple[str, str], optional
+        x and y dimension names, by default ("x", "y").
+    grid_mapping_dset : str, optional
+        Dataset name for the grid mapping HDF5 attribute.
+        By default "spatial_ref" (matching `rioxarray`).
+ """ + atr = readfile.read_attribute(filename) + if "X_FIRST" not in atr: + raise CoordinateError(f"{filename} is not geocoded.") + # TODO: It should be possible to write in the 2D lat/lon arrays + # for radar coordinates. + # But unclear what quick benefit we get from that like with geocoded files. + + epsg = int(atr.get("EPSG", 4326)) + crs = pyproj.CRS.from_epsg(epsg) + + with h5py.File(filename, "a") as hf: + _setup_xy_dimensions(hf, atr, crs, xy_dim_names) + srs_dset = hf.require_dataset(grid_mapping_dset, shape=(), dtype=int) + srs_dset.attrs.update(crs.to_cf()) + + for dset_name in dset_names: + dset = hf[dset_name] + if dset.ndim < 2: + print(f"Skipping {dset.ndim}-dimensional dset {dset_name}") + continue + # Setup the dataset holding the SRS information + dset.attrs["grid_mapping"] = grid_mapping_dset + _attach_xy_dimensions(hf, dset, xy_dim_names) + if dset.ndim == 3: + _setup_time_dimension(hf, dset) + + +def _get_xy_arrays(atr: dict) -> tuple[np.ndarray, np.ndarray]: + """ + Generate x and y arrays from attribute dictionary. + + Parameters + ---------- + atr : dict + MintPy attribute dictionary containing ROIPAC raster attributes. + + Returns + ------- + tuple[np.ndarray, np.ndarray] + x and y arrays. + """ + x0 = float(atr["X_FIRST"]) + y0 = float(atr["Y_FIRST"]) + x_step = float(atr["X_STEP"]) + y_step = float(atr["Y_STEP"]) + rows = int(atr["LENGTH"]) + cols = int(atr["WIDTH"]) + + x_arr = x0 + x_step * np.arange(cols) + y_arr = y0 + y_step * np.arange(rows) + + # Shift by half pixel to get the centers + x_arr += x_step / 2 + y_arr += y_step / 2 + return x_arr, y_arr + + +def _get_coordinate_metadata(crs: pyproj.CRS) -> tuple[dict, dict]: + """Get HDF5 coordinate metadata based on CRS type. + + Parameters + ---------- + crs : pyproj.CRS + Coordinate Reference System. + + Returns + ------- + tuple[dict, dict] + Dictionary of metadata for x and y respectively. + """ + x_coord_attrs = {"axis": "X"} + y_coord_attrs = {"axis": "Y"} + + if crs.is_projected: + units = "meters" + # X metadata + x_coord_attrs.update( + { + "long_name": "x coordinate of projection", + "standard_name": "projection_x_coordinate", + "units": units, + } + ) + # Y metadata + y_coord_attrs.update( + { + "long_name": "y coordinate of projection", + "standard_name": "projection_y_coordinate", + "units": units, + } + ) + elif crs.is_geographic: + # X metadata + x_coord_attrs.update( + { + "long_name": "longitude", + "standard_name": "longitude", + "units": "degrees_east", + } + ) + # Y metadata + y_coord_attrs.update( + { + "long_name": "latitude", + "standard_name": "latitude", + "units": "degrees_north", + } + ) + return x_coord_attrs, y_coord_attrs + + +def _setup_xy_dimensions( + hf: h5py.File, + atr: dict, + crs: pyproj.CRS, + xy_dim_names: tuple[str, str] = ("x", "y"), +): + """Create X/Y spatial dimension in the HDF5 file. + + Sets the datasets as HDF5 dimension scales. + + Parameters + ---------- + hf : h5py.File + HDF5 file object. + dset : h5py.Dataset + Dataset within the HDF5 file. + atr : dict + MintPy attribute dictionary containing ROIPAC raster attributes. + crs : pyproj.CRS + Coordinate Reference System. + xy_dim_names : tuple[str, str], optional + x and y dimension names, by default ("x", "y"). + + Returns + ------- + Optional[list[datetime.datetime]] + list of dates if they exist, otherwise None. 
+ """ + x_dim_name, y_dim_name = xy_dim_names + # add metadata to x, y coordinates + x_arr, y_arr = _get_xy_arrays(atr) + x_dim_dset = hf.require_dataset( + x_dim_name, shape=x_arr.shape, dtype=x_arr.dtype, data=x_arr + ) + x_dim_dset.make_scale(x_dim_name) + y_dim_dset = hf.require_dataset( + y_dim_name, shape=y_arr.shape, dtype=y_arr.dtype, data=y_arr + ) + y_dim_dset.make_scale(y_dim_name) + + x_coord_attrs, y_coord_attrs = _get_coordinate_metadata(crs) + + y_dim_dset.attrs.update(y_coord_attrs) + x_dim_dset.attrs.update(x_coord_attrs) + + +def _attach_xy_dimensions( + hf: h5py.File, + dset: h5py.Dataset, + xy_dim_names: tuple[str, str] = ("x", "y"), +): + """ + Setup time dimension in the HDF5 file. + + Parameters + ---------- + hf : h5py.File + HDF5 file object. + dset : h5py.Dataset + Dataset within the HDF5 file. + xy_dim_names : tuple[str, str], optional + x and y dimension names, by default ("x", "y"). + + Returns + ------- + Optional[list[datetime.datetime]] + list of dates if they exist, otherwise None. + """ + x_dim_name, y_dim_name = xy_dim_names + ndim = dset.ndim + dset.dims[ndim - 1].attach_scale(hf[x_dim_name]) + dset.dims[ndim - 2].attach_scale(hf[y_dim_name]) + dset.dims[ndim - 1].label = x_dim_name + dset.dims[ndim - 2].label = y_dim_name + + +def _setup_time_dimension(hf: h5py.File, dset: h5py.Dataset): + """Setup time dimension in the HDF5 file. + + Parameters + ---------- + hf : h5py.File + HDF5 file object. + dset : h5py.Dataset + Dataset within the HDF5 file. + """ + if "date" not in hf or hf["date"].ndim == 2: + print(f"'date' dataset not in {hf}, skipping 'time' dimension") + # If we want to do something other than time as a 3rd dimension + # (e.g. for ifg date pairs) we'll need to figure out what other valid + # dims there are. 
+        return None
+
+    date_arr = [
+        datetime.datetime.strptime(ds, "%Y%m%d") for ds in hf["date"][()].astype(str)
+    ]
+    days_since = np.array([(d - date_arr[0]).days for d in date_arr])
+    dt_dim = hf.require_dataset(
+        TIME_DSET_NAME, data=days_since, shape=days_since.shape, dtype=days_since.dtype
+    )
+    dt_dim.make_scale(TIME_DSET_NAME)
+    cf_attrs = dict(
+        units=f"days since {date_arr[0]}", calendar="proleptic_gregorian"
+    )
+    dt_dim.attrs.update(cf_attrs)
+    dset.dims[0].attach_scale(dt_dim)
+    dset.dims[0].label = TIME_DSET_NAME
diff --git a/src/mintpy/utils/writefile.py b/src/mintpy/utils/writefile.py
index 3862d3c7c..a34a3c660 100644
--- a/src/mintpy/utils/writefile.py
+++ b/src/mintpy/utils/writefile.py
@@ -14,7 +14,7 @@
 import h5py
 import numpy as np
 
-from mintpy.utils import readfile
+from mintpy.utils import prep_utils, readfile
 
 
 def write(datasetDict, out_file, metadata=None, ref_file=None, compression=None, ds_unit_dict=None, print_msg=True):
@@ -309,9 +309,9 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_
                 ds = fr[key]
                 if isinstance(ds, h5py.Dataset):
 
-                    # auxliary dataset
+                    # auxiliary dataset
                     if ds.shape[-2:] != shape2d_orig:
-                        ds_name_dict[key] = [ds.dtype, ds.shape, ds[:]]
+                        ds_name_dict[key] = [ds.dtype, ds.shape, ds[()]]
 
                     # dataset
                     else:
@@ -360,16 +360,16 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_
                 t=str(data_type),
                 s=str(data_shape),
                 c=ds_comp))
-            ds = f.create_dataset(key,
-                                  shape=data_shape,
-                                  maxshape=max_shape,
-                                  dtype=data_type,
-                                  chunks=True,
-                                  compression=ds_comp)
-
-            # write auxliary data
+            if len(data_shape) < 1:
+                # scalar datasets can't be chunked or compressed
+                kwargs = {}
+            else:
+                kwargs = dict(maxshape=max_shape, chunks=True, compression=ds_comp)
+            ds = f.create_dataset(key, shape=data_shape, dtype=data_type, **kwargs)
+
+            # write auxiliary data
             if len(ds_name_dict[key]) > 2 and ds_name_dict[key][2] is not None:
-                ds[:] = np.array(ds_name_dict[key][2])
+                ds[()] = np.array(ds_name_dict[key][2])
 
     # write attributes in root level
     for key, value in meta.items():
@@ -382,6 +382,13 @@ def layout_hdf5(fname, ds_name_dict=None, metadata=None, ds_unit_dict=None, ref_
             f[key].attrs['UNIT'] = value
             vprint(f'add /{key:<{max_digit}} attribute: UNIT = {value}')
 
+    if ds_name_dict:
+        vprint(f'Adding coordinate metadata to all datasets in {fname}')
+        try:
+            prep_utils.write_coordinate_system(fname, list(ds_name_dict.keys()))
+        except prep_utils.CoordinateError:
+            vprint('Skipping coordinate metadata: file is not geocoded.')
+
     vprint(f'close HDF5 file: {fname}')
     return fname
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 000000000..e079f8a60
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1 @@
+pytest
diff --git a/tests/test_prep_utils.py b/tests/test_prep_utils.py
new file mode 100644
index 000000000..d6ff16175
--- /dev/null
+++ b/tests/test_prep_utils.py
@@ -0,0 +1,124 @@
+from datetime import date, timedelta
+
+import h5py
+import numpy as np
+import pytest
+from osgeo import gdal
+
+from mintpy.utils import prep_utils, writefile
+
+gdal.UseExceptions()
+
+
+@pytest.fixture
+def metadata():
+    return {
+        "EPSG": "32611",
+        "X_FIRST": "480540.0",
+        "X_STEP": "30.0",
+        "X_UNIT": "meters",
+        "Y_FIRST": "3902670.0",
+        "Y_STEP": "-30.0",
+        "Y_UNIT": "meters",
+        "LENGTH": "100",
+        "WIDTH": "100",
+        "REF_X": "50",
+        "REF_Y": "50",
+        # Sample S1 metadata
+        "ALOOKS": "1",
+        "AZIMUTH_PIXEL_SIZE": "14.1",
+        "CENTER_LINE_UTC": "49436.654675",
+        "EARTH_RADIUS": "6371000.0",
"6371000.0", + "HEIGHT": "750000.0", + "ORBIT_DIRECTION": "Descending", + "PLATFORM": "S1A", + "RANGE_PIXEL_SIZE": "2.329562114715323", + "RLOOKS": "1", + "STARTING_RANGE": "845960.7488998839", + "UNIT": "m", + "WAVELENGTH": "0.05546576", + "DATA_TYPE": "float32", + "PROCESSOR": "mintpy", + "NO_DATA_VALUE": "none", + } + + +@pytest.fixture +def data2d(metadata): + rows = int(metadata["LENGTH"]) + cols = int(metadata["WIDTH"]) + return np.random.randn(rows, cols).astype("float32") + + +@pytest.fixture +def dates(): + num_dates = 10 + date_list = [date(2020, 1, 1) + i * timedelta(days=12) for i in range(num_dates)] + date_strs = [d.strftime("%Y%m%d") for d in date_list] + return np.array(date_strs, dtype=np.string_) + + +@pytest.fixture +def data3d(dates, metadata): + rows = int(metadata["LENGTH"]) + cols = int(metadata["WIDTH"]) + return np.random.randn(len(dates), rows, cols).astype("float32") + + +def _check_gdal_metadata(gdal_str, meta): + ds = gdal.Open(gdal_str) + assert ds is not None + assert int(ds.GetSpatialRef().GetAuthorityCode(None)) == int(meta["EPSG"]) + + gt = ds.GetGeoTransform() + # GEOTRANSFORM = [204500.0, 5.0, 0.0, 2151300.0, 0.0, -10.0] + assert gt[0] == float(meta["X_FIRST"]) + assert gt[3] == float(meta["Y_FIRST"]) + assert gt[1] == float(meta["X_STEP"]) + assert gt[5] == float(meta["Y_STEP"]) + ds = None + + +def test_2d(metadata, data2d, tmp_path): + rows, cols = data2d.shape + ds_name_dict = { + "velocity": [np.float32, (rows, cols), data2d], + } + + # initiate HDF5 file + metadata["FILE_TYPE"] = "velocity" + # meta["REF_DATE"] = ref_date # might not be the first date! + outfile = tmp_path / "velocity.h5" + writefile.layout_hdf5(str(outfile), ds_name_dict, metadata=metadata) + + # Check the metadata was written + with h5py.File(outfile) as hf: + assert "x" in hf + assert "y" in hf + assert prep_utils.TIME_DSET_NAME not in hf + assert "spatial_ref" in hf + + _check_gdal_metadata(f"NETCDF:{outfile}:velocity", metadata) + + +def test_3d(metadata, dates, data3d, tmp_path): + num_dates, rows, cols = data3d.shape + ds_name_dict = { + "date": [dates.dtype, (num_dates,), dates], + "timeseries": [np.float32, (num_dates, rows, cols), data3d], + } + + # initiate HDF5 file + metadata["FILE_TYPE"] = "timeseries" + # meta["REF_DATE"] = ref_date # might not be the first date! + outfile = tmp_path / "timeseries.h5" + writefile.layout_hdf5(str(outfile), ds_name_dict, metadata=metadata) + + # Check the metadata was written + with h5py.File(outfile) as hf: + assert "x" in hf + assert "y" in hf + assert prep_utils.TIME_DSET_NAME in hf + assert "spatial_ref" in hf + + _check_gdal_metadata(f"NETCDF:{outfile}:timeseries", metadata)