From 5d883391d8be6af70aea21ac2b6fd3e12fc9b010 Mon Sep 17 00:00:00 2001 From: roeldegoede Date: Tue, 28 Jan 2025 17:47:30 +0100 Subject: [PATCH] Overwriting files that were lazily loaded results in permission errors. Loading the data first solves that problem. ---
NOTE(review): reviewer commentary, not part of the commit message. It sits after the
three-dash separator and before the diffstat, where patch notes conventionally go.
NOTE(review): this copy of the patch has its line breaks collapsed; indentation and blank
lines of the hunks below cannot be recovered from this text alone.
Purpose of the change: before each ds.to_netcdf(...) call in quadtree.py, sfincs.py and
subgrid.py, call the new utils.check_exists_and_lazy(ds, file_name).
What check_exists_and_lazy does, as written in the utils.py hunk:
  1. returns immediately when os.path.exists(file_name) is false;
  2. builds lazy_vars from `not data_array._in_memory` for every entry of ds.data_vars;
  3. calls ds.load() when any entry is flagged, then returns None.
Points to confirm before merging:
  * Only ds.data_vars is inspected; coordinates are not checked. Presumably lazily loaded
    coordinates could keep the same file reference - TODO confirm.
  * `_in_memory` is an underscore-prefixed (non-public) attribute; verify it is available on
    the data variables of both dataset types named in the docstring.
  * The commented-out `if all(lazy_vars)` branch and the trailing bare `return` are dead code.
  * sfincs.py and subgrid.py call `utils.check_exists_and_lazy`, but neither file gets an
    import hunk in this patch - confirm `utils` is already imported in both modules.
  * The docstring documents `file_name : str`, while QuadtreeGrid.write accepts
    Union[str, Path].
  * test_overwrite_quadtree_nc wraps the first ds.to_netcdf(nc_copy) in
    try/except PermissionError/pass, so the test passes whether or not that write fails; only
    the second write (after the check) is effectively exercised.
  * The test ends with os.remove(nc_copy) although nc_copy is created under tmpdir.
hydromt_sfincs/quadtree.py | 4 +++- hydromt_sfincs/sfincs.py | 2 ++ hydromt_sfincs/subgrid.py | 2 ++ hydromt_sfincs/utils.py | 29 +++++++++++++++++++++++++++++ tests/test_quadtree.py | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/hydromt_sfincs/quadtree.py b/hydromt_sfincs/quadtree.py index f8985451..4a88bc02 100644 --- a/hydromt_sfincs/quadtree.py +++ b/hydromt_sfincs/quadtree.py @@ -11,7 +11,7 @@ import xugrid as xu from pyproj import CRS, Transformer -from hydromt_sfincs.utils import xu_open_dataset +from hydromt_sfincs.utils import xu_open_dataset, check_exists_and_lazy # optional dependency try: @@ -125,6 +125,8 @@ def write(self, file_name: Union[str, Path] = "sfincs.nc", version: int = 0): ) ds.attrs = attrs + # before writing, check if the file already exists while data is still lazily loaded + check_exists_and_lazy(ds, file_name) ds.to_netcdf(file_name) def map_overlay(self, file_name, xlim=None, ylim=None, color="black", width=800): diff --git a/hydromt_sfincs/sfincs.py b/hydromt_sfincs/sfincs.py index 7f5b0d5c..7fb97c2d 100644 --- a/hydromt_sfincs/sfincs.py +++ b/hydromt_sfincs/sfincs.py @@ -3337,6 +3337,8 @@ def write_forcing(self, data_vars: Union[List, str] = None, fmt: str = "%7.2f"): self.write_vector(variables=f"forcing.{list(rename.keys())[0]}") # write 2D gridded timeseries else: + # before writing, check if the file already exists while data is still lazily loaded + utils.check_exists_and_lazy(ds, fn) ds.to_netcdf(fn, encoding=encoding) def read_states(self): diff --git a/hydromt_sfincs/subgrid.py b/hydromt_sfincs/subgrid.py index 28dd529c..8a1f4ba2 100644 --- a/hydromt_sfincs/subgrid.py +++ b/hydromt_sfincs/subgrid.py @@ -841,6 
+841,8 @@ def write(self, file_name): # fix names to match SFINCS convention # ds = ds.rename_vars({"uv_navg": "uv_navg_w", "uv_ffit": "uv_fnfit"}) + # before writing, check if the file already exists while data is still lazily loaded + utils.check_exists_and_lazy(ds, file_name) ds.to_netcdf(file_name) diff --git a/hydromt_sfincs/utils.py b/hydromt_sfincs/utils.py index df1e1a7b..c3e5e6aa 100644 --- a/hydromt_sfincs/utils.py +++ b/hydromt_sfincs/utils.py @@ -6,6 +6,7 @@ import copy import io import logging +import os from datetime import datetime from pathlib import Path from typing import Dict, List, Tuple, Union @@ -53,6 +54,8 @@ "rotated_grid", "build_overviews", "find_uv_indices", + "xu_open_dataset", + "check_exists_and_lazy", ] logger = logging.getLogger(__name__) @@ -1338,3 +1341,29 @@ def xu_open_dataset(*args, **kwargs): """ with xr.open_dataset(*args, **kwargs) as ds: return xu.UgridDataset(ds) + + +def check_exists_and_lazy(ds, file_name): + """If a netcdf file is read lazily, the file can not be overwritten. + This function checks whether the file already exists, if so, it checks + if the data is lazily loaded. If so, data should be loaded before writing. + + Parameters + ---------- + ds : xarray.Dataset, xu.UgridDataset + The dataset to be written to a netcdf file. + file_name : str + The path to the netcdf file. + """ + if not os.path.exists(file_name): + return + + # Check for lazy loading + lazy_vars = [not data_array._in_memory for data_array in ds.data_vars.values()] + + # if all(lazy_vars): + # return # All variables are lazy-loaded, skip writing? 
+ + if any(lazy_vars): + ds.load() # Some variables are lazy-loaded, load them into memory + return diff --git a/tests/test_quadtree.py b/tests/test_quadtree.py index 711e4552..e3431671 100644 --- a/tests/test_quadtree.py +++ b/tests/test_quadtree.py @@ -1,7 +1,10 @@ from os.path import join, dirname, abspath import numpy as np +import os from pyproj import CRS +import shutil +from hydromt_sfincs import utils from hydromt_sfincs.quadtree import QuadtreeGrid TESTDATADIR = join(dirname(abspath(__file__)), "data") @@ -35,3 +38,34 @@ def test_quadtree_io(tmpdir): assert np.sum(qtr2.data["msk"].values) == 4298 # assert the dep variable is the same assert np.sum(qtr.data["dep"].values) == np.sum(qtr2.data["dep"].values) + + +def test_overwrite_quadtree_nc(tmpdir): + ncfile = join(TESTDATADIR, "sfincs_test_quadtree", "sfincs.nc") + nc_copy = join(str(tmpdir), "sfincs.nc") + + # Create file + copy + shutil.copy(ncfile, nc_copy) + + # Open the copy with xu_open_dataset + # This opens the file lazily + ds = utils.xu_open_dataset(nc_copy) + + # Convert to dataset + ds = ds.ugrid.to_dataset() + + # Try to write + # NOTE this should fail because it still has lazy references to the file + try: + ds.to_netcdf(nc_copy) + except PermissionError: + pass + + # Now perform the check and lazy loading check + utils.check_exists_and_lazy(ds, nc_copy) + + # Try to overwrite the file + ds.to_netcdf(nc_copy) + + # Remove the copied file + os.remove(nc_copy)