Skip to content

Commit

Permalink
Overwriting files that were lazily loaded results in permission error…
Browse files Browse the repository at this point in the history
…s. Loading the data first solves that problem.
  • Loading branch information
roeldegoede committed Jan 28, 2025
1 parent 3849deb commit 5d88339
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 1 deletion.
4 changes: 3 additions & 1 deletion hydromt_sfincs/quadtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import xugrid as xu
from pyproj import CRS, Transformer

from hydromt_sfincs.utils import xu_open_dataset
from hydromt_sfincs.utils import xu_open_dataset, check_exists_and_lazy

# optional dependency
try:
Expand Down Expand Up @@ -125,6 +125,8 @@ def write(self, file_name: Union[str, Path] = "sfincs.nc", version: int = 0):
)

ds.attrs = attrs
# before writing, check if the file already exists while data is still lazily loaded
check_exists_and_lazy(ds, file_name)
ds.to_netcdf(file_name)

def map_overlay(self, file_name, xlim=None, ylim=None, color="black", width=800):
Expand Down
2 changes: 2 additions & 0 deletions hydromt_sfincs/sfincs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3337,6 +3337,8 @@ def write_forcing(self, data_vars: Union[List, str] = None, fmt: str = "%7.2f"):
self.write_vector(variables=f"forcing.{list(rename.keys())[0]}")
# write 2D gridded timeseries
else:
# before writing, check if the file already exists while data is still lazily loaded
utils.check_exists_and_lazy(ds, fn)
ds.to_netcdf(fn, encoding=encoding)

def read_states(self):
Expand Down
2 changes: 2 additions & 0 deletions hydromt_sfincs/subgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,8 @@ def write(self, file_name):
# fix names to match SFINCS convention
# ds = ds.rename_vars({"uv_navg": "uv_navg_w", "uv_ffit": "uv_fnfit"})

# before writing, check if the file already exists while data is still lazily loaded
utils.check_exists_and_lazy(ds, file_name)
ds.to_netcdf(file_name)


Expand Down
29 changes: 29 additions & 0 deletions hydromt_sfincs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import copy
import io
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Union
Expand Down Expand Up @@ -53,6 +54,8 @@
"rotated_grid",
"build_overviews",
"find_uv_indices",
"xu_open_dataset",
"check_exists_and_lazy",
]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -1338,3 +1341,29 @@ def xu_open_dataset(*args, **kwargs):
"""
with xr.open_dataset(*args, **kwargs) as ds:
return xu.UgridDataset(ds)


def check_exists_and_lazy(ds, file_name):
"""If a netcdf file is read lazily, the file can not be overwritten.
This function checks whether the file already exists, if so, it checks
if the data is lazily loaded. If so, data should be loaded before writing.
Parameters
----------
ds : xarray.Dataset, xu.UgridDataset
The dataset to be written to a netcdf file.
file_name : str
The path to the netcdf file.
"""
if not os.path.exists(file_name):
return

# Check for lazy loading
lazy_vars = [not data_array._in_memory for data_array in ds.data_vars.values()]

# if all(lazy_vars):
# return # All variables are lazy-loaded, skip writing?

if any(lazy_vars):
ds.load() # Some variables are lazy-loaded, load them into memory
return
34 changes: 34 additions & 0 deletions tests/test_quadtree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from os.path import join, dirname, abspath
import numpy as np
import os
from pyproj import CRS
import shutil

from hydromt_sfincs import utils
from hydromt_sfincs.quadtree import QuadtreeGrid

TESTDATADIR = join(dirname(abspath(__file__)), "data")
Expand Down Expand Up @@ -35,3 +38,34 @@ def test_quadtree_io(tmpdir):
assert np.sum(qtr2.data["msk"].values) == 4298
# assert the dep variable is the same
assert np.sum(qtr.data["dep"].values) == np.sum(qtr2.data["dep"].values)


def test_overwrite_quadtree_nc(tmpdir):
ncfile = join(TESTDATADIR, "sfincs_test_quadtree", "sfincs.nc")
nc_copy = join(str(tmpdir), "sfincs.nc")

# Create file + copy
shutil.copy(ncfile, nc_copy)

# Open the copy with xu_open_dataset
# This opens the file lazily
ds = utils.xu_open_dataset(nc_copy)

# Convert to dataset
ds = ds.ugrid.to_dataset()

# Try to write
# NOTE this should fail because it still has lazy references to the file
try:
ds.to_netcdf(nc_copy)
except PermissionError:
pass

# Now perform the check and lazy loading check
utils.check_exists_and_lazy(ds, nc_copy)

# Try to overwrite the file
ds.to_netcdf(nc_copy)

# Remove the copied file
os.remove(nc_copy)

0 comments on commit 5d88339

Please sign in to comment.