diff --git a/.github/workflows/gha_pytest_pr.yml b/.github/workflows/gha_pytest_pr.yml
index 67177f6..cc10915 100644
--- a/.github/workflows/gha_pytest_pr.yml
+++ b/.github/workflows/gha_pytest_pr.yml
@@ -26,4 +26,4 @@ jobs:
     - name: pytesting
       shell: bash -l {0}
-      run: pytest
\ No newline at end of file
+      run: pytest --location opendap
\ No newline at end of file
diff --git a/.github/workflows/gha_pytest_push.yml b/.github/workflows/gha_pytest_push.yml
index d56e966..c100dcf 100644
--- a/.github/workflows/gha_pytest_push.yml
+++ b/.github/workflows/gha_pytest_push.yml
@@ -26,4 +26,4 @@ jobs:
     - name: pytesting
       shell: bash -l {0}
-      run: pytest
\ No newline at end of file
+      run: pytest --location opendap
\ No newline at end of file
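Both CI workflows now run the suite against the remote data store (`pytest --location opendap`) instead of assuming pre-staged local files. `--location` is not a built-in pytest flag, so the test suite has to register it; that registration is not part of this diff. A minimal sketch of the kind of `conftest.py` hook that would back it (the option name comes from the workflow change above; the `location` fixture name is a hypothetical illustration):

```python
# conftest.py -- sketch only, not the repository's actual implementation
import pytest

def pytest_addoption(parser):
    # register the custom --location flag used by the CI workflows above
    parser.addoption(
        '--location',
        action='store',
        default='local',
        choices=['local', 'opendap'],
        help='where the tests should read MOM6 data from',
    )

@pytest.fixture
def location(request):
    # hypothetical fixture: tests request `location` to pick a data source
    return request.config.getoption('--location')
```

Tests can then pass `source=location` into the IO classes below, so the same suite exercises both the local and the OPeNDAP code paths.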
diff --git a/mom6/mom6_module/mom6_io.py b/mom6/mom6_module/mom6_io.py
index 0c14256..4e5fc30 100644
--- a/mom6/mom6_module/mom6_io.py
+++ b/mom6/mom6_module/mom6_io.py
@@ -195,21 +195,24 @@ def get_all(self) -> xr.Dataset:
                 raise OSError('for raw grid please input the path to grid file')
             else:
                 ds_static = MOM6Static.get_grid(self.static_relative_dir)
+            # set up chunking
+            io_chunk = {}
         elif self.source == 'opendap':
             file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
             for file in file_list:
                 var_flag = 'static' in file
                 if var_flag :
                     ds_static = xr.open_dataset(file)
+            io_chunk = {'init': 4,'member':1,'lead':-1}

-        file_read = [file for file in file_list if self.var in file]
+        file_read = [file for file in file_list if f'{self.var}_' in file]

         # merge the static field with the variables
         ds = xr.open_mfdataset(
             file_read,
             combine='nested',
             concat_dim='init',
-            chunks={'init': 4,'member':1,'lead':-1}
+            chunks=io_chunk
         ).sortby('init')
         ds = xr.merge([ds_static,ds])
         # ds = ds.isel(init=slice(1,None)) # exclude the 1980 empty field due to merge
@@ -232,14 +235,16 @@
         else:
             mom6_dir = os.path.join(DATA_PATH,self.data_relative_dir)
             file_list = glob.glob(f'{mom6_dir}/*.nc')
+            io_chunk = {}
         elif self.source == 'opendap':
             file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
+            io_chunk = {'init': 1,'member':1,'lead':-1}

-        file_read = [file for file in file_list if self.var in file]
+        file_read = [file for file in file_list if f'{self.var}_' in file]
         ds = xr.open_mfdataset(
             file_read,combine='nested',
             concat_dim='init',
-            chunks={'init': 1,'member':1,'lead':1}
+            chunks=io_chunk
         ).sortby('init')

         # test if accident read raw file
@@ -288,15 +293,17 @@ def get_tercile(
                 raise OSError('for raw grid please input the path to grid file')
             else:
                 ds_static = MOM6Static.get_grid(self.static_relative_dir)
+            io_chunk = {}
         elif self.source == 'opendap':
             file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
             for file in file_list:
                 var_flag = 'static' in file
                 if var_flag :
                     ds_static = xr.open_dataset(file)
+            io_chunk = {'init': 4,'member':1,'lead':-1}

         # refine based on var name
-        file_read = [file for file in file_list if self.var in file]
+        file_read = [file for file in file_list if f'{self.var}_' in file]

         # refine based on region
         if average_type == 'grid':
             file_read = [file for file in file_read if '.region.' not in file]
@@ -308,7 +315,7 @@
             file_read,
             combine='nested',
             concat_dim='init',
-            chunks={'init': 4,'member':1,'lead':-1}
+            chunks=io_chunk
         ).sortby('init')
         ds = xr.merge([ds_static,ds])
         # ds = ds.isel(init=slice(1,None)) # exclude the 1980 empty field due to merge
@@ -331,10 +338,12 @@
         else:
             mom6_dir = os.path.join(DATA_PATH,self.tercile_relative_dir)
             file_list = glob.glob(f'{mom6_dir}/*.nc')
+            io_chunk = {}
         elif self.source == 'opendap':
             file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
+            io_chunk = {'init': 4,'member':1,'lead':-1}

-        file_read = [file for file in file_list if self.var in file]
+        file_read = [file for file in file_list if f'{self.var}_' in file]

         # refine based on region
         if average_type == 'grid':
@@ -345,7 +354,7 @@
         ds = xr.open_mfdataset(
             file_read,combine='nested',
             concat_dim='init',
-            chunks={'init': 1,'member':1,'lead':1}
+            chunks=io_chunk
         ).sortby('init')

         # test if accident read raw file
@@ -538,20 +547,22 @@ def get_all(self) -> xr.Dataset:
                 raise IOError('for raw grid please input the path to grid file')
             else:
                 ds_static = MOM6Static.get_grid(self.static_relative_dir)
+            io_chunk = {}
         elif self.source == 'opendap':
             file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()
             for file in file_list:
                 var_flag = 'static' in file
                 if var_flag :
                     ds_static = xr.open_dataset(file)
+            io_chunk = {'time': 100}

-        file_read = [file for file in file_list if self.var in file]
+        file_read = [file for file in file_list if f'.{self.var}.' in file]

         # merge the static field with the variables
         ds = xr.open_mfdataset(
             file_read,combine='nested',
             concat_dim='time',
-            chunks={'time': 100}
+            chunks=io_chunk
         ).sortby('time')
         ds = xr.merge([ds_static,ds])
         ds = ds.isel(time=slice(1,None)) # exclude the 1980 empty field due to merge
@@ -577,7 +588,7 @@
         elif self.source == 'opendap':
             file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()

-        file_read = [file for file in file_list if self.var in file]
+        file_read = [file for file in file_list if f'.{self.var}.' in file]
         ds = xr.open_mfdataset(
             file_read,
             combine='nested',
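Two fixes run through all four read paths above. First, the chunking passed to `xr.open_mfdataset` is now source-dependent: local reads use `chunks={}`, which tells xarray to load each file as a single dask chunk instead of splitting data that is already on disk, while OPeNDAP reads keep explicit per-dimension chunks so each dask chunk maps to a reasonably sized remote request (`-1` means one chunk spanning the whole dimension). Second, the filename filter is anchored to the delimiters of the naming scheme (`{var}_` for forecast files, `.{var}.` for historical files), because a bare substring test also matches files whose names merely contain the variable string. A self-contained sketch of that pitfall, with hypothetical file names:

```python
# A bare substring test over-matches when one variable name is contained
# in another, e.g. 'so' (salinity) inside 'sos' (surface salinity).
file_list = ['so_forecast.nc', 'sos_forecast.nc']
var = 'so'

# old behaviour: both files match, and the wrong variable gets concatenated
assert [f for f in file_list if var in f] == ['so_forecast.nc', 'sos_forecast.nc']

# new behaviour: anchoring on the '_' delimiter keeps only the intended file
assert [f for f in file_list if f'{var}_' in f] == ['so_forecast.nc']
```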
diff --git a/mom6/notebook/gulf_stream_index.ipynb b/mom6/notebook/gulf_stream_index.ipynb
index f7dab4e..6e30607 100644
--- a/mom6/notebook/gulf_stream_index.ipynb
+++ b/mom6/notebook/gulf_stream_index.ipynb
@@ -17,13 +17,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "id": "48e504ff-196d-4f7e-8226-cf3265021e6e",
    "metadata": {},
    "outputs": [],
    "source": [
     "import xarray as xr\n",
-    "from mom6.mom6_module import mom6_process as mp"
+    "from mom6.mom6_module.mom6_io import MOM6Historical"
    ]
   },
   {
@@ -36,17 +36,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "id": "b73f9d9b-b0fe-453c-a702-02041fcead2a",
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = mp.MOM6Historical.get_mom6_all('ssh',grid='raw')"
+    "ds = MOM6Historical(\n",
+    "    var = 'ssh',\n",
+    "    data_relative_dir = 'hist_run/',\n",
+    "    static_relative_dir = 'static/',\n",
+    "    grid = 'raw',\n",
+    "    source = 'local'\n",
+    ").get_all()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "id": "408d6f27",
    "metadata": {},
    "outputs": [
@@ -441,21 +447,21 @@
     "    wet_c         (yq, xq) float32 3MB ...\n",
     "    ...            ...\n",
     "    areacello_bu  (yq, xq) float32 3MB ...\n",
-    "    average_DT    (time) timedelta64[ns] 3kB dask.array<chunksize=(100,), meta=np.ndarray>\n",
-    "    average_T1    (time) datetime64[ns] 3kB dask.array<chunksize=(100,), meta=np.ndarray>\n",
-    "    average_T2    (time) datetime64[ns] 3kB dask.array<chunksize=(100,), meta=np.ndarray>\n",
-    "    ssh           (time, yh, xh) float32 849MB dask.array<chunksize=(100, 225, 206), meta=np.ndarray>\n",
-    "    time_bnds     (time, nv) datetime64[ns] 5kB dask.array<chunksize=(100, 2), meta=np.ndarray>\n",
+    "    average_DT    (time) timedelta64[ns] 3kB dask.array<chunksize=(324,), meta=np.ndarray>\n",
+    "    average_T1    (time) datetime64[ns] 3kB dask.array<chunksize=(324,), meta=np.ndarray>\n",
+    "    average_T2    (time) datetime64[ns] 3kB dask.array<chunksize=(324,), meta=np.ndarray>\n",
+    "    ssh           (time, yh, xh) float32 849MB dask.array<chunksize=(22, 225, 206), meta=np.ndarray>\n",
+    "    time_bnds     (time, nv) datetime64[ns] 5kB dask.array<chunksize=(324, 2), meta=np.ndarray>\n",
     "Attributes:\n",
     "    NumFilesInSet:  1\n",
     "    title:          NWA12_MOM6_v1.0\n",
     "    grid_type:      regular\n",
-    "    grid_tile:      N/A
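The notebook migrates from the old `mom6_process.get_mom6_all` helper to the class-based `MOM6Historical` API, and the refreshed output reflects the new local chunking: with `chunks={}` each underlying history file becomes its own dask chunk, so `ssh` is now chunked `(22, 225, 206)` per file instead of the previous hard-coded `(100, 225, 206)`. For the remote path, a hedged sketch of the equivalent call, assuming the constructor accepts `source='opendap'` (as in the `mom6_io.py` branch above) and that the relative-directory arguments are only needed for local reads:

```python
from mom6.mom6_module.mom6_io import MOM6Historical

# hypothetical OPeNDAP variant: the file catalog and the {'time': 100}
# chunking come from the opendap branch of MOM6Historical.get_all above
ds = MOM6Historical(
    var='ssh',
    grid='raw',
    source='opendap',
).get_all()
```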