From 6cb0abd5a26a8bd4e8ae5e7efc89ed5b0bff7f0e Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Tue, 28 Jan 2025 13:09:00 +0100 Subject: [PATCH 01/34] Rewrite data set creation routine --- pyvisgen/simulation/data_set.py | 418 ++++++++++++++++++++++++++++- pyvisgen/simulation/observation.py | 51 ++-- pyvisgen/utils/data.py | 4 +- 3 files changed, 434 insertions(+), 39 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index e75dcdb..f0d464a 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -7,7 +7,7 @@ from astropy import units as un from astropy.coordinates import AltAz, Angle, EarthLocation, SkyCoord from astropy.time import Time -from tqdm import tqdm +from tqdm.autonotebook import tqdm import pyvisgen.fits.writer as writer import pyvisgen.layouts.layouts as layouts @@ -17,6 +17,422 @@ from pyvisgen.utils.data import load_bundles, open_bundles +class SimulateDataSet: + def __init__(self): + pass + + @classmethod + def from_config( + cls, + config: str | Path, + slurm=False, + job_id=None, + n=None, + image_key="y", + ) -> None: + """ + Parameters + ---------- + config : str or Path + path to config file + slurm : bool + True, if slurm is used + job_id : int + job_id, given by slurm + n : int + running index + """ + cls = cls() + cls.job_id = job_id + cls.n = n + cls.key = image_key + + cls.conf = read_data_set_conf(config) + + cls.out_path = Path(cls.conf["out_path_fits"]) + cls.out_path.mkdir(parents=True, exist_ok=True) + + cls.data = load_bundles(cls.conf["in_path"]) + + if slurm: + cls._run_slurm() + else: + cls._run() + + return cls + + @classmethod + def from_params( + cls, + images, + ra, + dec, + layout, + img_size, + fov, + int_time, + scan_start, + scan_duration, + num_scans, + scan_sep, + ref_freq, + freq_offsets, + bandwidths, + *, + corrupted=False, + noisy=0, + sensitivty_cut=1e-6, + mode="full", + grid_size=128, + grid_fov=100, + amp_phase=False, + num_test_images=500, + bundle_size=100, + train_valid_split=0.2, + seed=42, + device="cuda", + input_path="skies/", + out_path_fits="build/uvfits", + out_path_gridded="build/gridded", + slurm=False, + job_id=None, + n=None, + ) -> None: + """ """ + cls.job_id = job_id + cls.n = n + + cls.conf = { + "mode": mode, + "device": device, + "seed": seed, + "layout": (layout,), + "img_size": (img_size,), + "fov_center_ra": ([ra, ra],), + "fov_center_dec": ([dec, dec],), + "fov_size": fov, + "corr_int_time": int_time, + "scan_start": scan_start, + "scan_duration": scan_duration, + "num_scans": num_scans, + "scan_separation": scan_sep, + "ref_frequency": ref_freq, + "frequency_offsets": freq_offsets, + "bandwidths": bandwidths, + "corrupted": corrupted, + "noisy": noisy, + "sensitivty_cut": sensitivty_cut, + "num_test_images": num_test_images, + "bundle_size": bundle_size, + "train_valid_split": train_valid_split, + "grid_size": grid_size, + "grid_fov": grid_fov, + "amp_phase": amp_phase, + "in_path": input_path, + "out_path_fits": out_path_fits, + "out_path_gridded": out_path_gridded, + } + + cls.out_path = Path(cls.conf["out_path_fits"]) + + if not cls.out_path.isdir(): + cls.out_path.mkdir(parents=True, exist_ok=True) + + cls.data = load_bundles(cls.conf["in_path"]) + + if slurm: + cls._run_slurm() + else: + cls._run() + + def _run_slurm(self): + """ """ + job_id = int(self.job_id + self.n * 500) + out = self.out_path / Path("vis_" + str(job_id) + ".fits") + + imgs_bundle = len(open_bundles(self.data[0])) + bundle = torch.div(job_id, imgs_bundle, rounding_mode="floor") + image = job_id - bundle * imgs_bundle + + SI = torch.tensor(open_bundles(self.data[bundle])[image]) + + if len(SI.shape) == 2: + SI = SI.unsqueeze(0) + + obs = self.create_observation(self.conf) + vis_data = vis_loop(obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]) + hdu_list = writer.create_hdu_list(vis_data, obs) + hdu_list.writeto(out, overwrite=True) + + def _run(self): + """ """ + for i in tqdm(range(len(self.data))): + SIs = self.get_images(self.data, i) + + for j, SI in enumerate(tqdm(SIs)): + obs = create_observation(self.conf) + vis_data = vis_loop( + obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"] + ) + + out = self.out_path / Path("vis_" + str(j + len(SIs) * i) + ".fits") + hdu_list = writer.create_hdu_list(vis_data, obs) + hdu_list.writeto(out, overwrite=True) + + def get_images(self, bundles, i) -> torch.tensor: + """ """ + SIs = torch.tensor(open_bundles(bundles[i], key=self.key)) + + if len(SIs.shape) == 3: + SIs = SIs.unsqueeze(1) + + return SIs + + def _create_observation(self) -> Observation: + rc = create_sampling_rc(self.conf) + + dense = False + if rc["mode"] == "dense": + dense = True + + obs = Observation( + src_ra=rc["fov_center_ra"], + src_dec=rc["fov_center_dec"], + start_time=rc["scan_start"], + scan_duration=rc["scan_duration"], + num_scans=rc["num_scans"], + scan_separation=rc["scan_separation"], + integration_time=rc["corr_int_time"], + ref_frequency=rc["ref_frequency"], + frequency_offsets=rc["frequency_offsets"], + bandwidths=rc["bandwidths"], + fov=rc["fov_size"], + image_size=rc["img_size"], + array_layout=rc["layout"], + corrupted=rc["corrupted"], + device=rc["device"], + dense=dense, + sensitivity_cut=rc["sensitivity_cut"], + ) + + return obs + + def _create_sampling_rc(self): + """ + Draw sampling options and test if at least half of the + telescopes can see the source. If not, then new + parameters are drawn. + + Parameters + ---------- + conf : dict + simulation options + + Returns + ------- + dict + contains the observation parameters + """ + + global rng + + if self.conf["seed"] is not None: + rng = np.random.default_rng(self.conf["seed"]) + else: + rng = np.random.default_rng() + + samp_ops = draw_sampling_opts() + array_layout = layouts.get_array_layout(self.conf["layout"][0]) + half_telescopes = array_layout.x.shape[0] // 2 + + while test_opts(samp_ops) <= half_telescopes: + samp_ops = draw_sampling_opts(self.conf) + + return samp_ops + + def draw_sampling_opts(self): + """ + Draw observation options from given intervals. + + Parameters + ---------- + conf : dict + simulation options + + Returns + ------- + dict + contains randomly drawn observation options + """ + + if "rng" not in globals(): + global rng + rng = np.random.default_rng(self.conf["seed"]) + + angles_ra = np.arange( + self.conf["fov_center_ra"][0][0], + self.conf["fov_center_ra"][0][1], + step=0.1, + ) + angles_dec = np.arange( + self.conf["fov_center_dec"][0][0], + self.conf["fov_center_dec"][0][1], + step=0.1, + ) + + fov_center_ra = rng.choice(angles_ra) + fov_center_dec = rng.choice(angles_dec) + + start_time_l = datetime.strptime( + self.conf["scan_start"][0], "%d-%m-%Y %H:%M:%S" + ) + start_time_h = datetime.strptime( + self.conf["scan_start"][1], "%d-%m-%Y %H:%M:%S" + ) + start_times = pd.date_range( + start_time_l, + start_time_h, + freq="1h", + ).strftime("%d-%m-%Y %H:%M:%S") + + scan_start = rng.choice( + [datetime.strptime(time, "%d-%m-%Y %H:%M:%S") for time in start_times] + ) + scan_duration = int( + rng.integers(self.conf["scan_duration"][0], self.conf["scan_duration"][1]) + ) + + num_scans = int( + rng.integers(self.conf["num_scans"][0], self.conf["num_scans"][1]) + ) + + # TODO: is this really necessary? + opts = np.array( + [ + self.conf["mode"], + self.conf["layout"][0], + self.conf["img_size"][0], + fov_center_ra, + fov_center_dec, + self.conf["fov_size"], + self.conf["corr_int_time"], + scan_start, + scan_duration, + num_scans, + self.conf["scan_separation"], + self.conf["ref_frequency"], + self.conf["frequency_offsets"], + self.conf["bandwidths"], + self.conf["corrupted"], + self.conf["device"], + self.conf["sensitivty_cut"], + ], + dtype="object", + ) + + samp_ops = { + "mode": opts[0], + "layout": opts[1], + "img_size": opts[2], + "fov_center_ra": opts[3], + "fov_center_dec": opts[4], + "fov_size": opts[5], + "corr_int_time": opts[6], + "scan_start": opts[7], + "scan_duration": opts[8], + "num_scans": opts[9], + "scan_separation": opts[10], + "ref_frequency": opts[11], + "frequency_offsets": opts[12], + "bandwidths": opts[13], + "corrupted": opts[14], + "device": opts[15], + "sensitivity_cut": opts[16], + } + return samp_ops + + def test_opts(self, rc): + """ + Compute the number of telescopes that can observe the source given + certain randomly drawn parameters. + + Parameters + ---------- + rc : dict + randomly drawn observational parameters + + Returns + ------- + + """ + array_layout = layouts.get_array_layout(rc["layout"]) + + src_crd = SkyCoord( + ra=rc["fov_center_ra"], dec=rc["fov_center_dec"], unit=(un.deg, un.deg) + ) + + time = self._calc_time_steps(rc) + + _, el_st_0 = self.calc_ref_elev(src_crd, time[0], array_layout) + _, el_st_1 = self.calc_ref_elev(src_crd, time[1], array_layout) + + el_min = 15 + el_max = 85 + + active_telescopes_0 = np.sum((el_st_0 >= el_min) & (el_st_0 <= el_max)) + active_telescopes_1 = np.sum((el_st_1 >= el_min) & (el_st_1 <= el_max)) + + return min(active_telescopes_0, active_telescopes_1) + + def calc_ref_elev(self, src_crd, time, array_layout): + if time.shape == (): + time = time[None] + + # Calculate for all times + # calculate GHA, Greenwich as reference for EHT + ha_all = Angle( + [t.sidereal_time("apparent", "greenwich") - src_crd.ra for t in time] + ) + + # calculate elevations + el_st_all = src_crd.transform_to( + AltAz( + obstime=time.reshape(len(time), -1), + location=EarthLocation.from_geocentric( + np.repeat([array_layout.x], len(time), axis=0), + np.repeat([array_layout.y], len(time), axis=0), + np.repeat([array_layout.z], len(time), axis=0), + unit=un.m, + ), + ) + ).alt.degree + + assert len(ha_all.value) == len(el_st_all) + + return ha_all, el_st_all + + def _calc_time_steps(self, conf): + start_time = Time(conf["scan_start"].isoformat(), format="isot") + scan_separation = conf["scan_separation"] + num_scans = conf["num_scans"] + scan_duration = conf["scan_duration"] + int_time = conf["corr_int_time"] + + time_lst = [ + start_time + + scan_separation * i * un.second + + i * scan_duration * un.second + + j * int_time * un.second + for i in range(num_scans) + for j in range(int(scan_duration / int_time) + 1) + ] + # +1 because t_1 is the stop time of t_0 + # in order to save computing power we take one time more to complete interval + time = Time(time_lst) + + return time + + def simulate_data_set(config, slurm=False, job_id=None, n=None): """ Wrapper function for simulating visibilities. diff --git a/pyvisgen/simulation/observation.py b/pyvisgen/simulation/observation.py index 8f4ad61..5f2658e 100644 --- a/pyvisgen/simulation/observation.py +++ b/pyvisgen/simulation/observation.py @@ -6,14 +6,13 @@ import astropy.units as un import torch from astropy.constants import c -from astropy.coordinates import AltAz, Angle, EarthLocation, SkyCoord, Longitude +from astropy.coordinates import AltAz, Angle, EarthLocation, Longitude, SkyCoord from astropy.time import Time from tqdm.autonotebook import tqdm from pyvisgen.layouts import layouts from pyvisgen.simulation.array import Array - DEFAULT_POL_KWARGS = { "delta": 0, "amp_ratio": 0.5, @@ -277,20 +276,20 @@ def __init__( the simulation of polarisation. Default: `None` pol_kwargs : dict, optional Additional keyword arguments for the simulation - of polarisation. Default: `{ + of polarisation. Default: ``{ "delta": 0, "amp_ratio": 0.5, "random_state": 42, - }` + }`` field_kwargs : dict, optional Additional keyword arguments for the random polarisation field that is applied when simulating polarisation. - Default: `{ + Default: ``{ "order": [1, 1], "scale": [0, 1], "threshold": None, "random_state": 42 - }` + }`` show_progress : bool, optional If `True`, show a progress bar during the iteration over the scans. Default: False @@ -464,8 +463,9 @@ def calc_baselines(self): self.baselines.add_baseline(bas) def get_baselines(self, times): - """Calculates baselines from source coordinates and time of observation for - every antenna station in array_layout. + """Calculates baselines from source coordinates + and time of observation for every antenna station + in array_layout. Parameters ---------- @@ -583,7 +583,8 @@ def calc_ref_elev(self, time=None): ) def calc_feed_rotation(self, ha: Angle) -> Angle: - r"""Calculates feed rotation for every antenna at every time step. + r"""Calculates feed rotation for every antenna at + every time step. Notes ----- @@ -592,10 +593,12 @@ def calc_feed_rotation(self, ha: Angle) -> Angle: .. math:: - q = \atan\left(\frac{\sin h}{\cos\delta \tan\varphi - \sin\delta \cos h\right), + q = \atan\left(\frac{\sin h}{\cos\delta \tan\varphi + - \sin\delta \cos h\right), - where $h$ is the local hour angle, $\varphi$ the geographical latitude - of the observer, and $\delta$ the declination of the source. + where $h$ is the local hour angle, $\varphi$ the geographical + latitude of the observer, and $\delta$ the declination of + the source. """ # We need to create a tensor from the EarthLocation object # and save only the geographical latitude of each antenna @@ -612,30 +615,6 @@ def calc_feed_rotation(self, ha: Angle) -> Angle: return q - def calc_direction_cosines(self, ha, el_st, delta_x, delta_y, delta_z): - src_dec = torch.deg2rad(self.dec) - ha = torch.deg2rad(ha) - - u = (torch.sin(ha) * delta_x + torch.cos(ha) * delta_y).reshape(-1) - v = ( - -torch.sin(src_dec) * torch.cos(ha) * delta_x - + torch.sin(src_dec) * torch.sin(ha) * delta_y - + torch.cos(src_dec) * delta_z - ).reshape(-1) - w = ( - torch.cos(src_dec) * torch.cos(ha) * delta_x - - torch.cos(src_dec) * torch.sin(ha) * delta_y - + torch.sin(src_dec) * delta_z - ).reshape(-1) - - if not (u.shape == v.shape == w.shape): - raise ValueError( - "Expected u, v, and w to have the same shapes: " - f"{u.shape}, {v.shape}, {w.shape}" - ) - - return u, v, w - def create_rd_grid(self): """Calculates RA and Dec values for a given fov around a source position diff --git a/pyvisgen/utils/data.py b/pyvisgen/utils/data.py index adfc441..f7958b6 100644 --- a/pyvisgen/utils/data.py +++ b/pyvisgen/utils/data.py @@ -18,7 +18,7 @@ def get_bundles(path): return bundles -def open_bundles(path): +def open_bundles(path, key="y"): f = h5py.File(path, "r") - bundle_y = np.array(f["y"]) + bundle_y = np.array(f[key]) return bundle_y From 3579cfb841fd2d2825af0d5864590112b2aac44f Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Tue, 28 Jan 2025 13:41:10 +0100 Subject: [PATCH 02/34] Remove unused packages from dependencies --- pyproject.toml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5afa926..5b6e05a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,6 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ - "astroplan", "astropy<=6.1.0", "click", "h5py", @@ -36,17 +35,12 @@ dependencies = [ "jupyter", "matplotlib", "natsort", - "numexpr", "numpy", "pandas", - "pre-commit", - "pytest", - "pytest-cov", "scipy", "toma", "toml", "torch", - "torch", "tqdm", ] @@ -59,6 +53,10 @@ tests = [ "tomli", ] +dev = [ + "pre-commit", +] + docs = [ "graphviz", "ipython", From fe7c0d0219670883462f33b735a9e1110813a23f Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:48:35 +0100 Subject: [PATCH 03/34] Rename simulation script entry point --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5b6e05a..534f4d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ docs = [ ] [project.scripts] -pyvisgen_create_dataset = "pyvisgen.simulation.scripts.create_dataset:main" +pyvisgen-simulate = "pyvisgen.simulation.scripts.create_dataset:main" [tool.setuptools_scm] write_to = "pyvisgen/_version.py" From e856947f893cf72560b12fb630221fa77e3bee86 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:51:24 +0100 Subject: [PATCH 04/34] Refactor gridding module - Add new grid_vis_loop_data method that allows to easily grid data coming from the vis_loop - Move some methods to new gridding.utils submodule - Update gridding script --- pyvisgen/gridding/__init__.py | 7 +- pyvisgen/gridding/gridder.py | 251 ++++++++----------------- pyvisgen/gridding/scripts/grid_data.py | 13 ++ pyvisgen/gridding/utils.py | 154 +++++++++++++++ 4 files changed, 247 insertions(+), 178 deletions(-) create mode 100644 pyvisgen/gridding/scripts/grid_data.py create mode 100644 pyvisgen/gridding/utils.py diff --git a/pyvisgen/gridding/__init__.py b/pyvisgen/gridding/__init__.py index 9bcde62..c53f9ce 100644 --- a/pyvisgen/gridding/__init__.py +++ b/pyvisgen/gridding/__init__.py @@ -1,13 +1,11 @@ from .gridder import ( - calc_truth_fft, convert_amp_phase, convert_real_imag, - create_gridded_data_set, ducc0_gridding, grid_data, - open_data, - save_fft_pair, + grid_vis_loop_data, ) +from .utils import calc_truth_fft, create_gridded_data_set, open_data, save_fft_pair __all__ = [ "calc_truth_fft", @@ -16,6 +14,7 @@ "create_gridded_data_set", "ducc0_gridding", "grid_data", + "grid_vis_loop_data", "open_data", "save_fft_pair", ] diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py index eb6f372..d7d2ef9 100644 --- a/pyvisgen/gridding/gridder.py +++ b/pyvisgen/gridding/gridder.py @@ -1,154 +1,8 @@ -import os -from pathlib import Path - import astropy.constants as const -import h5py import numpy as np -from tqdm import tqdm +from numpy import AxisError -from pyvisgen.fits.data import fits_data from pyvisgen.gridding.alt_gridder import ms2dirty_python_fast -from pyvisgen.utils.config import read_data_set_conf -from pyvisgen.utils.data import load_bundles, open_bundles - -os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" - - -def create_gridded_data_set(config): - conf = read_data_set_conf(config) - out_path_fits = Path(conf["out_path_fits"]) - out_path = Path(conf["out_path_gridded"]) - out_path.mkdir(parents=True, exist_ok=True) - - sky_dist = load_bundles(conf["in_path"]) - fits_files = fits_data(out_path_fits) - size = len(fits_files) - print(size) - - ################### - # test - if conf["num_test_images"] > 0: - bundle_test = int(conf["num_test_images"] // conf["bundle_size"]) - size -= conf["num_test_images"] - - for i in tqdm(range(bundle_test)): - ( - uv_data_test, - freq_data_test, - gridded_data_test, - sky_dist_test, - ) = open_data(fits_files, sky_dist, conf, i) - - truth_fft_test = calc_truth_fft(sky_dist_test) - - if conf["amp_phase"]: - gridded_data_test = convert_amp_phase(gridded_data_test, sky_sim=False) - truth_amp_phase_test = convert_amp_phase(truth_fft_test, sky_sim=True) - else: - gridded_data_test = convert_real_imag(gridded_data_test, sky_sim=False) - truth_amp_phase_test = convert_real_imag(truth_fft_test, sky_sim=True) - assert gridded_data_test.shape[1] == 2 - - out = out_path / Path("samp_test" + str(i) + ".h5") - - save_fft_pair(out, gridded_data_test, truth_amp_phase_test) - # - ################### - - size_train = int(size // (1 + conf["train_valid_split"])) - size_valid = size - size_train - print(f"Training size: {size_train}, Validation size: {size_valid}") - bundle_train = int(size_train // conf["bundle_size"]) - bundle_valid = int(size_valid // conf["bundle_size"]) - - ################### - # train - for i in tqdm(range(bundle_train)): - i += bundle_test - uv_data_train, freq_data_train, gridded_data_train, sky_dist_train = open_data( - fits_files, sky_dist, conf, i - ) - - truth_fft_train = calc_truth_fft(sky_dist_train) - - if conf["amp_phase"]: - gridded_data_train = convert_amp_phase(gridded_data_train, sky_sim=False) - truth_amp_phase_train = convert_amp_phase(truth_fft_train, sky_sim=True) - else: - gridded_data_train = convert_real_imag(gridded_data_train, sky_sim=False) - truth_amp_phase_train = convert_real_imag(truth_fft_train, sky_sim=True) - - out = out_path / Path("samp_train" + str(i - bundle_test) + ".h5") - - save_fft_pair(out, gridded_data_train, truth_amp_phase_train) - train_index_last = i - # - ################### - - ################### - # valid - for i in tqdm(range(bundle_valid)): - i += train_index_last - uv_data_valid, freq_data_valid, gridded_data_valid, sky_dist_valid = open_data( - fits_files, sky_dist, conf, i - ) - - truth_fft_valid = calc_truth_fft(sky_dist_valid) - - if conf["amp_phase"]: - gridded_data_valid = convert_amp_phase(gridded_data_valid, sky_sim=False) - truth_amp_phase_valid = convert_amp_phase(truth_fft_valid, sky_sim=True) - else: - gridded_data_valid = convert_real_imag(gridded_data_valid, sky_sim=False) - truth_amp_phase_valid = convert_real_imag(truth_fft_valid, sky_sim=True) - - out = out_path / Path("samp_valid" + str(i - train_index_last) + ".h5") - - save_fft_pair(out, gridded_data_valid, truth_amp_phase_valid) - # - ################### - - -def open_data(fits_files, sky_dist, conf, i): - sky_sim_bundle_size = len(open_bundles(sky_dist[0])) - uv_data = [ - fits_files.get_uv_data(n).copy() - for n in np.arange( - i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size - ) - ] - freq_data = np.array( - [ - fits_files.get_freq_data(n) - for n in np.arange( - i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size - ) - ], - dtype="object", - ) - gridded_data = np.array( - [grid_data(data, freq, conf).copy() for data, freq in zip(uv_data, freq_data)] - ) - bundle = np.floor_divide(i * sky_sim_bundle_size, sky_sim_bundle_size) - gridded_truth = np.array( - [ - open_bundles(sky_dist[bundle])[n] - for n in np.arange( - i * sky_sim_bundle_size - bundle * sky_sim_bundle_size, - (i * sky_sim_bundle_size) - + sky_sim_bundle_size - - bundle * sky_sim_bundle_size, - ) - ] - ) - return uv_data, freq_data, gridded_data, gridded_truth - - -def calc_truth_fft(sky_dist): - truth_fft = np.fft.fftshift( - np.fft.fft2(np.fft.fftshift(sky_dist, axes=(1, 2)), axes=(1, 2)), axes=(1, 2) - ) - return truth_fft def ducc0_gridding(uv_data, freq_data): @@ -227,10 +81,10 @@ def grid_data(uv_data, freq_data, conf): samps = np.array( [ - np.append(u, -u), - np.append(v, -v), + np.append(-u, u), + np.append(-v, v), np.append(real, real), - np.append(imag, -imag), + np.append(-imag, imag), ] ) # Generate Mask @@ -241,12 +95,79 @@ def grid_data(uv_data, freq_data, conf): # bins are shifted by delta/2 so that maximum in uv space matches maximum # in numpy fft - bins = ( - np.arange(start=-(N / 2) * delta, stop=(N / 2 + 1) * delta, step=delta) - - delta / 2 + bins = np.arange(start=-((N + 1) / 2) * delta, stop=(N / 2) * delta, step=delta) + + mask, *_ = np.histogram2d(samps[0], samps[1], bins=[bins, bins], density=False) + mask[mask == 0] = 1 + + mask_real, x_edges, y_edges = np.histogram2d( + samps[0], samps[1], bins=[bins, bins], weights=samps[2], density=False + ) + mask_imag, x_edges, y_edges = np.histogram2d( + samps[0], samps[1], bins=[bins, bins], weights=samps[3], density=False ) - # if len(bins) - 1 > N: - # bins = np.delete(bins, -1) + + mask_real /= mask + mask_imag /= mask + + assert mask_real.shape == (conf["grid_size"], conf["grid_size"]) + gridded_vis = np.zeros((2, N, N)) + gridded_vis[0] = mask_real + gridded_vis[1] = mask_imag + + return gridded_vis + + +def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): + """Grid data coming from the vis_loop.""" + if vis_data.ndim != 7: + if vis_data.ndim == 3: + vis_data = np.stack( + [vis_data.real, vis_data.imag, np.ones(vis_data.shape)], + axis=3, + )[:, None, None, :, None, ...] + else: + raise ValueError("Expected vis_data to be of dimension 3 or 7") + + if isinstance(freq_bands, float): + freq_bands = [freq_bands] + + u = np.array([uu * np.array(freq) for freq in freq_bands]).ravel() + v = np.array([vv * np.array(freq) for freq in freq_bands]).ravel() + + try: + stokes_vis = ( + np.squeeze( + (vis_data[..., stokes_comp, 0] + 1j * vis_data[..., stokes_comp, 1]) + ) + .swapaxes(0, 1) + .ravel() + ) + except AxisError: + stokes_vis = np.squeeze( + (vis_data[..., stokes_comp, 0] + 1j * vis_data[..., stokes_comp, 1]) + ).ravel() + + real = stokes_vis.real + imag = stokes_vis.imag + + samps = np.array( + [ + np.concatenate([-u, u]), + np.concatenate([-v, v]), + np.concatenate([real, real]), + np.concatenate([-imag, imag]), + ] + ) + # Generate Mask + N = conf["grid_size"] # image size + fov = conf["grid_fov"] * np.pi / (3600 * 180) + + delta = 1 / fov + + # bins are shifted by delta/2 so that maximum in uv space matches maximum + # in numpy fft + bins = np.arange(start=-((N + 1) / 2) * delta, stop=(N / 2) * delta, step=delta) mask, *_ = np.histogram2d(samps[0], samps[1], bins=[bins, bins], density=False) mask[mask == 0] = 1 @@ -265,6 +186,7 @@ def grid_data(uv_data, freq_data, conf): gridded_vis = np.zeros((2, N, N)) gridded_vis[0] = mask_real gridded_vis[1] = mask_imag + return gridded_vis @@ -293,22 +215,3 @@ def convert_real_imag(data, sky_sim=False): data = np.stack((real, imag), axis=1) return data - - -def save_fft_pair(path, x, y, name_x="x", name_y="y"): - """ - write fft_pairs created in second analysis step to h5 file - """ - half_image = x.shape[2] // 2 - x = x[:, :, : half_image + 1, :] - y = y[:, :, : half_image + 1, :] - with h5py.File(path, "w") as hf: - hf.create_dataset(name_x, data=x) - hf.create_dataset(name_y, data=y) - hf.close() - - -if __name__ == "__main__": - create_gridded_data_set( - "/net/big-tank/POOL/projects/radio/test_rime/create_dataset.toml" - ) diff --git a/pyvisgen/gridding/scripts/grid_data.py b/pyvisgen/gridding/scripts/grid_data.py new file mode 100644 index 0000000..8e46e7d --- /dev/null +++ b/pyvisgen/gridding/scripts/grid_data.py @@ -0,0 +1,13 @@ +import click + +from pyvisgen.gridding.utils import create_gridded_data_set + + +@click.command() +@click.option("-c" "--config", required=True, type=str, default=None) +def grid_data(config): + create_gridded_data_set(config) + + +if __name__ == "__main__": + grid_data() diff --git a/pyvisgen/gridding/utils.py b/pyvisgen/gridding/utils.py new file mode 100644 index 0000000..6aeebdd --- /dev/null +++ b/pyvisgen/gridding/utils.py @@ -0,0 +1,154 @@ +import os +from pathlib import Path + +import h5py +import numpy as np +from tqdm.autonotebook import tqdm + +from pyvisgen.fits.data import fits_data +from pyvisgen.gridding import convert_amp_phase, convert_real_imag, grid_data +from pyvisgen.utils.config import read_data_set_conf +from pyvisgen.utils.data import load_bundles, open_bundles + +os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" + + +def create_gridded_data_set(config): + conf = read_data_set_conf(config) + out_path_fits = Path(conf["out_path_fits"]) + out_path = Path(conf["out_path_gridded"]) + out_path.mkdir(parents=True, exist_ok=True) + + sky_dist = load_bundles(conf["in_path"]) + fits_files = fits_data(out_path_fits) + size = len(fits_files) + print(size) + + # test + if conf["num_test_images"] > 0: + bundle_test = int(conf["num_test_images"] // conf["bundle_size"]) + size -= conf["num_test_images"] + + for i in tqdm(range(bundle_test)): + ( + uv_data_test, + freq_data_test, + gridded_data_test, + sky_dist_test, + ) = open_data(fits_files, sky_dist, conf, i) + + truth_fft_test = calc_truth_fft(sky_dist_test) + + if conf["amp_phase"]: + gridded_data_test = convert_amp_phase(gridded_data_test, sky_sim=False) + truth_amp_phase_test = convert_amp_phase(truth_fft_test, sky_sim=True) + else: + gridded_data_test = convert_real_imag(gridded_data_test, sky_sim=False) + truth_amp_phase_test = convert_real_imag(truth_fft_test, sky_sim=True) + assert gridded_data_test.shape[1] == 2 + + out = out_path / Path("samp_test" + str(i) + ".h5") + + save_fft_pair(out, gridded_data_test, truth_amp_phase_test) + + size_train = int(size // (1 + conf["train_valid_split"])) + size_valid = size - size_train + print(f"Training size: {size_train}, Validation size: {size_valid}") + bundle_train = int(size_train // conf["bundle_size"]) + bundle_valid = int(size_valid // conf["bundle_size"]) + + # train + for i in tqdm(range(bundle_train)): + i += bundle_test + uv_data_train, freq_data_train, gridded_data_train, sky_dist_train = open_data( + fits_files, sky_dist, conf, i + ) + + truth_fft_train = calc_truth_fft(sky_dist_train) + + if conf["amp_phase"]: + gridded_data_train = convert_amp_phase(gridded_data_train, sky_sim=False) + truth_amp_phase_train = convert_amp_phase(truth_fft_train, sky_sim=True) + else: + gridded_data_train = convert_real_imag(gridded_data_train, sky_sim=False) + truth_amp_phase_train = convert_real_imag(truth_fft_train, sky_sim=True) + + out = out_path / Path("samp_train" + str(i - bundle_test) + ".h5") + + save_fft_pair(out, gridded_data_train, truth_amp_phase_train) + train_index_last = i + + # valid + for i in tqdm(range(bundle_valid)): + i += train_index_last + uv_data_valid, freq_data_valid, gridded_data_valid, sky_dist_valid = open_data( + fits_files, sky_dist, conf, i + ) + + truth_fft_valid = calc_truth_fft(sky_dist_valid) + + if conf["amp_phase"]: + gridded_data_valid = convert_amp_phase(gridded_data_valid, sky_sim=False) + truth_amp_phase_valid = convert_amp_phase(truth_fft_valid, sky_sim=True) + else: + gridded_data_valid = convert_real_imag(gridded_data_valid, sky_sim=False) + truth_amp_phase_valid = convert_real_imag(truth_fft_valid, sky_sim=True) + + out = out_path / Path("samp_valid" + str(i - train_index_last) + ".h5") + + save_fft_pair(out, gridded_data_valid, truth_amp_phase_valid) + + +def open_data(fits_files, sky_dist, conf, i): + sky_sim_bundle_size = len(open_bundles(sky_dist[0])) + uv_data = [ + fits_files.get_uv_data(n).copy() + for n in np.arange( + i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size + ) + ] + freq_data = np.array( + [ + fits_files.get_freq_data(n) + for n in np.arange( + i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size + ) + ], + dtype="object", + ) + gridded_data = np.array( + [grid_data(data, freq, conf).copy() for data, freq in zip(uv_data, freq_data)] + ) + bundle = np.floor_divide(i * sky_sim_bundle_size, sky_sim_bundle_size) + gridded_truth = np.array( + [ + open_bundles(sky_dist[bundle])[n] + for n in np.arange( + i * sky_sim_bundle_size - bundle * sky_sim_bundle_size, + (i * sky_sim_bundle_size) + + sky_sim_bundle_size + - bundle * sky_sim_bundle_size, + ) + ] + ) + return uv_data, freq_data, gridded_data, gridded_truth + + +def save_fft_pair(path, x, y, name_x="x", name_y="y"): + """ + write fft_pairs created in second analysis step to h5 file + """ + half_image = x.shape[2] // 2 + x = x[:, :, : half_image + 1, :] + y = y[:, :, : half_image + 1, :] + with h5py.File(path, "w") as hf: + hf.create_dataset(name_x, data=x) + hf.create_dataset(name_y, data=y) + hf.close() + + +def calc_truth_fft(sky_dist): + truth_fft = np.fft.fftshift( + np.fft.fft2(np.fft.fftshift(sky_dist, axes=(1, 2)), axes=(1, 2)), axes=(1, 2) + ) + return truth_fft From eb1b4042e115931ce9d1863e5fa865d7bf9e4b40 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:56:24 +0100 Subject: [PATCH 05/34] Catch case where times would be a 0dim array in Observation.get_baselines() --- pyvisgen/simulation/observation.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pyvisgen/simulation/observation.py b/pyvisgen/simulation/observation.py index 5f2658e..7d2a851 100644 --- a/pyvisgen/simulation/observation.py +++ b/pyvisgen/simulation/observation.py @@ -388,8 +388,9 @@ def calc_time_steps(self): for i in range(self.num_scans) for j in range(int(self.scan_duration / self.int_time) + 1) ] - # +1 because t_1 is the stop time of t_0 - # in order to save computing power we take one time more to complete interval + # +1 because t_1 is the stop time of t_0. + # In order to save computing power we take + # one time more to complete interval time = Time(time_lst) return time, time.mjd * (60 * 60 * 24) @@ -477,6 +478,10 @@ def get_baselines(self, times): dataclass object baselines between telescopes with visibility flags """ + # catch rare case where dimension of times is 0 + if times.ndim == 0: + times = Time([times]) + # calculate GHA, local HA, and station elevation for all times. GHA, ha_local, el_st_all = self.calc_ref_elev(time=times) From 015bdde97d36acdae8d618e45887f0fae595f236 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:57:11 +0100 Subject: [PATCH 06/34] Completely rewrite data set creation routine in pyvisgen.simulation.data_set --- pyvisgen/simulation/data_set.py | 902 +++++++++++--------------------- 1 file changed, 298 insertions(+), 604 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index f0d464a..b6afb71 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -1,21 +1,28 @@ -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path import numpy as np -import pandas as pd import torch from astropy import units as un -from astropy.coordinates import AltAz, Angle, EarthLocation, SkyCoord +from astropy.coordinates import AltAz, EarthLocation, SkyCoord from astropy.time import Time from tqdm.autonotebook import tqdm -import pyvisgen.fits.writer as writer import pyvisgen.layouts.layouts as layouts +from pyvisgen.gridding import ( + calc_truth_fft, + convert_amp_phase, + convert_real_imag, + grid_vis_loop_data, + save_fft_pair, +) from pyvisgen.simulation.observation import Observation from pyvisgen.simulation.visibility import vis_loop from pyvisgen.utils.config import read_data_set_conf from pyvisgen.utils.data import load_bundles, open_bundles +DATEFMT = "%d-%m-%Y %H:%M:%S" + class SimulateDataSet: def __init__(self): @@ -25,398 +32,364 @@ def __init__(self): def from_config( cls, config: str | Path, - slurm=False, - job_id=None, - n=None, - image_key="y", + /, + image_key: str = "y", + *, + grid: bool = True, + slurm: bool = False, + job_id: int | None = None, + n: int | None = None, + date_fmt: str = DATEFMT, + num_images: int | None = None, ) -> None: - """ + """Simulates data from parameters in a config file. + Parameters ---------- config : str or Path - path to config file - slurm : bool - True, if slurm is used - job_id : int - job_id, given by slurm - n : int - running index + Path to the config file. + image_key : str, optional + Key under which the true sky distributions are saved + in the HDF5 file. Default: ``'y'`` + grid : bool, optional + If ``True``, apply gridding to visibility data and + save to HDF5 files. Default: ``True`` + slurm : bool, optional + ``True``, if slurm is used, Default: ``False`` + job_id : int or None, optional + ``job_id`` given by slurm. Default: ``None`` + n : int or None, optional + Running index. Default: ``None`` + date_fmt : str, optional + Format string for datetime objects. + Default: ``'%d-%m-%Y %H:%M:%S'`` + num_images : int or None, optional + Number of combined total images in the bundles. + If not ``None``, will skip counting the images before + drawing the random parameters. Default: ``None`` """ cls = cls() + cls.key = image_key + cls.grid = grid + cls.slurm = slurm cls.job_id = job_id cls.n = n - cls.key = image_key + cls.date_fmt = date_fmt cls.conf = read_data_set_conf(config) - cls.out_path = Path(cls.conf["out_path_fits"]) - cls.out_path.mkdir(parents=True, exist_ok=True) - - cls.data = load_bundles(cls.conf["in_path"]) - - if slurm: - cls._run_slurm() + if grid: + cls.out_path = Path(cls.conf["out_path_gridded"]) else: - cls._run() + cls.out_path = Path(cls.conf["out_path_fits"]) - return cls + if not cls.out_path.is_dir(): + cls.out_path.mkdir(parents=True) - @classmethod - def from_params( - cls, - images, - ra, - dec, - layout, - img_size, - fov, - int_time, - scan_start, - scan_duration, - num_scans, - scan_sep, - ref_freq, - freq_offsets, - bandwidths, - *, - corrupted=False, - noisy=0, - sensitivty_cut=1e-6, - mode="full", - grid_size=128, - grid_fov=100, - amp_phase=False, - num_test_images=500, - bundle_size=100, - train_valid_split=0.2, - seed=42, - device="cuda", - input_path="skies/", - out_path_fits="build/uvfits", - out_path_gridded="build/gridded", - slurm=False, - job_id=None, - n=None, - ) -> None: - """ """ - cls.job_id = job_id - cls.n = n + cls.data = load_bundles(cls.conf["in_path"]) - cls.conf = { - "mode": mode, - "device": device, - "seed": seed, - "layout": (layout,), - "img_size": (img_size,), - "fov_center_ra": ([ra, ra],), - "fov_center_dec": ([dec, dec],), - "fov_size": fov, - "corr_int_time": int_time, - "scan_start": scan_start, - "scan_duration": scan_duration, - "num_scans": num_scans, - "scan_separation": scan_sep, - "ref_frequency": ref_freq, - "frequency_offsets": freq_offsets, - "bandwidths": bandwidths, - "corrupted": corrupted, - "noisy": noisy, - "sensitivty_cut": sensitivty_cut, - "num_test_images": num_test_images, - "bundle_size": bundle_size, - "train_valid_split": train_valid_split, - "grid_size": grid_size, - "grid_fov": grid_fov, - "amp_phase": amp_phase, - "in_path": input_path, - "out_path_fits": out_path_fits, - "out_path_gridded": out_path_gridded, - } - - cls.out_path = Path(cls.conf["out_path_fits"]) - - if not cls.out_path.isdir(): - cls.out_path.mkdir(parents=True, exist_ok=True) + if not num_images: + len_data = tqdm( + range(len(cls.data)), + position=0, + leave=False, + desc="Counting Number of Images", + colour="#754fc9", + ) + # get number of random parameter draws from number of images in data + num_draws = np.sum([len(cls.get_images(i)) for i in len_data]) - cls.data = load_bundles(cls.conf["in_path"]) + # draw parameters beforehand, i.e. outside the simulation loop + cls.create_sampling_rc(num_draws) if slurm: cls._run_slurm() else: cls._run() - def _run_slurm(self): - """ """ - job_id = int(self.job_id + self.n * 500) - out = self.out_path / Path("vis_" + str(job_id) + ".fits") + return cls - imgs_bundle = len(open_bundles(self.data[0])) - bundle = torch.div(job_id, imgs_bundle, rounding_mode="floor") - image = job_id - bundle * imgs_bundle + def _run(self): + data = tqdm( + range(len(self.data)), + position=0, + desc="Processing Bundles", + colour="#52ba66", + ) - SI = torch.tensor(open_bundles(self.data[bundle])[image]) + samp_ops_idx = 0 + for i in data: + SIs = self.get_images(i) + truth_fft = calc_truth_fft(SIs) + + SIs = tqdm( + SIs, + position=1, + desc=f"Bundle {i}", + colour="#595cbd", + leave=False, + ) - if len(SI.shape) == 2: - SI = SI.unsqueeze(0) + sim_data = [] + for SI in SIs: + obs = self.create_observation(samp_ops_idx) + vis = vis_loop( + obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"] + ) - obs = self.create_observation(self.conf) - vis_data = vis_loop(obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]) - hdu_list = writer.create_hdu_list(vis_data, obs) - hdu_list.writeto(out, overwrite=True) + if self.grid: + sim_data.append( + grid_vis_loop_data( + vis.u, vis.v, vis.get_values(), self.freq_bands, self.conf + ) + ) + samp_ops_idx += 1 + + sim_data = np.array(sim_data) + + if self.grid: + if self.conf["amp_phase"]: + sim_data = convert_amp_phase(sim_data, sky_sim=False) + truth_fft = convert_amp_phase(truth_fft, sky_sim=True) + else: + sim_data = convert_real_imag(sim_data, sky_sim=False) + truth_fft = convert_real_imag(truth_fft, sky_sim=True) + + out = self.out_path / Path( + f"samp_{self.conf['file_prefix']}_" + str(i) + ".h5" + ) - def _run(self): - """ """ - for i in tqdm(range(len(self.data))): - SIs = self.get_images(self.data, i) + save_fft_pair(out, sim_data, truth_fft) - for j, SI in enumerate(tqdm(SIs)): - obs = create_observation(self.conf) - vis_data = vis_loop( - obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"] - ) + path_msg = self.conf["out_path_gridded"] + else: + path_msg = self.conf["out_path_fits"] + + print( + f"Successfully simulated and saved {samp_ops_idx} images " + f"to '{path_msg}'!" + ) - out = self.out_path / Path("vis_" + str(j + len(SIs) * i) + ".fits") - hdu_list = writer.create_hdu_list(vis_data, obs) - hdu_list.writeto(out, overwrite=True) + def _run_slurm(): + raise NotImplementedError("Not implememented yet!") - def get_images(self, bundles, i) -> torch.tensor: - """ """ - SIs = torch.tensor(open_bundles(bundles[i], key=self.key)) + def get_images(self, i): + SIs = torch.tensor(open_bundles(self.data[i], key=self.key)) if len(SIs.shape) == 3: SIs = SIs.unsqueeze(1) return SIs - def _create_observation(self) -> Observation: - rc = create_sampling_rc(self.conf) + def create_observation(self, i): + rc = self.samp_ops + + # put the respective values inside the + # pol_kwargs and field_kwargs dicts. + pol_kwargs = dict( + delta=rc["delta"][i], + amp_ratio=rc["amp_ratio"][i], + random_state=self.conf["seed"], + ) + field_kwargs = dict( + order=rc["order"][i], + scale=rc["scale"][i], + threshold=rc["threshold"], + random_state=self.conf["seed"], + ) dense = False - if rc["mode"] == "dense": + if self.conf["mode"] == "dense": dense = True obs = Observation( - src_ra=rc["fov_center_ra"], - src_dec=rc["fov_center_dec"], - start_time=rc["scan_start"], - scan_duration=rc["scan_duration"], - num_scans=rc["num_scans"], - scan_separation=rc["scan_separation"], - integration_time=rc["corr_int_time"], - ref_frequency=rc["ref_frequency"], - frequency_offsets=rc["frequency_offsets"], - bandwidths=rc["bandwidths"], - fov=rc["fov_size"], - image_size=rc["img_size"], - array_layout=rc["layout"], - corrupted=rc["corrupted"], - device=rc["device"], + **self.samp_ops_const, + src_ra=rc["src_ra"][i], + src_dec=rc["src_dec"][i], + start_time=rc["start_time"][i], + scan_duration=int(rc["scan_duration"][i]), + num_scans=int(rc["num_scans"][i]), + pol_kwargs=pol_kwargs, + field_kwargs=field_kwargs, dense=dense, - sensitivity_cut=rc["sensitivity_cut"], ) return obs - def _create_sampling_rc(self): - """ - Draw sampling options and test if at least half of the - telescopes can see the source. If not, then new - parameters are drawn. - - Parameters - ---------- - conf : dict - simulation options - - Returns - ------- - dict - contains the observation parameters - """ - - global rng - - if self.conf["seed"] is not None: - rng = np.random.default_rng(self.conf["seed"]) + def create_sampling_rc(self, size): + if self.conf["seed"]: + self.rng = np.random.default_rng(self.conf["seed"]) else: - rng = np.random.default_rng() - - samp_ops = draw_sampling_opts() - array_layout = layouts.get_array_layout(self.conf["layout"][0]) - half_telescopes = array_layout.x.shape[0] // 2 - - while test_opts(samp_ops) <= half_telescopes: - samp_ops = draw_sampling_opts(self.conf) - - return samp_ops - - def draw_sampling_opts(self): - """ - Draw observation options from given intervals. + self.rng = np.random.default_rng() - Parameters - ---------- - conf : dict - simulation options - - Returns - ------- - dict - contains randomly drawn observation options - """ - - if "rng" not in globals(): - global rng - rng = np.random.default_rng(self.conf["seed"]) + if self.conf["mode"] == "dense": + self.freq_bands = np.array(self.conf["ref_frequency"]) + else: + self.freq_bands = np.array(self.conf["ref_frequency"]) + np.array( + self.conf["frequency_offsets"] + ) - angles_ra = np.arange( - self.conf["fov_center_ra"][0][0], - self.conf["fov_center_ra"][0][1], - step=0.1, - ) - angles_dec = np.arange( - self.conf["fov_center_dec"][0][0], - self.conf["fov_center_dec"][0][1], - step=0.1, + # Split sampling options into two dicts: + # samps_ops_const is always the same, values in + # samps_ops, however, will be drawn randomly. + self.samp_ops_const = dict( + array_layout=self.conf["layout"][0], + image_size=self.conf["img_size"][0], + fov=self.conf["fov_size"], + integration_time=self.conf["corr_int_time"], + scan_separation=self.conf["scan_separation"], + ref_frequency=self.conf["ref_frequency"], + frequency_offsets=self.conf["frequency_offsets"], + bandwidths=self.conf["bandwidths"], + corrupted=self.conf["corrupted"], + device=self.conf["device"], + sensitivity_cut=self.conf["sensitivty_cut"], + polarisation=self.conf["polarisation"], + ) # NOTE: scan_separation and integration_time may change in the future + + # get second half of the sampling options; + # this is the randomly drawn, i.e. non-constant, part + self.samp_ops = self.draw_sampling_opts(size) + + test_idx = tqdm( + range(self.samp_ops["src_ra"].size), + position=0, + desc="Pre-drawing and testing sample parameters", + colour="#00c1a2", + leave=False, ) - fov_center_ra = rng.choice(angles_ra) - fov_center_dec = rng.choice(angles_dec) + for i in test_idx: + self.test_rand_opts(i) - start_time_l = datetime.strptime( - self.conf["scan_start"][0], "%d-%m-%Y %H:%M:%S" - ) - start_time_h = datetime.strptime( - self.conf["scan_start"][1], "%d-%m-%Y %H:%M:%S" - ) - start_times = pd.date_range( - start_time_l, - start_time_h, - freq="1h", - ).strftime("%d-%m-%Y %H:%M:%S") - - scan_start = rng.choice( - [datetime.strptime(time, "%d-%m-%Y %H:%M:%S") for time in start_times] + def draw_sampling_opts(self, size): + ra = self.rng.uniform( + self.conf["fov_center_ra"][0][0], self.conf["fov_center_ra"][0][1], size ) - scan_duration = int( - rng.integers(self.conf["scan_duration"][0], self.conf["scan_duration"][1]) + dec = self.rng.uniform( + self.conf["fov_center_dec"][0][0], self.conf["fov_center_dec"][0][1], size ) - num_scans = int( - rng.integers(self.conf["num_scans"][0], self.conf["num_scans"][1]) + start_time_l = datetime.strptime(self.conf["scan_start"][0], self.date_fmt) + start_time_h = datetime.strptime(self.conf["scan_start"][1], self.date_fmt) + start_times = np.arange(start_time_l, start_time_h, timedelta(hours=1)).astype( + datetime ) - # TODO: is this really necessary? - opts = np.array( - [ - self.conf["mode"], - self.conf["layout"][0], - self.conf["img_size"][0], - fov_center_ra, - fov_center_dec, - self.conf["fov_size"], - self.conf["corr_int_time"], - scan_start, - scan_duration, - num_scans, - self.conf["scan_separation"], - self.conf["ref_frequency"], - self.conf["frequency_offsets"], - self.conf["bandwidths"], - self.conf["corrupted"], - self.conf["device"], - self.conf["sensitivty_cut"], - ], - dtype="object", + scan_start = self.rng.choice(start_times, size) + scan_duration = self.rng.integers( + self.conf["scan_duration"][0], + self.conf["scan_duration"][1], + size, + ) + num_scans = self.rng.integers( + self.conf["num_scans"][0], self.conf["num_scans"][1], size ) - samp_ops = { - "mode": opts[0], - "layout": opts[1], - "img_size": opts[2], - "fov_center_ra": opts[3], - "fov_center_dec": opts[4], - "fov_size": opts[5], - "corr_int_time": opts[6], - "scan_start": opts[7], - "scan_duration": opts[8], - "num_scans": opts[9], - "scan_separation": opts[10], - "ref_frequency": opts[11], - "frequency_offsets": opts[12], - "bandwidths": opts[13], - "corrupted": opts[14], - "device": opts[15], - "sensitivity_cut": opts[16], - } - return samp_ops + if scan_duration.size == 1: + scan_duration = scan_duration.astype(int) - def test_opts(self, rc): - """ - Compute the number of telescopes that can observe the source given - certain randomly drawn parameters. - - Parameters - ---------- - rc : dict - randomly drawn observational parameters + if num_scans.size == 1: + num_scans = num_scans.astype(int) - Returns - ------- + # if polarisation is None, we don't need to enter the + # conditional below, so we set delta, amp_ratio, field_order, + # and field_scale to None. + delta, amp_ratio, field_order, field_scale = np.full((4, size), None) - """ - array_layout = layouts.get_array_layout(rc["layout"]) + if self.conf["polarisation"]: + if self.conf["pol_delta"]: + delta = np.repeat(self.conf["pol_delta"], size) + else: + delta = self.rng.uniform(0, 180, size) - src_crd = SkyCoord( - ra=rc["fov_center_ra"], dec=rc["fov_center_dec"], unit=(un.deg, un.deg) - ) + if self.conf["pol_amp_ratio"]: + amp_ratio = np.repeat(self.conf["pol_amp_ratio"], size) + else: + amp_ratio = self.rng.uniform(0, 1, size) - time = self._calc_time_steps(rc) + if self.conf["field_order"]: + field_order = np.repeat(self.conf["field_order"], size).reshape(-1, 2) + else: + field_order = np.repeat(self.rng.uniform(0, 1, size), 2).reshape(-1, 2) - _, el_st_0 = self.calc_ref_elev(src_crd, time[0], array_layout) - _, el_st_1 = self.calc_ref_elev(src_crd, time[1], array_layout) + if self.conf["field_scale"]: + field_scale = np.stack( + np.repeat(self.conf["field_scale"], size).reshape(2, -1), axis=1 + ) + else: + a = self.rng.uniform(0, 1 - 1e-6, size) + b = np.repeat(1, size, dtype=float) + field_scale = np.stack((a, b), axis=1) + + samp_ops = dict( + src_ra=ra, + src_dec=dec, + start_time=scan_start, + scan_duration=scan_duration, + num_scans=num_scans, + delta=delta, + amp_ratio=amp_ratio, + order=field_order, + scale=field_scale, + threshold=self.conf["field_threshold"], + ) + # NOTE: We don't need to draw random values for threshold + # as threshold=None should be suitable for almost all cases. + # However, since threshold has to be in the field_kwargs dict + # later, we need to include it here instead of inside the + # samp_ops_const dictionary. - el_min = 15 - el_max = 85 + return samp_ops - active_telescopes_0 = np.sum((el_st_0 >= el_min) & (el_st_0 <= el_max)) - active_telescopes_1 = np.sum((el_st_1 >= el_min) & (el_st_1 <= el_max)) + def test_rand_opts(self, i): + array = layouts.get_array_layout(self.samp_ops_const["array_layout"]) - return min(active_telescopes_0, active_telescopes_1) + time_steps = self.calc_time_steps(i) + src_crd = SkyCoord( + self.samp_ops["src_ra"][i], self.samp_ops["src_dec"][i], unit=un.deg + ) - def calc_ref_elev(self, src_crd, time, array_layout): - if time.shape == (): - time = time[None] + total_stations = len(array.st_num) - # Calculate for all times - # calculate GHA, Greenwich as reference for EHT - ha_all = Angle( - [t.sidereal_time("apparent", "greenwich") - src_crd.ra for t in time] + locations = EarthLocation.from_geocentric( + x=array.x, + y=array.y, + z=array.z, + unit=un.m, ) - # calculate elevations - el_st_all = src_crd.transform_to( - AltAz( - obstime=time.reshape(len(time), -1), - location=EarthLocation.from_geocentric( - np.repeat([array_layout.x], len(time), axis=0), - np.repeat([array_layout.y], len(time), axis=0), - np.repeat([array_layout.z], len(time), axis=0), - unit=un.m, - ), - ) - ).alt.degree - - assert len(ha_all.value) == len(el_st_all) + altaz_frames = AltAz(obstime=time_steps[:, None], location=locations[None]) + src_alt = src_crd.transform_to(altaz_frames).alt.degree - return ha_all, el_st_all + # check which stations can see the source for each time step + visible = np.logical_and( + array.el_low.numpy() <= src_alt, src_alt <= array.el_high.numpy() + ) - def _calc_time_steps(self, conf): - start_time = Time(conf["scan_start"].isoformat(), format="isot") - scan_separation = conf["scan_separation"] - num_scans = conf["num_scans"] - scan_duration = conf["scan_duration"] - int_time = conf["corr_int_time"] + # We want at least half of the telescopes to see the source + visible_count_per_t = visible.sum(axis=1) + visible_half = visible_count_per_t > total_stations // 2 + + # If the source is not seen by half the telescopes half of the observation time, + # we redraw the source ra and dec and scan start times, duration, + # and number of scans. Then we test again by calling this function recursively. + if visible_half.sum() < time_steps.size // 2: + redrawn_samp_ops = self.draw_sampling_opts(1) + self.samp_ops["src_ra"][i] = redrawn_samp_ops["src_ra"][0] + self.samp_ops["src_dec"][i] = redrawn_samp_ops["src_dec"][0] + self.samp_ops["start_time"][i] = redrawn_samp_ops["start_time"][0] + self.samp_ops["scan_duration"][i] = redrawn_samp_ops["scan_duration"][0] + self.samp_ops["num_scans"][i] = redrawn_samp_ops["num_scans"][0] + + self.test_rand_opts(i) + + def calc_time_steps(self, i): + start_time = Time(self.samp_ops["start_time"][i].isoformat(), format="isot") + scan_separation = self.samp_ops_const["scan_separation"] + num_scans = self.samp_ops["num_scans"][i] + scan_duration = self.samp_ops["scan_duration"][i] + int_time = self.samp_ops_const["integration_time"] time_lst = [ start_time @@ -426,288 +399,9 @@ def _calc_time_steps(self, conf): for i in range(num_scans) for j in range(int(scan_duration / int_time) + 1) ] - # +1 because t_1 is the stop time of t_0 - # in order to save computing power we take one time more to complete interval + # +1 because t_1 is the stop time of t_0. + # In order to save computing power we take + # one time more to complete interval time = Time(time_lst) return time - - -def simulate_data_set(config, slurm=False, job_id=None, n=None): - """ - Wrapper function for simulating visibilities. - Distinction between slurm and non-threaded simulation. - - Parameters - ---------- - config : toml file - path to config file - slurm : bool - True, if slurm is used - job_id : int - job_id, given by slurm - n : int - running index - - """ - conf = read_data_set_conf(config) - out_path = Path(conf["out_path_fits"]) - out_path.mkdir(parents=True, exist_ok=True) - data = load_bundles(conf["in_path"]) - - if slurm: - job_id = int(job_id + n * 500) - out = out_path / Path("vis_" + str(job_id) + ".fits") - imgs_bundle = len(open_bundles(data[0])) - bundle = torch.div(job_id, imgs_bundle, rounding_mode="floor") - image = job_id - bundle * imgs_bundle - SI = torch.tensor(open_bundles(data[bundle])[image]) - if len(SI.shape) == 2: - SI = SI.unsqueeze(0) - - obs = create_observation(conf) - vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"]) - hdu_list = writer.create_hdu_list(vis_data, obs) - hdu_list.writeto(out, overwrite=True) - - else: - for i in tqdm(range(len(data))): - SIs = get_images(data, i) - - for j, SI in enumerate(tqdm(SIs)): - obs = create_observation(conf) - vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"]) - - out = out_path / Path("vis_" + str(j + len(SIs) * i) + ".fits") - hdu_list = writer.create_hdu_list(vis_data, obs) - hdu_list.writeto(out, overwrite=True) - - -def get_images(bundles, i): - SIs = torch.tensor(open_bundles(bundles[i])) - if len(SIs.shape) == 3: - SIs = SIs.unsqueeze(1) - return SIs - - -def create_observation(conf): - rc = create_sampling_rc(conf) - dense = False - if rc["mode"] == "dense": - dense = True - - obs = Observation( - src_ra=rc["fov_center_ra"], - src_dec=rc["fov_center_dec"], - start_time=rc["scan_start"], - scan_duration=rc["scan_duration"], - num_scans=rc["num_scans"], - scan_separation=rc["scan_separation"], - integration_time=rc["corr_int_time"], - ref_frequency=rc["ref_frequency"], - frequency_offsets=rc["frequency_offsets"], - bandwidths=rc["bandwidths"], - fov=rc["fov_size"], - image_size=rc["img_size"], - array_layout=rc["layout"], - corrupted=rc["corrupted"], - device=rc["device"], - dense=dense, - sensitivity_cut=rc["sensitivity_cut"], - ) - return obs - - -def create_sampling_rc(conf): - """ - Draw sampling options and test if atleast half of the telescopes can see the source. - If not, then new parameters are drawn. - - Parameters - ---------- - conf : dict - simulation options - - Returns - ------- - dict - contains the observation parameters - """ - - global rng - - if conf["seed"] is not None: - rng = np.random.default_rng(conf["seed"]) - else: - rng = np.random.default_rng() - - samp_ops = draw_sampling_opts(conf) - array_layout = layouts.get_array_layout(conf["layout"][0]) - half_telescopes = array_layout.x.shape[0] // 2 - - while test_opts(samp_ops) <= half_telescopes: - samp_ops = draw_sampling_opts(conf) - - return samp_ops - - -def draw_sampling_opts(conf): - """ - Draw observation options from given intervals. - - Parameters - ---------- - conf : dict - simulation options - - Returns - ------- - dict - contains randomly drawn observation options - """ - - if "rng" not in globals(): - global rng - rng = np.random.default_rng(conf["seed"]) - - angles_ra = np.arange( - conf["fov_center_ra"][0][0], conf["fov_center_ra"][0][1], step=0.1 - ) - fov_center_ra = rng.choice(angles_ra) - - angles_dec = np.arange( - conf["fov_center_dec"][0][0], conf["fov_center_dec"][0][1], step=0.1 - ) - fov_center_dec = rng.choice(angles_dec) - start_time_l = datetime.strptime(conf["scan_start"][0], "%d-%m-%Y %H:%M:%S") - start_time_h = datetime.strptime(conf["scan_start"][1], "%d-%m-%Y %H:%M:%S") - start_times = pd.date_range(start_time_l, start_time_h, freq="1h").strftime( - "%d-%m-%Y %H:%M:%S" - ) - scan_start = rng.choice( - [datetime.strptime(time, "%d-%m-%Y %H:%M:%S") for time in start_times] - ) - scan_duration = int( - rng.integers(conf["scan_duration"][0], conf["scan_duration"][1]) - ) - num_scans = int(rng.integers(conf["num_scans"][0], conf["num_scans"][1])) - opts = np.array( - [ - conf["mode"], - conf["layout"][0], - conf["img_size"][0], - fov_center_ra, - fov_center_dec, - conf["fov_size"], - conf["corr_int_time"], - scan_start, - scan_duration, - num_scans, - conf["scan_separation"], - conf["ref_frequency"], - conf["frequency_offsets"], - conf["bandwidths"], - conf["corrupted"], - conf["device"], - conf["sensitivty_cut"], - ], - dtype="object", - ) - samp_ops = { - "mode": opts[0], - "layout": opts[1], - "img_size": opts[2], - "fov_center_ra": opts[3], - "fov_center_dec": opts[4], - "fov_size": opts[5], - "corr_int_time": opts[6], - "scan_start": opts[7], - "scan_duration": opts[8], - "num_scans": opts[9], - "scan_separation": opts[10], - "ref_frequency": opts[11], - "frequency_offsets": opts[12], - "bandwidths": opts[13], - "corrupted": opts[14], - "device": opts[15], - "sensitivity_cut": opts[16], - } - return samp_ops - - -def test_opts(rc): - """ - Compute the number of telescopes that can observe the source given - certain randomly drawn parameters. - - Parameters - ---------- - rc : dict - randomly drawn observational parameters - - Returns - ------- - - """ - array_layout = layouts.get_array_layout(rc["layout"]) - src_crd = SkyCoord( - ra=rc["fov_center_ra"], dec=rc["fov_center_dec"], unit=(un.deg, un.deg) - ) - time = calc_time_steps(rc) - _, el_st_0 = calc_ref_elev(src_crd, time[0], array_layout) - _, el_st_1 = calc_ref_elev(src_crd, time[1], array_layout) - el_min = 15 - el_max = 85 - active_telescopes_0 = np.sum((el_st_0 >= el_min) & (el_st_0 <= el_max)) - active_telescopes_1 = np.sum((el_st_1 >= el_min) & (el_st_1 <= el_max)) - return min(active_telescopes_0, active_telescopes_1) - - -def calc_ref_elev(src_crd, time, array_layout): - if time.shape == (): - time = time[None] - # Calculate for all times - # calculate GHA, Greenwich as reference for EHT - ha_all = Angle( - [t.sidereal_time("apparent", "greenwich") - src_crd.ra for t in time] - ) - - # calculate elevations - el_st_all = src_crd.transform_to( - AltAz( - obstime=time.reshape(len(time), -1), - location=EarthLocation.from_geocentric( - np.repeat([array_layout.x], len(time), axis=0), - np.repeat([array_layout.y], len(time), axis=0), - np.repeat([array_layout.z], len(time), axis=0), - unit=un.m, - ), - ) - ).alt.degree - assert len(ha_all.value) == len(el_st_all) - return ha_all, el_st_all - - -def calc_time_steps(conf): - start_time = Time(conf["scan_start"].isoformat(), format="isot") - scan_separation = conf["scan_separation"] - num_scans = conf["num_scans"] - scan_duration = conf["scan_duration"] - int_time = conf["corr_int_time"] - - time_lst = [ - start_time - + scan_separation * i * un.second - + i * scan_duration * un.second - + j * int_time * un.second - for i in range(num_scans) - for j in range(int(scan_duration / int_time) + 1) - ] - # +1 because t_1 is the stop time of t_0 - # in order to save computing power we take one time more to complete interval - time = Time(time_lst) - return time - - -if __name__ == "__main__": - simulate_data_set() From 3014cdbdf2f605daed669bfd31af7300425a8b81 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:57:42 +0100 Subject: [PATCH 07/34] Rewrite data set creation script --- pyvisgen/simulation/scripts/create_dataset.py | 66 ++++++++++++++++--- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/pyvisgen/simulation/scripts/create_dataset.py b/pyvisgen/simulation/scripts/create_dataset.py index 4af7903..e01242f 100644 --- a/pyvisgen/simulation/scripts/create_dataset.py +++ b/pyvisgen/simulation/scripts/create_dataset.py @@ -1,13 +1,13 @@ import click -from pyvisgen.gridding.gridder import create_gridded_data_set -from pyvisgen.simulation.data_set import simulate_data_set +from pyvisgen.simulation.data_set import SimulateDataSet @click.command() -@click.argument("configuration_path", type=click.Path(exists=True, dir_okay=False)) -@click.option("--job_id", required=False, type=int, default=None) -@click.option("--n", required=False, type=int, default=None) +@click.argument( + "configuration_path", + type=click.Path(exists=True, dir_okay=False), +) @click.option( "--mode", type=click.Choice( @@ -20,13 +20,61 @@ ), default="simulate", ) -def main(configuration_path, mode, job_id=None, n=None): +@click.option( + "-k", + "--key", + required=False, + type=str, + default="y", + help="Key under which the images are saved in the HDF5 file", +) +@click.option("-j", "--job_id", required=False, type=int, default=None) +@click.option("--n", required=False, type=int, default=None) +@click.option( + "--num_images", + required=False, + type=int, + default=None, + help=""" + Number of images in all bundles combined. + Will skip the automatic count. + """, +) +def main( + configuration_path: str | click.Path, + mode: str, + key: str = "y", + job_id=None, + n=None, + date_fmt="%d-%m-%Y %H:%M:%S", + num_images: int | None = None, +): if mode == "simulate": - simulate_data_set(configuration_path) + SimulateDataSet.from_config( + configuration_path, + image_key=key, + grid=False, + date_fmt=date_fmt, + num_images=num_images, + ) if mode == "slurm": - simulate_data_set(configuration_path, slurm=True, job_id=job_id, n=n) + SimulateDataSet.from_config( + configuration_path, + image_key=key, + slurm=True, + job_id=job_id, + n=n, + date_fmt=date_fmt, + num_images=num_images, + ) if mode == "gridding": - create_gridded_data_set(configuration_path) + SimulateDataSet.from_config( + configuration_path, + image_key=key, + grid=True, + date_fmt=date_fmt, + num_images=num_images, + ) if __name__ == "__main__": From c1fdf71fa3925c405cc8cb2c15266a45a362b940 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:58:10 +0100 Subject: [PATCH 08/34] Update config parser for new data set creation routine --- pyvisgen/utils/config.py | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pyvisgen/utils/config.py b/pyvisgen/utils/config.py index ebad4e2..64b91ef 100644 --- a/pyvisgen/utils/config.py +++ b/pyvisgen/utils/config.py @@ -15,6 +15,7 @@ def read_data_set_conf(conf_toml): simulation configuration """ config = toml.load(conf_toml) + config = sanitize_conf(config) conf = {} conf["mode"] = config["sampling_options"]["mode"] @@ -37,6 +38,13 @@ def read_data_set_conf(conf_toml): conf["noisy"] = config["sampling_options"]["noisy"] conf["sensitivty_cut"] = config["sampling_options"]["sensitivity_cut"] + conf["polarisation"] = config["polarisation_options"]["mode"] + conf["pol_delta"] = config["polarisation_options"]["delta"] + conf["pol_amp_ratio"] = config["polarisation_options"]["amp_ratio"] + conf["field_order"] = config["polarisation_options"]["field_order"] + conf["field_scale"] = config["polarisation_options"]["field_scale"] + conf["field_threshold"] = config["polarisation_options"]["field_threshold"] + conf["num_test_images"] = config["bundle_options"]["num_test_images"] conf["bundle_size"] = config["bundle_options"]["bundle_size"] conf["train_valid_split"] = config["bundle_options"]["train_valid_split"] @@ -46,4 +54,37 @@ def read_data_set_conf(conf_toml): conf["in_path"] = config["bundle_options"]["in_path"] conf["out_path_fits"] = config["bundle_options"]["out_path_fits"] conf["out_path_gridded"] = config["bundle_options"]["out_path_gridded"] + conf["file_prefix"] = config["bundle_options"]["file_prefix"] + + # handle case if file_prefix = None + if not conf["file_prefix"]: + conf["file_prefix"] = "" + return conf + + +def sanitize_conf(conf: dict) -> dict: + """Sanitizes a given dict by replacinginstances of + 'none' str with None. + + Parameters + ---------- + conf : list + Unsanitized config dict. + + Returns + ------- + sanitized_conf : list + Sanitized conf dict where all instances of 'none' + are replaced with None. + """ + sanitized_conf = {} + for key, val in conf.items(): + if isinstance(val, dict): + val = sanitize_conf(val) + elif isinstance(val, str) and val == "none": + val = None + + sanitized_conf[key] = val + + return sanitized_conf From 07c2b6a603012eca337d016a766145d96a2387f9 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:58:50 +0100 Subject: [PATCH 09/34] Remove torch.flip in vis_loop --- pyvisgen/simulation/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyvisgen/simulation/visibility.py b/pyvisgen/simulation/visibility.py index fb15eb0..507780d 100644 --- a/pyvisgen/simulation/visibility.py +++ b/pyvisgen/simulation/visibility.py @@ -386,7 +386,7 @@ def vis_loop( raise ValueError("Expected batch_size to be 'auto' or type int") pol = Polarisation( - torch.flip(SI, dims=[1]), + SI, sensitivity_cut=obs.sensitivity_cut, polarisation=obs.polarisation, device=obs.device, From 78022889295a9e2d03612ba640b38510d974025e Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 16:59:10 +0100 Subject: [PATCH 10/34] Update pyvisgen.simulation.__init__ --- pyvisgen/simulation/__init__.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/pyvisgen/simulation/__init__.py b/pyvisgen/simulation/__init__.py index c7fd0c7..4d81ab6 100644 --- a/pyvisgen/simulation/__init__.py +++ b/pyvisgen/simulation/__init__.py @@ -1,14 +1,5 @@ from .array import Array -from .data_set import ( - calc_ref_elev, - calc_time_steps, - create_observation, - create_sampling_rc, - draw_sampling_opts, - get_images, - simulate_data_set, - test_opts, -) +from .data_set import SimulateDataSet from .observation import Baselines, Observation, ValidBaselineSubset from .scan import angularDistance, calc_beam, calc_fourier, integrate, jinc, rime from .visibility import Visibilities, generate_noise, vis_loop @@ -17,22 +8,16 @@ "Array", "Baselines", "Observation", + "SimulateDataSet", "ValidBaselineSubset", "Visibilities", "angularDistance", "calc_beam", "calc_fourier", - "calc_ref_elev", "calc_time_steps", - "create_observation", - "create_sampling_rc", - "draw_sampling_opts", "generate_noise", - "get_images", "integrate", "jinc", "rime", - "simulate_data_set", - "test_opts", "vis_loop", ] From aba72f4685a924f8f5e004fd8eab672b26d43dea Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Wed, 29 Jan 2025 17:03:49 +0100 Subject: [PATCH 11/34] Update default data set config --- config/default_data_set.toml | 37 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/config/default_data_set.toml b/config/default_data_set.toml index 6310c87..ae72748 100644 --- a/config/default_data_set.toml +++ b/config/default_data_set.toml @@ -2,30 +2,39 @@ mode = "full" device = "cpu" seed = 1337 -layout = "vla" -img_size = 128 +layout = "vlba" +img_size = 1024 fov_center_ra = [100, 110] fov_center_dec = [30, 40] -fov_size = 100 +fov_size = 0.24 corr_int_time = 30.0 -scan_start = ["16-01-2020 00:04:01", "16-01-2020 08:59:59"] -scan_duration = [60, 90] -num_scans = [1, 2] +scan_start = ["01-01-1995 00:00:01", "01-01-2025 23:59:59"] +scan_duration = [20, 600] +num_scans = [6, 10] scan_separation = 360 -ref_frequency = 15.21e9 -frequency_offsets = [0e8, 0.8e8, 1.44e8, 2.08e8] -bandwidths = [6.4e7, 6.4e7, 6.4e7, 6.4e7] +ref_frequency = 15.17600e9 +frequency_offsets = [0e8, 1.28e8, 2.56e8, 3.84e8] +bandwidths = [1.28e8, 1.28e8, 1.28e8, 1.28e8] noisy = 0 corrupted = false sensitivity_cut = 1e-6 +[polarisation_options] +mode = "none" # linear, circular, or "none" +delta = 45 # phase angle +amp_ratio = 0.5 # polarisation amplitude ratio +field_order = [0.01, 0.01] # (x, y) orders of the random polarisation field +field_scale = [0, 1] # scaling of the intensity of the polarisation field +field_threshold = "none" + [bundle_options] -in_path = "skies/" -out_path_fits = "build/uvfits" -out_path_gridded = "build/gridded" +file_prefix="train" # e.g. train, test, or valid. If "none", no prefix will be applied +in_path = "/path/to/input/data/" +out_path_fits = "/output/path/" +out_path_gridded = "/output/path/for/gridding/" num_test_images = 500 bundle_size = 100 train_valid_split = 0.2 -grid_size = 128 -grid_fov = 100 +grid_size = 1024 +grid_fov = 0.24 amp_phase = false From 5ae214c74a3b0c4fa3ae32685443c46224449507 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Thu, 30 Jan 2025 15:28:08 +0100 Subject: [PATCH 12/34] Test shapes, fix fft truth --- pyvisgen/gridding/gridder.py | 4 ++-- pyvisgen/gridding/utils.py | 15 ++++++++++++++- pyvisgen/simulation/data_set.py | 3 +++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py index d7d2ef9..1ab198c 100644 --- a/pyvisgen/gridding/gridder.py +++ b/pyvisgen/gridding/gridder.py @@ -194,7 +194,7 @@ def convert_amp_phase(data, sky_sim=False): if sky_sim: amp = np.abs(data) phase = np.angle(data) - data = np.stack((amp, phase), axis=1) + data = np.concatenate((amp, phase), axis=1) else: test = data[:, 0] + 1j * data[:, 1] amp = np.abs(test) @@ -208,7 +208,7 @@ def convert_real_imag(data, sky_sim=False): real = data.real imag = data.imag - data = np.stack((real, imag), axis=1) + data = np.concatenate((real, imag), axis=1) else: real = data[:, 0] imag = data[:, 1] diff --git a/pyvisgen/gridding/utils.py b/pyvisgen/gridding/utils.py index 6aeebdd..438385e 100644 --- a/pyvisgen/gridding/utils.py +++ b/pyvisgen/gridding/utils.py @@ -141,14 +141,27 @@ def save_fft_pair(path, x, y, name_x="x", name_y="y"): half_image = x.shape[2] // 2 x = x[:, :, : half_image + 1, :] y = y[:, :, : half_image + 1, :] + + test_shapes(x) + test_shapes(y) + with h5py.File(path, "w") as hf: hf.create_dataset(name_x, data=x) hf.create_dataset(name_y, data=y) hf.close() +def test_shapes(array): + if array.shape[1] != 2: + raise ValueError("Expected array axis 1 to be 2!") + + if len(array.shape) != 4: + raise ValueError("Expected array shape to be of len 4!") + + def calc_truth_fft(sky_dist): truth_fft = np.fft.fftshift( - np.fft.fft2(np.fft.fftshift(sky_dist, axes=(1, 2)), axes=(1, 2)), axes=(1, 2) + np.fft.fft2(np.fft.fftshift(sky_dist, axes=(2, 3)), axes=(2, 3)), axes=(2, 3) ) + return truth_fft diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index b6afb71..629f575 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -155,6 +155,9 @@ def _run(self): sim_data = convert_real_imag(sim_data, sky_sim=False) truth_fft = convert_real_imag(truth_fft, sky_sim=True) + if sim_data.shape[1] != 2: + raise ValueError("Expected sim_data axis 1 to be 2!") + out = self.out_path / Path( f"samp_{self.conf['file_prefix']}_" + str(i) + ".h5" ) From 6f08598299ef217b829caa3f523dcd08f2852b12 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Thu, 30 Jan 2025 18:46:10 +0100 Subject: [PATCH 13/34] Fix gridder --- pyvisgen/gridding/gridder.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py index 1ab198c..68f9c2d 100644 --- a/pyvisgen/gridding/gridder.py +++ b/pyvisgen/gridding/gridder.py @@ -132,6 +132,9 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): if isinstance(freq_bands, float): freq_bands = [freq_bands] + uu /= const.c + vv /= const.c + u = np.array([uu * np.array(freq) for freq in freq_bands]).ravel() v = np.array([vv * np.array(freq) for freq in freq_bands]).ravel() @@ -159,6 +162,7 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): np.concatenate([-imag, imag]), ] ) + # Generate Mask N = conf["grid_size"] # image size fov = conf["grid_fov"] * np.pi / (3600 * 180) @@ -182,7 +186,12 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): mask_real /= mask mask_imag /= mask - assert mask_real.shape == (conf["grid_size"], conf["grid_size"]) + if mask_real.shape != (conf["grid_size"], conf["grid_size"]): + raise ValueError( + "shape mismatch: Expected mask_real to be " + f"of shape {(conf['grid_size'], conf['grid_size'])}" + ) + gridded_vis = np.zeros((2, N, N)) gridded_vis[0] = mask_real gridded_vis[1] = mask_imag From 3ef8dece65c63cbc87555c2f1d9cf0a5c6ca3980 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Thu, 30 Jan 2025 18:47:33 +0100 Subject: [PATCH 14/34] Update sim data loop --- pyvisgen/simulation/data_set.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index 629f575..f9fc8eb 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -138,11 +138,12 @@ def _run(self): ) if self.grid: - sim_data.append( - grid_vis_loop_data( - vis.u, vis.v, vis.get_values(), self.freq_bands, self.conf - ) + gridded = grid_vis_loop_data( + vis.u, vis.v, vis.get_values(), self.freq_bands, self.conf ) + + sim_data.append(gridded) + samp_ops_idx += 1 sim_data = np.array(sim_data) From 359db2cf29f965d68291cceeb8e3e51fede0a425 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Thu, 30 Jan 2025 18:48:57 +0100 Subject: [PATCH 15/34] Change u, v, and w in scan, don't integrate over mean --- pyvisgen/simulation/scan.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py index a4a7129..ad977c9 100644 --- a/pyvisgen/simulation/scan.py +++ b/pyvisgen/simulation/scan.py @@ -78,9 +78,9 @@ def calc_fourier(img, bas, lm, spw_low, spw_high): Return Fourier Kernel for every pixel in lm grid and given baselines. Shape is given by lm axes and baseline axis """ - u_cmplt = torch.cat((bas[0], bas[1])) - v_cmplt = torch.cat((bas[3], bas[4])) - w_cmplt = torch.cat((bas[6], bas[7])) + u_cmplt = bas[0] + v_cmplt = bas[3] + w_cmplt = bas[6] l = lm[..., 0] # noqa: E741 m = lm[..., 1] @@ -219,10 +219,4 @@ def integrate(X1, X2): int_f = 0.5 * torch.sum(int_m, dim=0) del int_m - X_t = torch.stack(torch.split(int_f, int(int_f.shape[0] / 2), dim=0)) - del int_f - - int_t = 0.5 * torch.sum(X_t, dim=0) - del X_t - - return int_t + return int_f From 45c26a5b1f12bdfe22f3b5ba2c1a11febec6c89a Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 31 Jan 2025 14:22:14 +0100 Subject: [PATCH 16/34] Update progress bar descriptions --- pyvisgen/simulation/data_set.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index f9fc8eb..4d95280 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -93,7 +93,7 @@ def from_config( range(len(cls.data)), position=0, leave=False, - desc="Counting Number of Images", + desc="Counting images", colour="#754fc9", ) # get number of random parameter draws from number of images in data @@ -113,7 +113,7 @@ def _run(self): data = tqdm( range(len(self.data)), position=0, - desc="Processing Bundles", + desc="Processing bundles", colour="#52ba66", ) @@ -125,7 +125,7 @@ def _run(self): SIs = tqdm( SIs, position=1, - desc=f"Bundle {i}", + desc=f"Bundle {i + 1}", colour="#595cbd", leave=False, ) From acfb4407223459ecbb50a81caeb6a913ce26d0d7 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Sat, 1 Feb 2025 16:02:49 +0100 Subject: [PATCH 17/34] Improve readability of the SimulateDataSet class, print config --- pyvisgen/simulation/data_set.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index 4d95280..872f21a 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -78,6 +78,8 @@ def from_config( cls.conf = read_data_set_conf(config) + print("Simulation Config:\n", cls.conf) + if grid: cls.out_path = Path(cls.conf["out_path_gridded"]) else: @@ -86,11 +88,11 @@ def from_config( if not cls.out_path.is_dir(): cls.out_path.mkdir(parents=True) - cls.data = load_bundles(cls.conf["in_path"]) + cls.data_paths = load_bundles(cls.conf["in_path"]) if not num_images: len_data = tqdm( - range(len(cls.data)), + range(len(cls.data_paths)), position=0, leave=False, desc="Counting images", @@ -111,7 +113,7 @@ def from_config( def _run(self): data = tqdm( - range(len(self.data)), + range(len(self.data_paths)), position=0, desc="Processing bundles", colour="#52ba66", @@ -163,7 +165,7 @@ def _run(self): f"samp_{self.conf['file_prefix']}_" + str(i) + ".h5" ) - save_fft_pair(out, sim_data, truth_fft) + save_fft_pair(path=out, x=sim_data, y=truth_fft) path_msg = self.conf["out_path_gridded"] else: @@ -178,7 +180,7 @@ def _run_slurm(): raise NotImplementedError("Not implememented yet!") def get_images(self, i): - SIs = torch.tensor(open_bundles(self.data[i], key=self.key)) + SIs = torch.tensor(open_bundles(self.data_paths[i], key=self.key)) if len(SIs.shape) == 3: SIs = SIs.unsqueeze(1) From 67c83ed68d2c71ebf9620638cc1221096923b0c0 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Mon, 3 Feb 2025 11:32:46 +0100 Subject: [PATCH 18/34] Move convert_phase_amp and convert_real_imag to gridding.utils --- pyvisgen/gridding/__init__.py | 11 ++++---- pyvisgen/gridding/gridder.py | 27 -------------------- pyvisgen/gridding/utils.py | 47 ++++++++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/pyvisgen/gridding/__init__.py b/pyvisgen/gridding/__init__.py index c53f9ce..1b4319c 100644 --- a/pyvisgen/gridding/__init__.py +++ b/pyvisgen/gridding/__init__.py @@ -1,11 +1,12 @@ -from .gridder import ( +from .gridder import ducc0_gridding, grid_data, grid_vis_loop_data +from .utils import ( + calc_truth_fft, convert_amp_phase, convert_real_imag, - ducc0_gridding, - grid_data, - grid_vis_loop_data, + create_gridded_data_set, + open_data, + save_fft_pair, ) -from .utils import calc_truth_fft, create_gridded_data_set, open_data, save_fft_pair __all__ = [ "calc_truth_fft", diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py index 68f9c2d..873d40c 100644 --- a/pyvisgen/gridding/gridder.py +++ b/pyvisgen/gridding/gridder.py @@ -197,30 +197,3 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): gridded_vis[1] = mask_imag return gridded_vis - - -def convert_amp_phase(data, sky_sim=False): - if sky_sim: - amp = np.abs(data) - phase = np.angle(data) - data = np.concatenate((amp, phase), axis=1) - else: - test = data[:, 0] + 1j * data[:, 1] - amp = np.abs(test) - phase = np.angle(test) - data = np.stack((amp, phase), axis=1) - return data - - -def convert_real_imag(data, sky_sim=False): - if sky_sim: - real = data.real - imag = data.imag - - data = np.concatenate((real, imag), axis=1) - else: - real = data[:, 0] - imag = data[:, 1] - - data = np.stack((real, imag), axis=1) - return data diff --git a/pyvisgen/gridding/utils.py b/pyvisgen/gridding/utils.py index 438385e..45b57b5 100644 --- a/pyvisgen/gridding/utils.py +++ b/pyvisgen/gridding/utils.py @@ -6,7 +6,7 @@ from tqdm.autonotebook import tqdm from pyvisgen.fits.data import fits_data -from pyvisgen.gridding import convert_amp_phase, convert_real_imag, grid_data +from pyvisgen.gridding import grid_data from pyvisgen.utils.config import read_data_set_conf from pyvisgen.utils.data import load_bundles, open_bundles @@ -142,8 +142,8 @@ def save_fft_pair(path, x, y, name_x="x", name_y="y"): x = x[:, :, : half_image + 1, :] y = y[:, :, : half_image + 1, :] - test_shapes(x) - test_shapes(y) + test_shapes(x, "x") + test_shapes(y, "y") with h5py.File(path, "w") as hf: hf.create_dataset(name_x, data=x) @@ -151,12 +151,18 @@ def save_fft_pair(path, x, y, name_x="x", name_y="y"): hf.close() -def test_shapes(array): +def test_shapes(array, name): if array.shape[1] != 2: - raise ValueError("Expected array axis 1 to be 2!") + raise ValueError( + f"Expected array {name} axis 1 to be 2 but got " + f"{array.shape} with axis 1: {array.shape[1]}!" + ) if len(array.shape) != 4: - raise ValueError("Expected array shape to be of len 4!") + raise ValueError( + f"Expected array {name} shape to be of len 4 but got " + f"{array.shape} with len {len(array.shape)}!" + ) def calc_truth_fft(sky_dist): @@ -165,3 +171,32 @@ def calc_truth_fft(sky_dist): ) return truth_fft + + +def convert_amp_phase(data, sky_sim=False): + if sky_sim: + amp = np.abs(data) + phase = np.angle(data) + data = np.concatenate((amp, phase), axis=1) + else: + test = data[:, 0] + 1j * data[:, 1] + amp = np.abs(test) + phase = np.angle(test) + data = np.stack((amp, phase), axis=1) + + return data + + +def convert_real_imag(data, sky_sim=False): + if sky_sim: + real = data.real + imag = data.imag + + data = np.concatenate((real, imag), axis=1) + else: + real = data[:, 0] + imag = data[:, 1] + + data = np.stack((real, imag), axis=1) + + return data From 2e7e58721a499ef1a423bec42c68bad88950e0fe Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 15:34:22 +0100 Subject: [PATCH 19/34] Add slurm support, add FITS file support, add docstrings --- pyvisgen/simulation/data_set.py | 186 ++++++++++++++++++++++++++------ 1 file changed, 152 insertions(+), 34 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index 872f21a..e6e5b57 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -6,8 +6,10 @@ from astropy import units as un from astropy.coordinates import AltAz, EarthLocation, SkyCoord from astropy.time import Time +from joblib import Parallel, delayed from tqdm.autonotebook import tqdm +import pyvisgen.fits.writer as writer import pyvisgen.layouts.layouts as layouts from pyvisgen.gridding import ( calc_truth_fft, @@ -37,11 +39,12 @@ def from_config( *, grid: bool = True, slurm: bool = False, - job_id: int | None = None, - n: int | None = None, + slurm_job_id: int | None = None, + slurm_n: int | None = None, date_fmt: str = DATEFMT, num_images: int | None = None, - ) -> None: + multiprocess: int | str = 1, + ): """Simulates data from parameters in a config file. Parameters @@ -56,9 +59,9 @@ def from_config( save to HDF5 files. Default: ``True`` slurm : bool, optional ``True``, if slurm is used, Default: ``False`` - job_id : int or None, optional + slurm_job_id : int or None, optional ``job_id`` given by slurm. Default: ``None`` - n : int or None, optional + slurm_n : int or None, optional Running index. Default: ``None`` date_fmt : str, optional Format string for datetime objects. @@ -67,14 +70,23 @@ def from_config( Number of combined total images in the bundles. If not ``None``, will skip counting the images before drawing the random parameters. Default: ``None`` + multiprocess : int or str, optional + Number of jobs to use in multiprocessing during the + sampling and testing phase. If -1 or ``'all'``, + use all available cores. Default: 1 """ cls = cls() cls.key = image_key cls.grid = grid cls.slurm = slurm - cls.job_id = job_id - cls.n = n + cls.job_id = slurm_job_id + cls.n = slurm_n cls.date_fmt = date_fmt + cls.num_images = num_images + cls.multiprocess = multiprocess + + if multiprocess in ["all"]: + cls.multiprocess = -1 cls.conf = read_data_set_conf(config) @@ -90,8 +102,8 @@ def from_config( cls.data_paths = load_bundles(cls.conf["in_path"]) - if not num_images: - len_data = tqdm( + if not cls.num_images: + data_bundles = tqdm( range(len(cls.data_paths)), position=0, leave=False, @@ -99,19 +111,23 @@ def from_config( colour="#754fc9", ) # get number of random parameter draws from number of images in data - num_draws = np.sum([len(cls.get_images(i)) for i in len_data]) - - # draw parameters beforehand, i.e. outside the simulation loop - cls.create_sampling_rc(num_draws) + cls.num_images = np.sum( + [len(cls.get_images(bundle)) for bundle in data_bundles] + ) if slurm: cls._run_slurm() else: + # draw parameters beforehand, i.e. outside the simulation loop + cls.create_sampling_rc(cls.num_images) cls._run() return cls - def _run(self): + def _run(self) -> None: + """Runs the simulation and saves visibility data either as + bundled HDF5 files or as individual FITS files. + """ data = tqdm( range(len(self.data_paths)), position=0, @@ -167,19 +183,62 @@ def _run(self): save_fft_pair(path=out, x=sim_data, y=truth_fft) - path_msg = self.conf["out_path_gridded"] + path_msg = Path(self.conf["out_path_gridded"]) / Path( + f"samp_{self.conf['file_prefix']}_.h5" + ) else: - path_msg = self.conf["out_path_fits"] + for i, vis_data in enumerate(sim_data): + out = self.out_path / Path( + f"vis_{self.conf['file_prefix']}_" + str(i) + ".fits" + ) + hdu_list = writer.create_hdu_list(vis_data, obs) + hdu_list.writeto(out, overwrite=True) + + path_msg = self.conf["out_path_fits"] / Path( + f"samp_{self.conf['file_prefix']}_.fits" + ) print( f"Successfully simulated and saved {samp_ops_idx} images " f"to '{path_msg}'!" ) - def _run_slurm(): - raise NotImplementedError("Not implememented yet!") + def _run_slurm(self) -> None: + """Runs the simulation in slurm and saves visibility data + as individual FITS files. + """ + job_id = int(self.slurm_job_id + self.slurm_n * 500) + out = self.conf["out_path_fits"] / Path("vis_" + str(job_id) + ".fits") + + bundle = torch.div(job_id, self.num_images, rounding_mode="floor") + image = job_id - bundle * self.num_images + + SI = torch.tensor(open_bundles(self.data_paths[bundle])[image]) - def get_images(self, i): + if len(SI.shape) == 2: + SI = SI.unsqueeze(0) + + self.create_sampling_rc(1) + obs = self.create_observation(0) + vis_data = vis_loop(obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]) + + hdu_list = writer.create_hdu_list(vis_data, obs) + hdu_list.writeto(out, overwrite=True) + + def get_images(self, i: int) -> torch.tensor: + """Opens bundle with index i and returns :func:`~torch.tensor` + of images. + + Parameters + ---------- + i : int + Bundle index. + + Returns + ------- + SIs : :func:`~torch.tensor` + :func:`~torch.tensor` of images from bundle ``i``. + """ SIs = torch.tensor(open_bundles(self.data_paths[i], key=self.key)) if len(SIs.shape) == 3: @@ -187,7 +246,21 @@ def get_images(self, i): return SIs - def create_observation(self, i): + def create_observation(self, i: int) -> Observation: + """Creates :class:`~pyvisgen.simulation.Observation` + dataclass object for image ``i``. + + Parameters + ---------- + i : int + Index of image for which the observation is created. + + Returns + ------- + obs : Observation + :class:`~pyvisgen.simulation.Observation` dataclass + object for image ``i``. + """ rc = self.samp_ops # put the respective values inside the @@ -222,7 +295,15 @@ def create_observation(self, i): return obs - def create_sampling_rc(self, size): + def create_sampling_rc(self, size: int) -> None: + """Creates sampling runtime configuration containing + all relevant parameters for the simulation. + + Parameters + ---------- + size : int + Number of parameters to draw, equal to number of images. + """ if self.conf["seed"]: self.rng = np.random.default_rng(self.conf["seed"]) else: @@ -265,10 +346,25 @@ def create_sampling_rc(self, size): leave=False, ) - for i in test_idx: - self.test_rand_opts(i) + self.array = layouts.get_array_layout(self.samp_ops_const["array_layout"]) - def draw_sampling_opts(self, size): + Parallel(n_jobs=self.multiprocess)( + delayed(self.test_rand_opts)(i) for i in test_idx + ) + + def draw_sampling_opts(self, size: int) -> dict: + """Draws randomized sampling parameters for the simulation. + + Parameters + ---------- + size : int + Number of parameters to draw, equal to number of images. + + Returns + ------- + samp_opts : dict + Sampling options/parameters stored inside a dictionary. + """ ra = self.rng.uniform( self.conf["fov_center_ra"][0][0], self.conf["fov_center_ra"][0][1], size ) @@ -328,7 +424,7 @@ def draw_sampling_opts(self, size): b = np.repeat(1, size, dtype=float) field_scale = np.stack((a, b), axis=1) - samp_ops = dict( + samp_opts = dict( src_ra=ra, src_dec=dec, start_time=scan_start, @@ -346,22 +442,31 @@ def draw_sampling_opts(self, size): # later, we need to include it here instead of inside the # samp_ops_const dictionary. - return samp_ops + return samp_opts - def test_rand_opts(self, i): - array = layouts.get_array_layout(self.samp_ops_const["array_layout"]) + def test_rand_opts(self, i: int) -> None: + """Tests randomized sampling parameters by checking + if the source is visible for 50% of the telescopes in + the array for 50% of the observation time. If that + condition is not fullfilled, the parameters are redrawn + and tested again. + Parameters + ---------- + i : int + Index of the current set of sampling parameters. + """ time_steps = self.calc_time_steps(i) src_crd = SkyCoord( self.samp_ops["src_ra"][i], self.samp_ops["src_dec"][i], unit=un.deg ) - total_stations = len(array.st_num) + total_stations = len(self.array.st_num) locations = EarthLocation.from_geocentric( - x=array.x, - y=array.y, - z=array.z, + x=self.array.x, + y=self.array.y, + z=self.array.z, unit=un.m, ) @@ -370,7 +475,7 @@ def test_rand_opts(self, i): # check which stations can see the source for each time step visible = np.logical_and( - array.el_low.numpy() <= src_alt, src_alt <= array.el_high.numpy() + self.array.el_low.numpy() <= src_alt, src_alt <= self.array.el_high.numpy() ) # We want at least half of the telescopes to see the source @@ -390,7 +495,20 @@ def test_rand_opts(self, i): self.test_rand_opts(i) - def calc_time_steps(self, i): + def calc_time_steps(self, i: int) -> Time: + """Calculates time steps for given sampling + parameter set. Used in testing. + + Parameters + ---------- + i : int + Index of the current set of sampling parameters. + + See Also + -------- + pyvisgen.simulation.data_set.SimulateDataSet.test_rand_opts : + Tests randomized sampling parameters. + """ start_time = Time(self.samp_ops["start_time"][i].isoformat(), format="isot") scan_separation = self.samp_ops_const["scan_separation"] num_scans = self.samp_ops["num_scans"][i] From a2798fdd42a73cf10729956595e39cf26e567204 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 15:34:57 +0100 Subject: [PATCH 20/34] Allow HDF5 key to be passed to open_bundle --- pyvisgen/utils/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyvisgen/utils/data.py b/pyvisgen/utils/data.py index d02082a..aa83274 100644 --- a/pyvisgen/utils/data.py +++ b/pyvisgen/utils/data.py @@ -48,7 +48,7 @@ def get_bundles(path: str | Path) -> np.array: return bundles -def open_bundles(path: str | Path) -> np.array: +def open_bundles(path: str | Path, key: str = "y") -> np.array: """Opens a bundle HDF5 file. Parameters @@ -63,6 +63,6 @@ def open_bundles(path: str | Path) -> np.array: the bundle file. """ f = h5py.File(path, "r") - bundle_y = np.array(f["y"]) + bundle_y = np.array(f[key]) return bundle_y From 6859f9dd90bd9d1acd227e419df6abd8dbdbb26c Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 16:08:35 +0100 Subject: [PATCH 21/34] Add changelog, minor fixes --- docs/changes/53.feature.rst | 1 + docs/changes/53.maintenance.rst | 10 ++++++ pyvisgen/gridding/gridder.py | 35 ++++++++++++++++--- pyvisgen/simulation/scan.py | 1 - pyvisgen/simulation/scripts/create_dataset.py | 25 +++++++++---- 5 files changed, 60 insertions(+), 12 deletions(-) create mode 100644 docs/changes/53.feature.rst create mode 100644 docs/changes/53.maintenance.rst diff --git a/docs/changes/53.feature.rst b/docs/changes/53.feature.rst new file mode 100644 index 0000000..5fcfd8d --- /dev/null +++ b/docs/changes/53.feature.rst @@ -0,0 +1 @@ +- Add new gridder that can handle vis data returned by the ``vis_loop`` diff --git a/docs/changes/53.maintenance.rst b/docs/changes/53.maintenance.rst new file mode 100644 index 0000000..290b8a4 --- /dev/null +++ b/docs/changes/53.maintenance.rst @@ -0,0 +1,10 @@ +- Complete rewrite of dataset creation routine ``pyvisgen.simulation.data_set.SimulateDataSet`` + - Accessible using a classmethod to load a config file + - Add optional multiprocessing support + - Draw and fully test parameters before simulation loop. Previously this was done in the loop and tests were only performed for two time steps + - Support for polarization +- Add new default config file for new dataset creation routine +- Update CLI tool for dataset creation routine +- Allow passing HDF5 key in ``pyvisgen.utils.data.open_bundles`` +- Restructure ``pyvisgen.gridding`` module by adding a ``utils`` submodule that contains all utility functions that previously were in the ``gridder`` submodule + - Also fix parts of the utility functions diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py index 873d40c..4845441 100644 --- a/pyvisgen/gridding/gridder.py +++ b/pyvisgen/gridding/gridder.py @@ -118,8 +118,29 @@ def grid_data(uv_data, freq_data, conf): return gridded_vis -def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): - """Grid data coming from the vis_loop.""" +def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0) -> np.array: + """Grid data returned by :func:`~pyvisgen.simulation.vis_loop`. + + Parameters + ---------- + uu : :func:`~torch.tensor` + vv : :func:`~torch.tensor` + vis_data : :class:`~pyvisgen.simulation.Visibilities` + :class:`~pyvisgen.simulation.Visibilities` dataclass object + containing the visibilities measured by the array. + freq_bands : list + List of frequency bands of the observation. + conf : dict + Dictionary containing the configuration of the observation. + stokes_comp : int, optional + Index of the stokes component to grid. Defaults to stokes I. + Default: 0 + + Returns + ------- + gridded_vis : :func:`~np.array` + Array of gridded visibilities. + """ if vis_data.ndim != 7: if vis_data.ndim == 3: vis_data = np.stack( @@ -165,13 +186,17 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0): # Generate Mask N = conf["grid_size"] # image size - fov = conf["grid_fov"] * np.pi / (3600 * 180) - + fov = np.deg2rad(conf["grid_fov"] / 3600) delta = 1 / fov # bins are shifted by delta/2 so that maximum in uv space matches maximum # in numpy fft - bins = np.arange(start=-((N + 1) / 2) * delta, stop=(N / 2) * delta, step=delta) + bins = np.arange( + start=-((N + 1) / 2) * delta, + stop=((N + 1) / 2) * delta, + step=delta, + dtype=np.float128, + ) mask, *_ = np.histogram2d(samps[0], samps[1], bins=[bins, bins], density=False) mask[mask == 0] = 1 diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py index 08f6604..f564f6e 100644 --- a/pyvisgen/simulation/scan.py +++ b/pyvisgen/simulation/scan.py @@ -105,7 +105,6 @@ def calc_fourier( v_cmplt = bas[5] w_cmplt = bas[8] - l = lm[..., 0] # noqa: E741 m = lm[..., 1] n = torch.sqrt(1 - l**2 - m**2) diff --git a/pyvisgen/simulation/scripts/create_dataset.py b/pyvisgen/simulation/scripts/create_dataset.py index e01242f..4707ee5 100644 --- a/pyvisgen/simulation/scripts/create_dataset.py +++ b/pyvisgen/simulation/scripts/create_dataset.py @@ -28,8 +28,8 @@ default="y", help="Key under which the images are saved in the HDF5 file", ) -@click.option("-j", "--job_id", required=False, type=int, default=None) -@click.option("--n", required=False, type=int, default=None) +@click.option("--slurm_job_id", required=False, type=int, default=None) +@click.option("--slurm_n", required=False, type=int, default=None) @click.option( "--num_images", required=False, @@ -40,14 +40,25 @@ Will skip the automatic count. """, ) +@click.option( + "-p", + "--multiprocess", + required=False, + help=""" + Number of processes to run in parallel while + sampling and testing parameters. If -1 or + 'all', will use all available cores. + """, +) def main( configuration_path: str | click.Path, mode: str, key: str = "y", - job_id=None, - n=None, + slurm_job_id=None, + slurm_n=None, date_fmt="%d-%m-%Y %H:%M:%S", num_images: int | None = None, + multiprocess: int | str = 1, ): if mode == "simulate": SimulateDataSet.from_config( @@ -56,14 +67,15 @@ def main( grid=False, date_fmt=date_fmt, num_images=num_images, + multiprocess=multiprocess, ) if mode == "slurm": SimulateDataSet.from_config( configuration_path, image_key=key, slurm=True, - job_id=job_id, - n=n, + slurm_job_id=slurm_job_id, + slurm_n=slurm_n, date_fmt=date_fmt, num_images=num_images, ) @@ -74,6 +86,7 @@ def main( grid=True, date_fmt=date_fmt, num_images=num_images, + multiprocess=multiprocess, ) From 0a02353623542fdbc22e918699978c4f225d6cd2 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 18:59:03 +0100 Subject: [PATCH 22/34] Change to threading backend, fix variable names --- pyvisgen/simulation/data_set.py | 96 ++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 24 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index e6e5b57..9d500b7 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -135,7 +135,7 @@ def _run(self) -> None: colour="#52ba66", ) - samp_ops_idx = 0 + samp_opts_idx = 0 for i in data: SIs = self.get_images(i) truth_fft = calc_truth_fft(SIs) @@ -150,7 +150,7 @@ def _run(self) -> None: sim_data = [] for SI in SIs: - obs = self.create_observation(samp_ops_idx) + obs = self.create_observation(samp_opts_idx) vis = vis_loop( obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"] ) @@ -162,7 +162,7 @@ def _run(self) -> None: sim_data.append(gridded) - samp_ops_idx += 1 + samp_opts_idx += 1 sim_data = np.array(sim_data) @@ -199,7 +199,7 @@ def _run(self) -> None: ) print( - f"Successfully simulated and saved {samp_ops_idx} images " + f"Successfully simulated and saved {samp_opts_idx} images " f"to '{path_msg}'!" ) @@ -261,7 +261,7 @@ def create_observation(self, i: int) -> Observation: :class:`~pyvisgen.simulation.Observation` dataclass object for image ``i``. """ - rc = self.samp_ops + rc = self.samp_opts # put the respective values inside the # pol_kwargs and field_kwargs dicts. @@ -282,7 +282,7 @@ def create_observation(self, i: int) -> Observation: dense = True obs = Observation( - **self.samp_ops_const, + **self.samp_opts_const, src_ra=rc["src_ra"][i], src_dec=rc["src_dec"][i], start_time=rc["start_time"][i], @@ -319,7 +319,7 @@ def create_sampling_rc(self, size: int) -> None: # Split sampling options into two dicts: # samps_ops_const is always the same, values in # samps_ops, however, will be drawn randomly. - self.samp_ops_const = dict( + self.samp_opts_const = dict( array_layout=self.conf["layout"][0], image_size=self.conf["img_size"][0], fov=self.conf["fov_size"], @@ -336,19 +336,19 @@ def create_sampling_rc(self, size: int) -> None: # get second half of the sampling options; # this is the randomly drawn, i.e. non-constant, part - self.samp_ops = self.draw_sampling_opts(size) + self.samp_opts = self.draw_sampling_opts(size) test_idx = tqdm( - range(self.samp_ops["src_ra"].size), + range(self.samp_opts["src_ra"].size), position=0, desc="Pre-drawing and testing sample parameters", colour="#00c1a2", leave=False, ) - self.array = layouts.get_array_layout(self.samp_ops_const["array_layout"]) + self.array = layouts.get_array_layout(self.samp_opts_const["array_layout"]) - Parallel(n_jobs=self.multiprocess)( + Parallel(n_jobs=self.multiprocess, backend="threading")( delayed(self.test_rand_opts)(i) for i in test_idx ) @@ -440,7 +440,7 @@ def draw_sampling_opts(self, size: int) -> dict: # as threshold=None should be suitable for almost all cases. # However, since threshold has to be in the field_kwargs dict # later, we need to include it here instead of inside the - # samp_ops_const dictionary. + # samp_opts_const dictionary. return samp_opts @@ -458,7 +458,7 @@ def test_rand_opts(self, i: int) -> None: """ time_steps = self.calc_time_steps(i) src_crd = SkyCoord( - self.samp_ops["src_ra"][i], self.samp_ops["src_dec"][i], unit=un.deg + self.samp_opts["src_ra"][i], self.samp_opts["src_dec"][i], unit=un.deg ) total_stations = len(self.array.st_num) @@ -486,12 +486,12 @@ def test_rand_opts(self, i: int) -> None: # we redraw the source ra and dec and scan start times, duration, # and number of scans. Then we test again by calling this function recursively. if visible_half.sum() < time_steps.size // 2: - redrawn_samp_ops = self.draw_sampling_opts(1) - self.samp_ops["src_ra"][i] = redrawn_samp_ops["src_ra"][0] - self.samp_ops["src_dec"][i] = redrawn_samp_ops["src_dec"][0] - self.samp_ops["start_time"][i] = redrawn_samp_ops["start_time"][0] - self.samp_ops["scan_duration"][i] = redrawn_samp_ops["scan_duration"][0] - self.samp_ops["num_scans"][i] = redrawn_samp_ops["num_scans"][0] + redrawn_samp_opts = self.draw_sampling_opts(1) + self.samp_opts["src_ra"][i] = redrawn_samp_opts["src_ra"][0] + self.samp_opts["src_dec"][i] = redrawn_samp_opts["src_dec"][0] + self.samp_opts["start_time"][i] = redrawn_samp_opts["start_time"][0] + self.samp_opts["scan_duration"][i] = redrawn_samp_opts["scan_duration"][0] + self.samp_opts["num_scans"][i] = redrawn_samp_opts["num_scans"][0] self.test_rand_opts(i) @@ -509,11 +509,11 @@ def calc_time_steps(self, i: int) -> Time: pyvisgen.simulation.data_set.SimulateDataSet.test_rand_opts : Tests randomized sampling parameters. """ - start_time = Time(self.samp_ops["start_time"][i].isoformat(), format="isot") - scan_separation = self.samp_ops_const["scan_separation"] - num_scans = self.samp_ops["num_scans"][i] - scan_duration = self.samp_ops["scan_duration"][i] - int_time = self.samp_ops_const["integration_time"] + start_time = Time(self.samp_opts["start_time"][i].isoformat(), format="isot") + scan_separation = self.samp_opts_const["scan_separation"] + num_scans = self.samp_opts["num_scans"][i] + scan_duration = self.samp_opts["scan_duration"][i] + int_time = self.samp_opts_const["integration_time"] time_lst = [ start_time @@ -529,3 +529,51 @@ def calc_time_steps(self, i: int) -> Time: time = Time(time_lst) return time + + @classmethod + def _get_obs_test( + cls, + config: str | Path, + /, + image_key: str = "y", + *, + date_fmt: str = DATEFMT, + ) -> tuple: # pragma: no cover + """Creates an :class:`~pyvisgen.simulation.Observation` class + object for tests. + + Parameters + ---------- + config : str or Path + Path to the config file. + image_key : str, optional + Key under which the true sky distributions are saved + in the HDF5 file. Default: ``'y'`` + date_fmt : str, optional + Format string for datetime objects. + Default: ``'%d-%m-%Y %H:%M:%S'`` + + Returns + ------- + self : SimulateDataSet + Class object. + obs : :class:`~pyvisgen.simulation.Observation` + :class:`~pyvisgen.simulation.Observation` class object. + """ + cls = cls() + cls.key = image_key + cls.date_fmt = date_fmt + cls.multiprocess = 1 + + if isinstance(config, (str, Path)): + cls.conf = read_data_set_conf(config) + elif isinstance(config, dict): + cls.conf = config + else: + raise ValueError("Expected config to be one of str, Path or dict!") + + cls.data_paths = load_bundles(cls.conf["in_path"])[0] + cls.create_sampling_rc(1) + obs = cls.create_observation(0) + + return cls, obs From 889cbf9e9b04911a18cf7bac3fa9a146c38f57a8 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 18:59:31 +0100 Subject: [PATCH 23/34] Update and fix tests --- tests/test_conf.toml | 11 +- tests/test_simulation.py | 333 +++++++++++++++++++-------------------- tests/test_utils.py | 9 +- 3 files changed, 184 insertions(+), 169 deletions(-) diff --git a/tests/test_conf.toml b/tests/test_conf.toml index b9b0e36..7502c5b 100644 --- a/tests/test_conf.toml +++ b/tests/test_conf.toml @@ -19,8 +19,17 @@ noisy = 380 corrupted = true sensitivity_cut = 1e-6 +[polarisation_options] +mode = "none" # linear, circular, or "none" +delta = 45 # phase angle +amp_ratio = 0.5 # polarisation amplitude ratio +field_order = [0.1, 0.1] # (x, y) orders of the random polarisation field +field_scale = [0, 1] # scaling of the intensity of the polarisation field +field_threshold = "none" + [bundle_options] -in_path = "./tests/data" +file_prefix="" +in_path = "/home/knierim/work/pyvisgen/tests/data" out_path_fits = "./tests/build/fits" out_path_gridded = "./tests/build/gridded" num_test_images = 1000 diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 066eb71..05a8729 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -6,8 +6,10 @@ from pyvisgen.utils.config import read_data_set_conf torch.manual_seed(1) -config = "tests/test_conf.toml" -conf = read_data_set_conf(config) + +CONFIG = "tests/test_conf.toml" + +conf = read_data_set_conf(CONFIG) out_path = Path(conf["out_path_fits"]) out_path.mkdir(parents=True, exist_ok=True) @@ -19,168 +21,162 @@ def test_get_data(): assert len(data) > 0 -def test_create_sampling_rc(): - from pyvisgen.simulation.data_set import create_sampling_rc, test_opts +class TestSimulateDataSet: + """Unit test class for :class:``pyvisgen.simulation.SimulateDataSet``.""" + + def setup_class(self): + """Set up common objects and variables for the following tests.""" + from pyvisgen.simulation.data_set import SimulateDataSet + + self.s = SimulateDataSet + + def test_run_no_slurm(self): + self.s.from_config(CONFIG) - samp_ops = create_sampling_rc(conf) - assert len(samp_ops) == 17 + def test_run_no_slurm_multiprocess(self): + self.s.from_config(CONFIG, multiprocess="all") - test_opts(samp_ops) + def test_run_no_slurm_num_images(self): + self.s.from_config(CONFIG, num_images=50) -def test_create_sampling_rc_no_seed(): - from pyvisgen.simulation.data_set import create_sampling_rc, test_opts +class TestVisLoop: + """Unit test class for :func:``pyvisgen.simulation.vis_loop``.""" - mod_conf = conf.copy() - mod_conf["seed"] = None - - samp_ops = create_sampling_rc(mod_conf) - assert len(samp_ops) == 17 - - test_opts(samp_ops) - - -def test_vis_loop(): - import torch - - import pyvisgen.fits.writer as writer - from pyvisgen.simulation.data_set import create_observation - from pyvisgen.simulation.visibility import vis_loop - from pyvisgen.utils.data import load_bundles, open_bundles - - bundles = load_bundles(conf["in_path"]) - obs = create_observation(conf) - # num_active_telescopes = test_opts(samp_ops) - data = open_bundles(bundles[0]) - SI = torch.tensor(data[0])[None] - vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"]) - - assert (vis_data[0].V_11[0]).dtype == torch.complex128 - assert (vis_data[0].V_22[0]).dtype == torch.complex128 - assert (vis_data[0].V_12[0]).dtype == torch.complex128 - assert (vis_data[0].V_21[0]).dtype == torch.complex128 - assert (vis_data[0].num).dtype == torch.float64 - assert (vis_data[0].base_num).dtype == torch.float64 - assert torch.is_tensor(vis_data[0].u) - assert torch.is_tensor(vis_data[0].v) - assert torch.is_tensor(vis_data[0].w) - assert (vis_data[0].date).dtype == torch.float64 - - # test num vis for time step 0 - # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2 - # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0] - # dunno what's going on here - # assert num_vis_theory == num_vis_calc - # - - out_path = Path(conf["out_path_fits"]) - out = out_path / Path("vis_0.fits") - hdu_list = writer.create_hdu_list(vis_data, obs) - hdu_list.writeto(out, overwrite=True) - - -def test_vis_loop_grid(): - import torch - - import pyvisgen.fits.writer as writer - from pyvisgen.simulation.data_set import create_observation - from pyvisgen.simulation.visibility import vis_loop - from pyvisgen.utils.data import load_bundles, open_bundles - - bundles = load_bundles(conf["in_path"]) - obs = create_observation(conf) - # num_active_telescopes = test_opts(samp_ops) - data = open_bundles(bundles[0]) - SI = torch.tensor(data[0])[None] - vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode="grid") - - assert (vis_data[0].V_11[0]).dtype == torch.complex128 - assert (vis_data[0].V_22[0]).dtype == torch.complex128 - assert (vis_data[0].V_12[0]).dtype == torch.complex128 - assert (vis_data[0].V_21[0]).dtype == torch.complex128 - assert (vis_data[0].num).dtype == torch.float64 - assert (vis_data[0].base_num).dtype == torch.float64 - assert torch.is_tensor(vis_data[0].u) - assert torch.is_tensor(vis_data[0].v) - assert torch.is_tensor(vis_data[0].w) - assert (vis_data[0].date).dtype == torch.float64 - - # test num vis for time step 0 - # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2 - # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0] - # dunno what's going on here - # assert num_vis_theory == num_vis_calc - # - - out_path = Path(conf["out_path_fits"]) - out = out_path / Path("vis_0.fits") - hdu_list = writer.create_hdu_list(vis_data, obs) - hdu_list.writeto(out, overwrite=True) - - -def test_vis_loop_batch_size_auto(): - import torch - - from pyvisgen.simulation.data_set import create_observation - from pyvisgen.simulation.visibility import vis_loop - from pyvisgen.utils.data import load_bundles, open_bundles - - bundles = load_bundles(conf["in_path"]) - obs = create_observation(conf) - data = open_bundles(bundles[0]) - SI = torch.tensor(data[0])[None] - - vis_data = vis_loop( - obs, - SI, - noisy=conf["noisy"], - mode=conf["mode"], - batch_size="auto", - ) - - assert (vis_data[0].V_11[0]).dtype == torch.complex128 - assert (vis_data[0].V_22[0]).dtype == torch.complex128 - assert (vis_data[0].V_12[0]).dtype == torch.complex128 - assert (vis_data[0].V_21[0]).dtype == torch.complex128 - assert (vis_data[0].num).dtype == torch.float64 - assert (vis_data[0].base_num).dtype == torch.float64 - assert torch.is_tensor(vis_data[0].u) - assert torch.is_tensor(vis_data[0].v) - assert torch.is_tensor(vis_data[0].w) - assert (vis_data[0].date).dtype == torch.float64 - - -def test_vis_loop_batch_size_invalid(): - import torch - - from pyvisgen.simulation.data_set import create_observation - from pyvisgen.simulation.visibility import vis_loop - from pyvisgen.utils.data import load_bundles, open_bundles - - bundles = load_bundles(conf["in_path"]) - obs = create_observation(conf) - data = open_bundles(bundles[0]) - SI = torch.tensor(data[0])[None] - - assert_raises( - ValueError, - vis_loop, - obs, - SI, - noisy=conf["noisy"], - mode=conf["mode"], - batch_size="abc", - ) - - assert_raises( - ValueError, - vis_loop, - obs, - SI, - noisy=conf["noisy"], - mode=conf["mode"], - batch_size=20.0, - ) + def setup_class(self): + """Set up common objects and variables for the following tests.""" + from pyvisgen.simulation.data_set import SimulateDataSet + + self.s = SimulateDataSet + + def test_vis_loop(self): + import pyvisgen.fits.writer as writer + from pyvisgen.simulation.visibility import vis_loop + from pyvisgen.utils.data import load_bundles, open_bundles + + bundles = load_bundles(conf["in_path"]) + _, obs = self.s._get_obs_test(CONFIG) + + data = open_bundles(bundles[0]) + SI = torch.tensor(data[0])[None] + vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"]) + + assert (vis_data[0].V_11[0]).dtype == torch.complex128 + assert (vis_data[0].V_22[0]).dtype == torch.complex128 + assert (vis_data[0].V_12[0]).dtype == torch.complex128 + assert (vis_data[0].V_21[0]).dtype == torch.complex128 + assert (vis_data[0].num).dtype == torch.float64 + assert (vis_data[0].base_num).dtype == torch.float64 + assert torch.is_tensor(vis_data[0].u) + assert torch.is_tensor(vis_data[0].v) + assert torch.is_tensor(vis_data[0].w) + assert (vis_data[0].date).dtype == torch.float64 + + # test num vis for time step 0 + # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2 + # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0] + # dunno what's going on here + # assert num_vis_theory == num_vis_calc + # + + out_path = Path(conf["out_path_fits"]) + out = out_path / Path("vis_0.fits") + hdu_list = writer.create_hdu_list(vis_data, obs) + hdu_list.writeto(out, overwrite=True) + + def test_vis_loop_grid(self): + import pyvisgen.fits.writer as writer + from pyvisgen.simulation.visibility import vis_loop + from pyvisgen.utils.data import load_bundles, open_bundles + + bundles = load_bundles(conf["in_path"]) + _, obs = self.s._get_obs_test(CONFIG) + + data = open_bundles(bundles[0]) + SI = torch.tensor(data[0])[None] + vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode="grid") + + assert (vis_data[0].V_11[0]).dtype == torch.complex128 + assert (vis_data[0].V_22[0]).dtype == torch.complex128 + assert (vis_data[0].V_12[0]).dtype == torch.complex128 + assert (vis_data[0].V_21[0]).dtype == torch.complex128 + assert (vis_data[0].num).dtype == torch.float64 + assert (vis_data[0].base_num).dtype == torch.float64 + assert torch.is_tensor(vis_data[0].u) + assert torch.is_tensor(vis_data[0].v) + assert torch.is_tensor(vis_data[0].w) + assert (vis_data[0].date).dtype == torch.float64 + + # test num vis for time step 0 + # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2 + # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0] + # dunno what's going on here + # assert num_vis_theory == num_vis_calc + # + + out_path = Path(conf["out_path_fits"]) + out = out_path / Path("vis_0.fits") + hdu_list = writer.create_hdu_list(vis_data, obs) + hdu_list.writeto(out, overwrite=True) + + def test_vis_loop_batch_size_auto(self): + from pyvisgen.simulation.visibility import vis_loop + from pyvisgen.utils.data import load_bundles, open_bundles + + bundles = load_bundles(conf["in_path"]) + _, obs = self.s._get_obs_test(CONFIG) + + data = open_bundles(bundles[0]) + SI = torch.tensor(data[0])[None] + + vis_data = vis_loop( + obs, + SI, + noisy=conf["noisy"], + mode=conf["mode"], + batch_size="auto", + ) + + assert (vis_data[0].V_11[0]).dtype == torch.complex128 + assert (vis_data[0].V_22[0]).dtype == torch.complex128 + assert (vis_data[0].V_12[0]).dtype == torch.complex128 + assert (vis_data[0].V_21[0]).dtype == torch.complex128 + assert (vis_data[0].num).dtype == torch.float64 + assert (vis_data[0].base_num).dtype == torch.float64 + assert torch.is_tensor(vis_data[0].u) + assert torch.is_tensor(vis_data[0].v) + assert torch.is_tensor(vis_data[0].w) + assert (vis_data[0].date).dtype == torch.float64 + + def test_vis_loop_batch_size_invalid(self): + from pyvisgen.simulation.visibility import vis_loop + from pyvisgen.utils.data import load_bundles, open_bundles + + bundles = load_bundles(conf["in_path"]) + _, obs = self.s._get_obs_test(CONFIG) + + data = open_bundles(bundles[0]) + SI = torch.tensor(data[0])[None] + + assert_raises( + ValueError, + vis_loop, + obs, + SI, + noisy=conf["noisy"], + mode=conf["mode"], + batch_size="abc", + ) + + assert_raises( + ValueError, + vis_loop, + obs, + SI, + noisy=conf["noisy"], + mode=conf["mode"], + batch_size=20.0, + ) class TestPolarisation: @@ -188,10 +184,19 @@ class TestPolarisation: def setup_class(self): """Set up common objects and variables for the following tests.""" - from pyvisgen.simulation.data_set import create_observation + from pyvisgen.simulation.data_set import SimulateDataSet + from pyvisgen.simulation.observation import ( + DEFAULT_FIELD_KWARGS, + DEFAULT_POL_KWARGS, + ) from pyvisgen.simulation.visibility import Polarisation - self.obs = create_observation(conf) + _, self.obs = SimulateDataSet._get_obs_test(conf) + + # set to default kwargs for tests, otherwise + # some parameters of the config would be None + self.obs.field_kwargs = DEFAULT_FIELD_KWARGS + self.obs.pol_kwargs = DEFAULT_POL_KWARGS self.SI = torch.zeros((100, 100)) self.SI[25::25, 25::25] = 1 @@ -408,9 +413,3 @@ def test_polarisation_field_threshold(self): # expected to raise an AssertionError assert_raises(AssertionError, assert_array_equal, pf, pf_ref) - - -def test_simulate_data_set_no_slurm(): - from pyvisgen.simulation.data_set import simulate_data_set - - simulate_data_set(config) diff --git a/tests/test_utils.py b/tests/test_utils.py index 35131a6..e47f966 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,7 +3,7 @@ def test_read_config(): conf = read_data_set_conf("config/default_data_set.toml") - assert type(conf) == dict + assert type(conf) is dict assert list(conf.keys()) == [ "mode", "device", @@ -24,6 +24,12 @@ def test_read_config(): "corrupted", "noisy", "sensitivty_cut", + "polarisation", + "pol_delta", + "pol_amp_ratio", + "field_order", + "field_scale", + "field_threshold", "num_test_images", "bundle_size", "train_valid_split", @@ -33,6 +39,7 @@ def test_read_config(): "in_path", "out_path_fits", "out_path_gridded", + "file_prefix", ] From ba0ad293da86429c9ffcd97c4cb43d2f2b8bcef6 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 19:00:07 +0100 Subject: [PATCH 24/34] Update changelog --- docs/changes/53.maintenance.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changes/53.maintenance.rst b/docs/changes/53.maintenance.rst index 290b8a4..bf99a92 100644 --- a/docs/changes/53.maintenance.rst +++ b/docs/changes/53.maintenance.rst @@ -1,6 +1,6 @@ - Complete rewrite of dataset creation routine ``pyvisgen.simulation.data_set.SimulateDataSet`` - Accessible using a classmethod to load a config file - - Add optional multiprocessing support + - Add optional multithreading support - Draw and fully test parameters before simulation loop. Previously this was done in the loop and tests were only performed for two time steps - Support for polarization - Add new default config file for new dataset creation routine @@ -8,3 +8,4 @@ - Allow passing HDF5 key in ``pyvisgen.utils.data.open_bundles`` - Restructure ``pyvisgen.gridding`` module by adding a ``utils`` submodule that contains all utility functions that previously were in the ``gridder`` submodule - Also fix parts of the utility functions +- Update and fix tests From 482d2c2550631612cb5512b943fb55f1d1106e47 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 19:06:28 +0100 Subject: [PATCH 25/34] Add joblib dependency --- environment.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 05effcf..babb153 100644 --- a/environment.yml +++ b/environment.yml @@ -13,6 +13,7 @@ dependencies: - pip - towncrier - jupyter + - joblib - pytest - pytest-cov - pytest-runner diff --git a/pyproject.toml b/pyproject.toml index 534f4d1..ade495a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "click", "h5py", "ipython", + "joblib", "jupyter", "matplotlib", "natsort", From 89c5a20e562e2df3fab9413c28aedd801ae2f63a Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 28 Feb 2025 19:11:46 +0100 Subject: [PATCH 26/34] Fix test_conf.toml --- tests/test_conf.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_conf.toml b/tests/test_conf.toml index 7502c5b..1cff0e6 100644 --- a/tests/test_conf.toml +++ b/tests/test_conf.toml @@ -29,7 +29,7 @@ field_threshold = "none" [bundle_options] file_prefix="" -in_path = "/home/knierim/work/pyvisgen/tests/data" +in_path = "./tests/data" out_path_fits = "./tests/build/fits" out_path_gridded = "./tests/build/gridded" num_test_images = 1000 From 50d9612507e713fe8edd28c581c6cabcb8e21020 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Mon, 3 Mar 2025 08:41:18 +0100 Subject: [PATCH 27/34] Allow dicts as config inputs --- pyvisgen/simulation/data_set.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index 9d500b7..b161ca5 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -33,7 +33,7 @@ def __init__(self): @classmethod def from_config( cls, - config: str | Path, + config: str | Path | dict, /, image_key: str = "y", *, @@ -49,8 +49,9 @@ def from_config( Parameters ---------- - config : str or Path - Path to the config file. + config : str or Path or dict + Path to the config file or dict containing the configuration + parameters. image_key : str, optional Key under which the true sky distributions are saved in the HDF5 file. Default: ``'y'`` @@ -88,7 +89,12 @@ def from_config( if multiprocess in ["all"]: cls.multiprocess = -1 - cls.conf = read_data_set_conf(config) + if isinstance(config, (str, Path)): + cls.conf = read_data_set_conf(config) + elif isinstance(config, dict): + cls.conf = config + else: + raise ValueError("Expected config to be one of str, Path or dict!") print("Simulation Config:\n", cls.conf) From 5625b3e02579cec6be8359519a51ad827039c097 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Mon, 3 Mar 2025 08:41:48 +0100 Subject: [PATCH 28/34] Add amp/phase test for dataset creation --- tests/test_simulation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 05a8729..759c5b9 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -39,6 +39,11 @@ def test_run_no_slurm_multiprocess(self): def test_run_no_slurm_num_images(self): self.s.from_config(CONFIG, num_images=50) + def test_run_no_slurm_amp_phase_false(self): + config = conf + config["amp_phase"] = False + self.s.from_config(config, multiprocess="all") + class TestVisLoop: """Unit test class for :func:``pyvisgen.simulation.vis_loop``.""" From 35532d74873bc63c8fee65391230e552d2b35616 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Mon, 3 Mar 2025 21:49:53 +0100 Subject: [PATCH 29/34] Update tests, don't cover slurm option --- pyvisgen/simulation/data_set.py | 4 ++-- tests/test_simulation.py | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index b161ca5..1d267d5 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -121,7 +121,7 @@ def from_config( [len(cls.get_images(bundle)) for bundle in data_bundles] ) - if slurm: + if slurm: # pragma: no cover cls._run_slurm() else: # draw parameters beforehand, i.e. outside the simulation loop @@ -209,7 +209,7 @@ def _run(self) -> None: f"to '{path_msg}'!" ) - def _run_slurm(self) -> None: + def _run_slurm(self) -> None: # pragma: no cover """Runs the simulation in slurm and saves visibility data as individual FITS files. """ diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 759c5b9..56cba15 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -40,9 +40,25 @@ def test_run_no_slurm_num_images(self): self.s.from_config(CONFIG, num_images=50) def test_run_no_slurm_amp_phase_false(self): - config = conf + config = conf.copy() config["amp_phase"] = False - self.s.from_config(config, multiprocess="all") + self.s.from_config(config) + + def test_raise_valerr_conf_path(self): + assert_raises(ValueError, self.s.from_config, 42) + + def test_run_dense(self): + config = conf.copy() + config["mode"] = "dense" + assert_raises(ValueError, self.s.from_config, config) + + def test_run_polarisation(self): + config = conf.copy() + config["polarisation"] = "linear" + self.s.from_config(config) + + def test_run_no_gridding(self): + self.s.from_config(CONFIG, grid=False) class TestVisLoop: From b9106e9f6b2f96d7c5e16f5a2f5c8995547d6d2b Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Mon, 3 Mar 2025 22:26:48 +0100 Subject: [PATCH 30/34] Fix feed rot computation shapes --- pyvisgen/simulation/scan.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py index f564f6e..0c7f4d2 100644 --- a/pyvisgen/simulation/scan.py +++ b/pyvisgen/simulation/scan.py @@ -152,8 +152,8 @@ def calc_feed_rotation( X2 : :func:`~torch.tensor` Fourier kernel with the applied feed rotation. """ - q1 = torch.cat((bas[11], bas[12]))[..., None] - q2 = torch.cat((bas[14], bas[15]))[..., None] + q1 = bas[13][..., None] + q2 = bas[16][..., None] if polarisation == "linear": X1[..., 0, 0] *= torch.cos(q1) From 29b3d318e7db6abd60405ba94746896d424b6e0d Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Tue, 11 Mar 2025 14:15:14 +0100 Subject: [PATCH 31/34] Capture empty bundles --- pyvisgen/simulation/data_set.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index 1d267d5..ee75743 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -121,6 +121,12 @@ def from_config( [len(cls.get_images(bundle)) for bundle in data_bundles] ) + if isinstance(cls.num_images, (int, float)): + if int(cls.num_images) == 0: + raise ValueError( + "No images found in bundles! Please check your input path!" + ) + if slurm: # pragma: no cover cls._run_slurm() else: From 160aead8db179689faaf9719048d3c4515074f44 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 14 Mar 2025 09:37:59 +0100 Subject: [PATCH 32/34] Optimize random parameter sampling and testing speed --- pyproject.toml | 1 + pyvisgen/simulation/data_set.py | 201 +++++++++++++++++++++++--------- 2 files changed, 144 insertions(+), 58 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ade495a..e5f154c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "natsort", "numpy", "pandas", + "rich", "scipy", "toma", "toml", diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index ee75743..3e62310 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -4,9 +4,9 @@ import numpy as np import torch from astropy import units as un -from astropy.coordinates import AltAz, EarthLocation, SkyCoord from astropy.time import Time from joblib import Parallel, delayed +from rich import print from tqdm.autonotebook import tqdm import pyvisgen.fits.writer as writer @@ -25,6 +25,15 @@ DATEFMT = "%d-%m-%Y %H:%M:%S" +JD_EPOCH = Time("J2000.0").jd # Reference epoch (J2000.0) +DAYS_PER_CENTURY = 36525.0 # Number of days in a Julian century +GST_COEFFS = { + "const": 280.4606, + "linear": 360.985647366, + "quadratic": 0.000387933, + "cubic": -2.583e-8, +} + class SimulateDataSet: def __init__(self): @@ -44,6 +53,7 @@ def from_config( date_fmt: str = DATEFMT, num_images: int | None = None, multiprocess: int | str = 1, + device="cuda" if torch.cuda.is_available() else "cpu", ): """Simulates data from parameters in a config file. @@ -75,6 +85,9 @@ def from_config( Number of jobs to use in multiprocessing during the sampling and testing phase. If -1 or ``'all'``, use all available cores. Default: 1 + device : str, optional + Device to allocate :func:`~torch.tensor`s on. + Default: ``'cuda'`` if cuda is available, otherwise ``'cpu'`` """ cls = cls() cls.key = image_key @@ -85,6 +98,7 @@ def from_config( cls.date_fmt = date_fmt cls.num_images = num_images cls.multiprocess = multiprocess + cls.device = device if multiprocess in ["all"]: cls.multiprocess = -1 @@ -129,6 +143,7 @@ def from_config( if slurm: # pragma: no cover cls._run_slurm() + pass else: # draw parameters beforehand, i.e. outside the simulation loop cls.create_sampling_rc(cls.num_images) @@ -295,8 +310,8 @@ def create_observation(self, i: int) -> Observation: obs = Observation( **self.samp_opts_const, - src_ra=rc["src_ra"][i], - src_dec=rc["src_dec"][i], + src_ra=rc["src_ra"][i].cpu().numpy(), + src_dec=rc["src_dec"][i].cpu().numpy(), start_time=rc["start_time"][i], scan_duration=int(rc["scan_duration"][i]), num_scans=int(rc["num_scans"][i]), @@ -351,14 +366,20 @@ def create_sampling_rc(self, size: int) -> None: self.samp_opts = self.draw_sampling_opts(size) test_idx = tqdm( - range(self.samp_opts["src_ra"].size), + range(self.samp_opts["src_ra"].size()[0]), position=0, desc="Pre-drawing and testing sample parameters", colour="#00c1a2", leave=False, ) + # get array for later use and also get lon/lat conversion self.array = layouts.get_array_layout(self.samp_opts_const["array_layout"]) + self.array_lat, self.array_lon = self._geocentric_to_spherical( + self.array.x.to(self.device), + self.array.y.to(self.device), + self.array.z.to(self.device), + ) Parallel(n_jobs=self.multiprocess, backend="threading")( delayed(self.test_rand_opts)(i) for i in test_idx @@ -409,7 +430,7 @@ def draw_sampling_opts(self, size: int) -> dict: # if polarisation is None, we don't need to enter the # conditional below, so we set delta, amp_ratio, field_order, # and field_scale to None. - delta, amp_ratio, field_order, field_scale = np.full((4, size), None) + delta, amp_ratio, field_order, field_scale = np.full((4, size), np.NaN) if self.conf["polarisation"]: if self.conf["pol_delta"]: @@ -437,15 +458,15 @@ def draw_sampling_opts(self, size: int) -> dict: field_scale = np.stack((a, b), axis=1) samp_opts = dict( - src_ra=ra, - src_dec=dec, + src_ra=torch.from_numpy(ra).to(self.device), + src_dec=torch.from_numpy(dec).to(self.device), start_time=scan_start, - scan_duration=scan_duration, - num_scans=num_scans, - delta=delta, - amp_ratio=amp_ratio, - order=field_order, - scale=field_scale, + scan_duration=torch.from_numpy(scan_duration).to(self.device), + num_scans=torch.from_numpy(num_scans).to(self.device), + delta=torch.from_numpy(delta).to(self.device), + amp_ratio=torch.from_numpy(amp_ratio).to(self.device), + order=torch.from_numpy(field_order).to(self.device), + scale=torch.from_numpy(field_scale).to(self.device), threshold=self.conf["field_threshold"], ) # NOTE: We don't need to draw random values for threshold @@ -468,44 +489,48 @@ def test_rand_opts(self, i: int) -> None: i : int Index of the current set of sampling parameters. """ - time_steps = self.calc_time_steps(i) - src_crd = SkyCoord( - self.samp_opts["src_ra"][i], self.samp_opts["src_dec"][i], unit=un.deg - ) + # Loop until a valid observation is found + while True: + time_steps = self.calc_time_steps(i) + ra = self.samp_opts["src_ra"][i] + dec = self.samp_opts["src_dec"][i] + + # calculate Greenwich sidereal time + jd = Time(time_steps).jd + + jd_diff = jd - JD_EPOCH + T = jd_diff / DAYS_PER_CENTURY + gst = ( + GST_COEFFS["const"] + + GST_COEFFS["linear"] * jd_diff + + T * (GST_COEFFS["quadratic"] + T * GST_COEFFS["cubic"]) + ) + gst = gst % 360 - total_stations = len(self.array.st_num) + # Compute local sidereal time + lst = (gst[:, np.newaxis] + self.array_lon.cpu().numpy()) % 360 + lst = torch.tensor(lst, device=self.device) - locations = EarthLocation.from_geocentric( - x=self.array.x, - y=self.array.y, - z=self.array.z, - unit=un.m, - ) + alt = self._compute_altitude(ra, dec, lst) - altaz_frames = AltAz(obstime=time_steps[:, None], location=locations[None]) - src_alt = src_crd.transform_to(altaz_frames).alt.degree - - # check which stations can see the source for each time step - visible = np.logical_and( - self.array.el_low.numpy() <= src_alt, src_alt <= self.array.el_high.numpy() - ) + # Check visibility + visible = torch.logical_and( + self.array.el_low.to(self.device) <= alt, + alt <= self.array.el_high.to(self.device), + ) + visible_count_per_t = visible.sum(dim=1) + visible_half = visible_count_per_t > len(self.array.st_num) // 2 - # We want at least half of the telescopes to see the source - visible_count_per_t = visible.sum(axis=1) - visible_half = visible_count_per_t > total_stations // 2 + # Exit the loop if the condition is met + if visible_half.sum().item() >= time_steps.size // 2: + break - # If the source is not seen by half the telescopes half of the observation time, - # we redraw the source ra and dec and scan start times, duration, - # and number of scans. Then we test again by calling this function recursively. - if visible_half.sum() < time_steps.size // 2: + # Redraw sampling parameters if the condition is not met redrawn_samp_opts = self.draw_sampling_opts(1) - self.samp_opts["src_ra"][i] = redrawn_samp_opts["src_ra"][0] - self.samp_opts["src_dec"][i] = redrawn_samp_opts["src_dec"][0] - self.samp_opts["start_time"][i] = redrawn_samp_opts["start_time"][0] - self.samp_opts["scan_duration"][i] = redrawn_samp_opts["scan_duration"][0] - self.samp_opts["num_scans"][i] = redrawn_samp_opts["num_scans"][0] + keys = ["src_ra", "src_dec", "start_time", "scan_duration", "num_scans"] - self.test_rand_opts(i) + for key in keys: + self.samp_opts[key][i] = redrawn_samp_opts[key][0] def calc_time_steps(self, i: int) -> Time: """Calculates time steps for given sampling @@ -516,31 +541,91 @@ def calc_time_steps(self, i: int) -> Time: i : int Index of the current set of sampling parameters. + Returns + ------- + time_steps : :class:`~astropy.time.Time` + Observation time steps. + See Also -------- pyvisgen.simulation.data_set.SimulateDataSet.test_rand_opts : Tests randomized sampling parameters. """ start_time = Time(self.samp_opts["start_time"][i].isoformat(), format="isot") - scan_separation = self.samp_opts_const["scan_separation"] + num_scans = self.samp_opts["num_scans"][i] + scan_separation = self.samp_opts_const["scan_separation"] scan_duration = self.samp_opts["scan_duration"][i] + int_time = self.samp_opts_const["integration_time"] - time_lst = [ + time_steps = ( start_time - + scan_separation * i * un.second - + i * scan_duration * un.second - + j * int_time * un.second - for i in range(num_scans) - for j in range(int(scan_duration / int_time) + 1) - ] - # +1 because t_1 is the stop time of t_0. - # In order to save computing power we take - # one time more to complete interval - time = Time(time_lst) - - return time + + torch.arange(num_scans)[:, None] * scan_separation * un.second + + torch.arange(int(scan_duration / int_time) + 1)[None, :] + * int_time + * un.second + ).flatten() + + return Time(time_steps) + + def _geocentric_to_spherical( + self, x: torch.tensor, y: torch.tensor, z: torch.tensor + ) -> torch.tensor: + """Convert geocentric coordinates to lon/lat. + + Parameters + ---------- + x, y, z : :func:`~torch.tensor` + Cartesian coordinates in the geocentric coordinate + system. + + Returns + ------- + lon, lat : :func:`~torch.tensor` + Longitude and latitude representation of the + geocentric coordinates. + """ + r = torch.sqrt(x**2 + y**2 + z**2) + lat = torch.rad2deg(torch.arcsin(z / r)) + lon = torch.rad2deg(torch.atan2(y, x)) + + return lat, lon + + def _compute_altitude( + self, ra: torch.tensor, dec: torch.tensor, lst: torch.tensor + ) -> torch.tensor: + """Computes altitude for a given RA/DEC, and local sidereal time (LST). + + Parameters + ---------- + ra, dec : :func:`~torch.tensor` + Right ascension and declination of the source. + lst : :func:`~torch.tensor` + Local sidereal time of the source. + + Returns + ------- + alt_rad : :func:`~torch.tensor` + Altitude of the source. + """ + ra_rad = torch.deg2rad(ra) + dec_rad = torch.deg2rad(dec) + lst_rad = torch.deg2rad(lst) + lat_rad = torch.deg2rad(self.array_lat) + + ha_rad = lst_rad - ra_rad + + # Compute altitude using spherical trigonometry + sin_alt = torch.sin(dec_rad) * torch.sin(lat_rad) + torch.cos( + dec_rad + ) * torch.cos(lat_rad) * torch.cos(ha_rad) + + # limit sin_alt to (-1, 1) to ensure numerical stability + # in the arcsin below + sin_alt = torch.clamp(sin_alt, -1, 1) + alt_rad = torch.arcsin(sin_alt) + return torch.rad2deg(alt_rad) @classmethod def _get_obs_test( From e18735d484110f04ea1bd0d79f13b9e916ea57b6 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 14 Mar 2025 09:54:17 +0100 Subject: [PATCH 33/34] Get device from config --- pyvisgen/simulation/data_set.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py index 3e62310..75fe88c 100644 --- a/pyvisgen/simulation/data_set.py +++ b/pyvisgen/simulation/data_set.py @@ -53,7 +53,6 @@ def from_config( date_fmt: str = DATEFMT, num_images: int | None = None, multiprocess: int | str = 1, - device="cuda" if torch.cuda.is_available() else "cpu", ): """Simulates data from parameters in a config file. @@ -85,9 +84,6 @@ def from_config( Number of jobs to use in multiprocessing during the sampling and testing phase. If -1 or ``'all'``, use all available cores. Default: 1 - device : str, optional - Device to allocate :func:`~torch.tensor`s on. - Default: ``'cuda'`` if cuda is available, otherwise ``'cpu'`` """ cls = cls() cls.key = image_key @@ -98,7 +94,6 @@ def from_config( cls.date_fmt = date_fmt cls.num_images = num_images cls.multiprocess = multiprocess - cls.device = device if multiprocess in ["all"]: cls.multiprocess = -1 @@ -112,6 +107,8 @@ def from_config( print("Simulation Config:\n", cls.conf) + cls.device = cls.conf["device"] + if grid: cls.out_path = Path(cls.conf["out_path_gridded"]) else: From 56cf15a9ab862ef62b7e1696a3a9b233b9bfbda7 Mon Sep 17 00:00:00 2001 From: Anno Knierim Date: Fri, 14 Mar 2025 10:56:32 +0100 Subject: [PATCH 34/34] Fix tensor construction and cloning --- pyvisgen/simulation/scan.py | 4 ++-- pyvisgen/simulation/visibility.py | 25 ++++++++++++++----------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py index 0ba21f3..7816c4a 100644 --- a/pyvisgen/simulation/scan.py +++ b/pyvisgen/simulation/scan.py @@ -180,8 +180,8 @@ def calc_feed_rotation( xb[..., 1, 0] = X2[..., 1, 0] * torch.exp(1j * q2) xb[..., 1, 1] = X2[..., 1, 1] * torch.exp(-1j * q2) - X1 = xa.clone() - X2 = xb.clone() + X1 = xa.detach().clone() + X2 = xb.detach().clone() del xa, xb diff --git a/pyvisgen/simulation/visibility.py b/pyvisgen/simulation/visibility.py index 3d3e421..c89acc1 100644 --- a/pyvisgen/simulation/visibility.py +++ b/pyvisgen/simulation/visibility.py @@ -161,17 +161,20 @@ def __init__( **field_kwargs, ) + if isinstance(delta, float): + delta = torch.tensor(delta) + self.delta = delta if amp_ratio and (amp_ratio >= 0): ax2 = amp_ratio else: - ax2 = torch.rand(1) + ax2 = torch.rand(1).to(self.device) ay2 = 1 - ax2 - self.ax2 = self.SI[..., 0].clone() * ax2 - self.ay2 = self.SI[..., 0].clone() * ay2 + self.ax2 = self.SI[..., 0].detach().clone().to(self.device) * ax2 + self.ay2 = self.SI[..., 0].detach().clone().to(self.device) * ay2 else: self.ax2 = self.SI[..., 0] self.ay2 = torch.zeros_like(self.ax2) @@ -198,13 +201,13 @@ def linear(self) -> None: 2 * torch.sqrt(self.ax2) * torch.sqrt(self.ay2) - * torch.cos(torch.deg2rad(torch.tensor(self.delta))) + * torch.cos(torch.deg2rad(self.delta)) ) self.I[..., 3] = ( -2 * torch.sqrt(self.ax2) * torch.sqrt(self.ay2) - * torch.sin(torch.deg2rad(torch.tensor(self.delta))) + * torch.sin(torch.deg2rad(self.delta)) ) def circular(self) -> None: @@ -225,13 +228,13 @@ def circular(self) -> None: 2 * torch.sqrt(self.ax2) * torch.sqrt(self.ay2) - * torch.cos(torch.deg2rad(torch.tensor(self.delta))) + * torch.cos(torch.deg2rad(self.delta)) ) self.I[..., 2] = ( -2 * torch.sqrt(self.ax2) * torch.sqrt(self.ay2) - * torch.sin(torch.deg2rad(torch.tensor(self.delta))) + * torch.sin(torch.deg2rad(self.delta)) ) self.I[..., 3] = self.ax2 - self.ay2 @@ -244,13 +247,13 @@ def dop(self) -> None: self.I[..., 2] *= self.polarisation_field self.I[..., 3] *= self.polarisation_field - dop_I = self.I[..., 0].real.clone() + dop_I = self.I[..., 0].real.detach().clone() dop_I[~mask] = float("nan") - dop_Q = self.I[..., 1].real.clone() + dop_Q = self.I[..., 1].real.detach().clone() dop_Q[~mask] = float("nan") - dop_U = self.I[..., 2].real.clone() + dop_U = self.I[..., 2].real.detach().clone() dop_U[~mask] = float("nan") - dop_V = self.I[..., 3].real.clone() + dop_V = self.I[..., 3].real.detach().clone() dop_V[~mask] = float("nan") self.lin_dop = torch.sqrt(dop_Q**2 + dop_U**2) / dop_I