From 6cb0abd5a26a8bd4e8ae5e7efc89ed5b0bff7f0e Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Tue, 28 Jan 2025 13:09:00 +0100
Subject: [PATCH 01/34] Rewrite data set creation routine

---
 pyvisgen/simulation/data_set.py    | 418 ++++++++++++++++++++++++++++-
 pyvisgen/simulation/observation.py |  51 ++--
 pyvisgen/utils/data.py             |   4 +-
 3 files changed, 434 insertions(+), 39 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index e75dcdb..f0d464a 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -7,7 +7,7 @@
 from astropy import units as un
 from astropy.coordinates import AltAz, Angle, EarthLocation, SkyCoord
 from astropy.time import Time
-from tqdm import tqdm
+from tqdm.autonotebook import tqdm
 
 import pyvisgen.fits.writer as writer
 import pyvisgen.layouts.layouts as layouts
@@ -17,6 +17,422 @@
 from pyvisgen.utils.data import load_bundles, open_bundles
 
 
+class SimulateDataSet:
+    def __init__(self):
+        pass
+
+    @classmethod
+    def from_config(
+        cls,
+        config: str | Path,
+        slurm=False,
+        job_id=None,
+        n=None,
+        image_key="y",
+    ) -> None:
+        """
+        Parameters
+        ----------
+        config : str or Path
+            path to config file
+        slurm : bool
+            True, if slurm is used
+        job_id : int
+            job_id, given by slurm
+        n : int
+            running index
+        """
+        cls = cls()
+        cls.job_id = job_id
+        cls.n = n
+        cls.key = image_key
+
+        cls.conf = read_data_set_conf(config)
+
+        cls.out_path = Path(cls.conf["out_path_fits"])
+        cls.out_path.mkdir(parents=True, exist_ok=True)
+
+        cls.data = load_bundles(cls.conf["in_path"])
+
+        if slurm:
+            cls._run_slurm()
+        else:
+            cls._run()
+
+        return cls
+
+    @classmethod
+    def from_params(
+        cls,
+        images,
+        ra,
+        dec,
+        layout,
+        img_size,
+        fov,
+        int_time,
+        scan_start,
+        scan_duration,
+        num_scans,
+        scan_sep,
+        ref_freq,
+        freq_offsets,
+        bandwidths,
+        *,
+        corrupted=False,
+        noisy=0,
+        sensitivty_cut=1e-6,
+        mode="full",
+        grid_size=128,
+        grid_fov=100,
+        amp_phase=False,
+        num_test_images=500,
+        bundle_size=100,
+        train_valid_split=0.2,
+        seed=42,
+        device="cuda",
+        input_path="skies/",
+        out_path_fits="build/uvfits",
+        out_path_gridded="build/gridded",
+        slurm=False,
+        job_id=None,
+        n=None,
+    ) -> None:
+        """ """
+        cls.job_id = job_id
+        cls.n = n
+
+        cls.conf = {
+            "mode": mode,
+            "device": device,
+            "seed": seed,
+            "layout": (layout,),
+            "img_size": (img_size,),
+            "fov_center_ra": ([ra, ra],),
+            "fov_center_dec": ([dec, dec],),
+            "fov_size": fov,
+            "corr_int_time": int_time,
+            "scan_start": scan_start,
+            "scan_duration": scan_duration,
+            "num_scans": num_scans,
+            "scan_separation": scan_sep,
+            "ref_frequency": ref_freq,
+            "frequency_offsets": freq_offsets,
+            "bandwidths": bandwidths,
+            "corrupted": corrupted,
+            "noisy": noisy,
+            "sensitivty_cut": sensitivty_cut,
+            "num_test_images": num_test_images,
+            "bundle_size": bundle_size,
+            "train_valid_split": train_valid_split,
+            "grid_size": grid_size,
+            "grid_fov": grid_fov,
+            "amp_phase": amp_phase,
+            "in_path": input_path,
+            "out_path_fits": out_path_fits,
+            "out_path_gridded": out_path_gridded,
+        }
+
+        cls.out_path = Path(cls.conf["out_path_fits"])
+
+        if not cls.out_path.isdir():
+            cls.out_path.mkdir(parents=True, exist_ok=True)
+
+        cls.data = load_bundles(cls.conf["in_path"])
+
+        if slurm:
+            cls._run_slurm()
+        else:
+            cls._run()
+
+    def _run_slurm(self):
+        """ """
+        job_id = int(self.job_id + self.n * 500)
+        out = self.out_path / Path("vis_" + str(job_id) + ".fits")
+
+        imgs_bundle = len(open_bundles(self.data[0]))
+        bundle = torch.div(job_id, imgs_bundle, rounding_mode="floor")
+        image = job_id - bundle * imgs_bundle
+
+        SI = torch.tensor(open_bundles(self.data[bundle])[image])
+
+        if len(SI.shape) == 2:
+            SI = SI.unsqueeze(0)
+
+        obs = self.create_observation(self.conf)
+        vis_data = vis_loop(obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"])
+        hdu_list = writer.create_hdu_list(vis_data, obs)
+        hdu_list.writeto(out, overwrite=True)
+
+    def _run(self):
+        """ """
+        for i in tqdm(range(len(self.data))):
+            SIs = self.get_images(self.data, i)
+
+            for j, SI in enumerate(tqdm(SIs)):
+                obs = create_observation(self.conf)
+                vis_data = vis_loop(
+                    obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]
+                )
+
+                out = self.out_path / Path("vis_" + str(j + len(SIs) * i) + ".fits")
+                hdu_list = writer.create_hdu_list(vis_data, obs)
+                hdu_list.writeto(out, overwrite=True)
+
+    def get_images(self, bundles, i) -> torch.tensor:
+        """ """
+        SIs = torch.tensor(open_bundles(bundles[i], key=self.key))
+
+        if len(SIs.shape) == 3:
+            SIs = SIs.unsqueeze(1)
+
+        return SIs
+
+    def _create_observation(self) -> Observation:
+        rc = create_sampling_rc(self.conf)
+
+        dense = False
+        if rc["mode"] == "dense":
+            dense = True
+
+        obs = Observation(
+            src_ra=rc["fov_center_ra"],
+            src_dec=rc["fov_center_dec"],
+            start_time=rc["scan_start"],
+            scan_duration=rc["scan_duration"],
+            num_scans=rc["num_scans"],
+            scan_separation=rc["scan_separation"],
+            integration_time=rc["corr_int_time"],
+            ref_frequency=rc["ref_frequency"],
+            frequency_offsets=rc["frequency_offsets"],
+            bandwidths=rc["bandwidths"],
+            fov=rc["fov_size"],
+            image_size=rc["img_size"],
+            array_layout=rc["layout"],
+            corrupted=rc["corrupted"],
+            device=rc["device"],
+            dense=dense,
+            sensitivity_cut=rc["sensitivity_cut"],
+        )
+
+        return obs
+
+    def _create_sampling_rc(self):
+        """
+        Draw sampling options and test if at least half of the
+        telescopes can see the source. If not, then new
+        parameters are drawn.
+
+        Parameters
+        ----------
+        conf : dict
+            simulation options
+
+        Returns
+        -------
+        dict
+            contains the observation parameters
+        """
+
+        global rng
+
+        if self.conf["seed"] is not None:
+            rng = np.random.default_rng(self.conf["seed"])
+        else:
+            rng = np.random.default_rng()
+
+        samp_ops = draw_sampling_opts()
+        array_layout = layouts.get_array_layout(self.conf["layout"][0])
+        half_telescopes = array_layout.x.shape[0] // 2
+
+        while test_opts(samp_ops) <= half_telescopes:
+            samp_ops = draw_sampling_opts(self.conf)
+
+        return samp_ops
+
+    def draw_sampling_opts(self):
+        """
+        Draw observation options from given intervals.
+
+        Parameters
+        ----------
+        conf : dict
+            simulation options
+
+        Returns
+        -------
+        dict
+            contains randomly drawn observation options
+        """
+
+        if "rng" not in globals():
+            global rng
+            rng = np.random.default_rng(self.conf["seed"])
+
+        angles_ra = np.arange(
+            self.conf["fov_center_ra"][0][0],
+            self.conf["fov_center_ra"][0][1],
+            step=0.1,
+        )
+        angles_dec = np.arange(
+            self.conf["fov_center_dec"][0][0],
+            self.conf["fov_center_dec"][0][1],
+            step=0.1,
+        )
+
+        fov_center_ra = rng.choice(angles_ra)
+        fov_center_dec = rng.choice(angles_dec)
+
+        start_time_l = datetime.strptime(
+            self.conf["scan_start"][0], "%d-%m-%Y %H:%M:%S"
+        )
+        start_time_h = datetime.strptime(
+            self.conf["scan_start"][1], "%d-%m-%Y %H:%M:%S"
+        )
+        start_times = pd.date_range(
+            start_time_l,
+            start_time_h,
+            freq="1h",
+        ).strftime("%d-%m-%Y %H:%M:%S")
+
+        scan_start = rng.choice(
+            [datetime.strptime(time, "%d-%m-%Y %H:%M:%S") for time in start_times]
+        )
+        scan_duration = int(
+            rng.integers(self.conf["scan_duration"][0], self.conf["scan_duration"][1])
+        )
+
+        num_scans = int(
+            rng.integers(self.conf["num_scans"][0], self.conf["num_scans"][1])
+        )
+
+        # TODO: is this really necessary?
+        opts = np.array(
+            [
+                self.conf["mode"],
+                self.conf["layout"][0],
+                self.conf["img_size"][0],
+                fov_center_ra,
+                fov_center_dec,
+                self.conf["fov_size"],
+                self.conf["corr_int_time"],
+                scan_start,
+                scan_duration,
+                num_scans,
+                self.conf["scan_separation"],
+                self.conf["ref_frequency"],
+                self.conf["frequency_offsets"],
+                self.conf["bandwidths"],
+                self.conf["corrupted"],
+                self.conf["device"],
+                self.conf["sensitivty_cut"],
+            ],
+            dtype="object",
+        )
+
+        samp_ops = {
+            "mode": opts[0],
+            "layout": opts[1],
+            "img_size": opts[2],
+            "fov_center_ra": opts[3],
+            "fov_center_dec": opts[4],
+            "fov_size": opts[5],
+            "corr_int_time": opts[6],
+            "scan_start": opts[7],
+            "scan_duration": opts[8],
+            "num_scans": opts[9],
+            "scan_separation": opts[10],
+            "ref_frequency": opts[11],
+            "frequency_offsets": opts[12],
+            "bandwidths": opts[13],
+            "corrupted": opts[14],
+            "device": opts[15],
+            "sensitivity_cut": opts[16],
+        }
+        return samp_ops
+
+    def test_opts(self, rc):
+        """
+        Compute the number of telescopes that can observe the source given
+        certain randomly drawn parameters.
+
+        Parameters
+        ----------
+        rc : dict
+            randomly drawn observational parameters
+
+        Returns
+        -------
+
+        """
+        array_layout = layouts.get_array_layout(rc["layout"])
+
+        src_crd = SkyCoord(
+            ra=rc["fov_center_ra"], dec=rc["fov_center_dec"], unit=(un.deg, un.deg)
+        )
+
+        time = self._calc_time_steps(rc)
+
+        _, el_st_0 = self.calc_ref_elev(src_crd, time[0], array_layout)
+        _, el_st_1 = self.calc_ref_elev(src_crd, time[1], array_layout)
+
+        el_min = 15
+        el_max = 85
+
+        active_telescopes_0 = np.sum((el_st_0 >= el_min) & (el_st_0 <= el_max))
+        active_telescopes_1 = np.sum((el_st_1 >= el_min) & (el_st_1 <= el_max))
+
+        return min(active_telescopes_0, active_telescopes_1)
+
+    def calc_ref_elev(self, src_crd, time, array_layout):
+        if time.shape == ():
+            time = time[None]
+
+        # Calculate for all times
+        # calculate GHA, Greenwich as reference for EHT
+        ha_all = Angle(
+            [t.sidereal_time("apparent", "greenwich") - src_crd.ra for t in time]
+        )
+
+        # calculate elevations
+        el_st_all = src_crd.transform_to(
+            AltAz(
+                obstime=time.reshape(len(time), -1),
+                location=EarthLocation.from_geocentric(
+                    np.repeat([array_layout.x], len(time), axis=0),
+                    np.repeat([array_layout.y], len(time), axis=0),
+                    np.repeat([array_layout.z], len(time), axis=0),
+                    unit=un.m,
+                ),
+            )
+        ).alt.degree
+
+        assert len(ha_all.value) == len(el_st_all)
+
+        return ha_all, el_st_all
+
+    def _calc_time_steps(self, conf):
+        start_time = Time(conf["scan_start"].isoformat(), format="isot")
+        scan_separation = conf["scan_separation"]
+        num_scans = conf["num_scans"]
+        scan_duration = conf["scan_duration"]
+        int_time = conf["corr_int_time"]
+
+        time_lst = [
+            start_time
+            + scan_separation * i * un.second
+            + i * scan_duration * un.second
+            + j * int_time * un.second
+            for i in range(num_scans)
+            for j in range(int(scan_duration / int_time) + 1)
+        ]
+        # +1 because t_1 is the stop time of t_0
+        # in order to save computing power we take one time more to complete interval
+        time = Time(time_lst)
+
+        return time
+
+
 def simulate_data_set(config, slurm=False, job_id=None, n=None):
     """
     Wrapper function for simulating visibilities.
diff --git a/pyvisgen/simulation/observation.py b/pyvisgen/simulation/observation.py
index 8f4ad61..5f2658e 100644
--- a/pyvisgen/simulation/observation.py
+++ b/pyvisgen/simulation/observation.py
@@ -6,14 +6,13 @@
 import astropy.units as un
 import torch
 from astropy.constants import c
-from astropy.coordinates import AltAz, Angle, EarthLocation, SkyCoord, Longitude
+from astropy.coordinates import AltAz, Angle, EarthLocation, Longitude, SkyCoord
 from astropy.time import Time
 from tqdm.autonotebook import tqdm
 
 from pyvisgen.layouts import layouts
 from pyvisgen.simulation.array import Array
 
-
 DEFAULT_POL_KWARGS = {
     "delta": 0,
     "amp_ratio": 0.5,
@@ -277,20 +276,20 @@ def __init__(
             the simulation of polarisation. Default: `None`
         pol_kwargs : dict, optional
             Additional keyword arguments for the simulation
-            of polarisation. Default: `{
+            of polarisation. Default: ``{
                 "delta": 0,
                 "amp_ratio": 0.5,
                 "random_state": 42,
-            }`
+            }``
         field_kwargs : dict, optional
             Additional keyword arguments for the random polarisation
             field that is applied when simulating polarisation.
-            Default: `{
+            Default: ``{
                 "order": [1, 1],
                 "scale": [0, 1],
                 "threshold": None,
                 "random_state": 42
-            }`
+            }``
         show_progress : bool, optional
             If `True`, show a progress bar during the iteration over the
             scans. Default: False
@@ -464,8 +463,9 @@ def calc_baselines(self):
             self.baselines.add_baseline(bas)
 
     def get_baselines(self, times):
-        """Calculates baselines from source coordinates and time of observation for
-        every antenna station in array_layout.
+        """Calculates baselines from source coordinates
+        and time of observation for every antenna station
+        in array_layout.
 
         Parameters
         ----------
@@ -583,7 +583,8 @@ def calc_ref_elev(self, time=None):
         )
 
     def calc_feed_rotation(self, ha: Angle) -> Angle:
-        r"""Calculates feed rotation for every antenna at every time step.
+        r"""Calculates feed rotation for every antenna at
+        every time step.
 
         Notes
         -----
@@ -592,10 +593,12 @@ def calc_feed_rotation(self, ha: Angle) -> Angle:
 
         .. math::
 
-            q = \atan\left(\frac{\sin h}{\cos\delta \tan\varphi - \sin\delta \cos h\right),
+            q = \atan\left(\frac{\sin h}{\cos\delta \tan\varphi
+            - \sin\delta \cos h\right),
 
-        where $h$ is the local hour angle, $\varphi$ the geographical latitude
-        of the observer, and $\delta$ the declination of the source.
+        where $h$ is the local hour angle, $\varphi$ the geographical
+        latitude of the observer, and $\delta$ the declination of
+        the source.
         """
         # We need to create a tensor from the EarthLocation object
         # and save only the geographical latitude of each antenna
@@ -612,30 +615,6 @@ def calc_feed_rotation(self, ha: Angle) -> Angle:
 
         return q
 
-    def calc_direction_cosines(self, ha, el_st, delta_x, delta_y, delta_z):
-        src_dec = torch.deg2rad(self.dec)
-        ha = torch.deg2rad(ha)
-
-        u = (torch.sin(ha) * delta_x + torch.cos(ha) * delta_y).reshape(-1)
-        v = (
-            -torch.sin(src_dec) * torch.cos(ha) * delta_x
-            + torch.sin(src_dec) * torch.sin(ha) * delta_y
-            + torch.cos(src_dec) * delta_z
-        ).reshape(-1)
-        w = (
-            torch.cos(src_dec) * torch.cos(ha) * delta_x
-            - torch.cos(src_dec) * torch.sin(ha) * delta_y
-            + torch.sin(src_dec) * delta_z
-        ).reshape(-1)
-
-        if not (u.shape == v.shape == w.shape):
-            raise ValueError(
-                "Expected u, v, and w to have the same shapes: "
-                f"{u.shape}, {v.shape}, {w.shape}"
-            )
-
-        return u, v, w
-
     def create_rd_grid(self):
         """Calculates RA and Dec values for a given fov around a source position
 
diff --git a/pyvisgen/utils/data.py b/pyvisgen/utils/data.py
index adfc441..f7958b6 100644
--- a/pyvisgen/utils/data.py
+++ b/pyvisgen/utils/data.py
@@ -18,7 +18,7 @@ def get_bundles(path):
     return bundles
 
 
-def open_bundles(path):
+def open_bundles(path, key="y"):
     f = h5py.File(path, "r")
-    bundle_y = np.array(f["y"])
+    bundle_y = np.array(f[key])
     return bundle_y

From 3579cfb841fd2d2825af0d5864590112b2aac44f Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Tue, 28 Jan 2025 13:41:10 +0100
Subject: [PATCH 02/34] Remove unused packages from dependencies

---
 pyproject.toml | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5afa926..5b6e05a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,6 @@ classifiers = [
 requires-python = ">=3.10"
 
 dependencies = [
-  "astroplan",
   "astropy<=6.1.0",
   "click",
   "h5py",
@@ -36,17 +35,12 @@ dependencies = [
   "jupyter",
   "matplotlib",
   "natsort",
-  "numexpr",
   "numpy",
   "pandas",
-  "pre-commit",
-  "pytest",
-  "pytest-cov",
   "scipy",
   "toma",
   "toml",
   "torch",
-  "torch",
   "tqdm",
 ]
 
@@ -59,6 +53,10 @@ tests = [
   "tomli",
 ]
 
+dev = [
+  "pre-commit",
+]
+
 docs = [
   "graphviz",
   "ipython",

From fe7c0d0219670883462f33b735a9e1110813a23f Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:48:35 +0100
Subject: [PATCH 03/34] Rename simulation script entry point

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5b6e05a..534f4d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,7 +77,7 @@ docs = [
 ]
 
 [project.scripts]
-pyvisgen_create_dataset = "pyvisgen.simulation.scripts.create_dataset:main"
+pyvisgen-simulate = "pyvisgen.simulation.scripts.create_dataset:main"
 
 [tool.setuptools_scm]
 write_to = "pyvisgen/_version.py"

From e856947f893cf72560b12fb630221fa77e3bee86 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:51:24 +0100
Subject: [PATCH 04/34] Refactor gridding module

- Add new grid_vis_loop_data method that allows to easily grid
  data coming from the vis_loop
- Move some methods to new gridding.utils submodule
- Update gridding script
---
 pyvisgen/gridding/__init__.py          |   7 +-
 pyvisgen/gridding/gridder.py           | 251 ++++++++-----------------
 pyvisgen/gridding/scripts/grid_data.py |  13 ++
 pyvisgen/gridding/utils.py             | 154 +++++++++++++++
 4 files changed, 247 insertions(+), 178 deletions(-)
 create mode 100644 pyvisgen/gridding/scripts/grid_data.py
 create mode 100644 pyvisgen/gridding/utils.py

diff --git a/pyvisgen/gridding/__init__.py b/pyvisgen/gridding/__init__.py
index 9bcde62..c53f9ce 100644
--- a/pyvisgen/gridding/__init__.py
+++ b/pyvisgen/gridding/__init__.py
@@ -1,13 +1,11 @@
 from .gridder import (
-    calc_truth_fft,
     convert_amp_phase,
     convert_real_imag,
-    create_gridded_data_set,
     ducc0_gridding,
     grid_data,
-    open_data,
-    save_fft_pair,
+    grid_vis_loop_data,
 )
+from .utils import calc_truth_fft, create_gridded_data_set, open_data, save_fft_pair
 
 __all__ = [
     "calc_truth_fft",
@@ -16,6 +14,7 @@
     "create_gridded_data_set",
     "ducc0_gridding",
     "grid_data",
+    "grid_vis_loop_data",
     "open_data",
     "save_fft_pair",
 ]
diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py
index eb6f372..d7d2ef9 100644
--- a/pyvisgen/gridding/gridder.py
+++ b/pyvisgen/gridding/gridder.py
@@ -1,154 +1,8 @@
-import os
-from pathlib import Path
-
 import astropy.constants as const
-import h5py
 import numpy as np
-from tqdm import tqdm
+from numpy import AxisError
 
-from pyvisgen.fits.data import fits_data
 from pyvisgen.gridding.alt_gridder import ms2dirty_python_fast
-from pyvisgen.utils.config import read_data_set_conf
-from pyvisgen.utils.data import load_bundles, open_bundles
-
-os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
-
-
-def create_gridded_data_set(config):
-    conf = read_data_set_conf(config)
-    out_path_fits = Path(conf["out_path_fits"])
-    out_path = Path(conf["out_path_gridded"])
-    out_path.mkdir(parents=True, exist_ok=True)
-
-    sky_dist = load_bundles(conf["in_path"])
-    fits_files = fits_data(out_path_fits)
-    size = len(fits_files)
-    print(size)
-
-    ###################
-    # test
-    if conf["num_test_images"] > 0:
-        bundle_test = int(conf["num_test_images"] // conf["bundle_size"])
-        size -= conf["num_test_images"]
-
-        for i in tqdm(range(bundle_test)):
-            (
-                uv_data_test,
-                freq_data_test,
-                gridded_data_test,
-                sky_dist_test,
-            ) = open_data(fits_files, sky_dist, conf, i)
-
-            truth_fft_test = calc_truth_fft(sky_dist_test)
-
-            if conf["amp_phase"]:
-                gridded_data_test = convert_amp_phase(gridded_data_test, sky_sim=False)
-                truth_amp_phase_test = convert_amp_phase(truth_fft_test, sky_sim=True)
-            else:
-                gridded_data_test = convert_real_imag(gridded_data_test, sky_sim=False)
-                truth_amp_phase_test = convert_real_imag(truth_fft_test, sky_sim=True)
-            assert gridded_data_test.shape[1] == 2
-
-            out = out_path / Path("samp_test" + str(i) + ".h5")
-
-            save_fft_pair(out, gridded_data_test, truth_amp_phase_test)
-    #
-    ###################
-
-    size_train = int(size // (1 + conf["train_valid_split"]))
-    size_valid = size - size_train
-    print(f"Training size: {size_train}, Validation size: {size_valid}")
-    bundle_train = int(size_train // conf["bundle_size"])
-    bundle_valid = int(size_valid // conf["bundle_size"])
-
-    ###################
-    # train
-    for i in tqdm(range(bundle_train)):
-        i += bundle_test
-        uv_data_train, freq_data_train, gridded_data_train, sky_dist_train = open_data(
-            fits_files, sky_dist, conf, i
-        )
-
-        truth_fft_train = calc_truth_fft(sky_dist_train)
-
-        if conf["amp_phase"]:
-            gridded_data_train = convert_amp_phase(gridded_data_train, sky_sim=False)
-            truth_amp_phase_train = convert_amp_phase(truth_fft_train, sky_sim=True)
-        else:
-            gridded_data_train = convert_real_imag(gridded_data_train, sky_sim=False)
-            truth_amp_phase_train = convert_real_imag(truth_fft_train, sky_sim=True)
-
-        out = out_path / Path("samp_train" + str(i - bundle_test) + ".h5")
-
-        save_fft_pair(out, gridded_data_train, truth_amp_phase_train)
-        train_index_last = i
-    #
-    ###################
-
-    ###################
-    # valid
-    for i in tqdm(range(bundle_valid)):
-        i += train_index_last
-        uv_data_valid, freq_data_valid, gridded_data_valid, sky_dist_valid = open_data(
-            fits_files, sky_dist, conf, i
-        )
-
-        truth_fft_valid = calc_truth_fft(sky_dist_valid)
-
-        if conf["amp_phase"]:
-            gridded_data_valid = convert_amp_phase(gridded_data_valid, sky_sim=False)
-            truth_amp_phase_valid = convert_amp_phase(truth_fft_valid, sky_sim=True)
-        else:
-            gridded_data_valid = convert_real_imag(gridded_data_valid, sky_sim=False)
-            truth_amp_phase_valid = convert_real_imag(truth_fft_valid, sky_sim=True)
-
-        out = out_path / Path("samp_valid" + str(i - train_index_last) + ".h5")
-
-        save_fft_pair(out, gridded_data_valid, truth_amp_phase_valid)
-    #
-    ###################
-
-
-def open_data(fits_files, sky_dist, conf, i):
-    sky_sim_bundle_size = len(open_bundles(sky_dist[0]))
-    uv_data = [
-        fits_files.get_uv_data(n).copy()
-        for n in np.arange(
-            i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size
-        )
-    ]
-    freq_data = np.array(
-        [
-            fits_files.get_freq_data(n)
-            for n in np.arange(
-                i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size
-            )
-        ],
-        dtype="object",
-    )
-    gridded_data = np.array(
-        [grid_data(data, freq, conf).copy() for data, freq in zip(uv_data, freq_data)]
-    )
-    bundle = np.floor_divide(i * sky_sim_bundle_size, sky_sim_bundle_size)
-    gridded_truth = np.array(
-        [
-            open_bundles(sky_dist[bundle])[n]
-            for n in np.arange(
-                i * sky_sim_bundle_size - bundle * sky_sim_bundle_size,
-                (i * sky_sim_bundle_size)
-                + sky_sim_bundle_size
-                - bundle * sky_sim_bundle_size,
-            )
-        ]
-    )
-    return uv_data, freq_data, gridded_data, gridded_truth
-
-
-def calc_truth_fft(sky_dist):
-    truth_fft = np.fft.fftshift(
-        np.fft.fft2(np.fft.fftshift(sky_dist, axes=(1, 2)), axes=(1, 2)), axes=(1, 2)
-    )
-    return truth_fft
 
 
 def ducc0_gridding(uv_data, freq_data):
@@ -227,10 +81,10 @@ def grid_data(uv_data, freq_data, conf):
 
     samps = np.array(
         [
-            np.append(u, -u),
-            np.append(v, -v),
+            np.append(-u, u),
+            np.append(-v, v),
             np.append(real, real),
-            np.append(imag, -imag),
+            np.append(-imag, imag),
         ]
     )
     # Generate Mask
@@ -241,12 +95,79 @@ def grid_data(uv_data, freq_data, conf):
 
     # bins are shifted by delta/2 so that maximum in uv space matches maximum
     # in numpy fft
-    bins = (
-        np.arange(start=-(N / 2) * delta, stop=(N / 2 + 1) * delta, step=delta)
-        - delta / 2
+    bins = np.arange(start=-((N + 1) / 2) * delta, stop=(N / 2) * delta, step=delta)
+
+    mask, *_ = np.histogram2d(samps[0], samps[1], bins=[bins, bins], density=False)
+    mask[mask == 0] = 1
+
+    mask_real, x_edges, y_edges = np.histogram2d(
+        samps[0], samps[1], bins=[bins, bins], weights=samps[2], density=False
+    )
+    mask_imag, x_edges, y_edges = np.histogram2d(
+        samps[0], samps[1], bins=[bins, bins], weights=samps[3], density=False
     )
-    # if len(bins) - 1 > N:
-    #   bins = np.delete(bins, -1)
+
+    mask_real /= mask
+    mask_imag /= mask
+
+    assert mask_real.shape == (conf["grid_size"], conf["grid_size"])
+    gridded_vis = np.zeros((2, N, N))
+    gridded_vis[0] = mask_real
+    gridded_vis[1] = mask_imag
+
+    return gridded_vis
+
+
+def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
+    """Grid data coming from the vis_loop."""
+    if vis_data.ndim != 7:
+        if vis_data.ndim == 3:
+            vis_data = np.stack(
+                [vis_data.real, vis_data.imag, np.ones(vis_data.shape)],
+                axis=3,
+            )[:, None, None, :, None, ...]
+    else:
+        raise ValueError("Expected vis_data to be of dimension 3 or 7")
+
+    if isinstance(freq_bands, float):
+        freq_bands = [freq_bands]
+
+    u = np.array([uu * np.array(freq) for freq in freq_bands]).ravel()
+    v = np.array([vv * np.array(freq) for freq in freq_bands]).ravel()
+
+    try:
+        stokes_vis = (
+            np.squeeze(
+                (vis_data[..., stokes_comp, 0] + 1j * vis_data[..., stokes_comp, 1])
+            )
+            .swapaxes(0, 1)
+            .ravel()
+        )
+    except AxisError:
+        stokes_vis = np.squeeze(
+            (vis_data[..., stokes_comp, 0] + 1j * vis_data[..., stokes_comp, 1])
+        ).ravel()
+
+    real = stokes_vis.real
+    imag = stokes_vis.imag
+
+    samps = np.array(
+        [
+            np.concatenate([-u, u]),
+            np.concatenate([-v, v]),
+            np.concatenate([real, real]),
+            np.concatenate([-imag, imag]),
+        ]
+    )
+    # Generate Mask
+    N = conf["grid_size"]  # image size
+    fov = conf["grid_fov"] * np.pi / (3600 * 180)
+
+    delta = 1 / fov
+
+    # bins are shifted by delta/2 so that maximum in uv space matches maximum
+    # in numpy fft
+    bins = np.arange(start=-((N + 1) / 2) * delta, stop=(N / 2) * delta, step=delta)
 
     mask, *_ = np.histogram2d(samps[0], samps[1], bins=[bins, bins], density=False)
     mask[mask == 0] = 1
@@ -265,6 +186,7 @@ def grid_data(uv_data, freq_data, conf):
     gridded_vis = np.zeros((2, N, N))
     gridded_vis[0] = mask_real
     gridded_vis[1] = mask_imag
+
     return gridded_vis
 
 
@@ -293,22 +215,3 @@ def convert_real_imag(data, sky_sim=False):
 
         data = np.stack((real, imag), axis=1)
     return data
-
-
-def save_fft_pair(path, x, y, name_x="x", name_y="y"):
-    """
-    write fft_pairs created in second analysis step to h5 file
-    """
-    half_image = x.shape[2] // 2
-    x = x[:, :, : half_image + 1, :]
-    y = y[:, :, : half_image + 1, :]
-    with h5py.File(path, "w") as hf:
-        hf.create_dataset(name_x, data=x)
-        hf.create_dataset(name_y, data=y)
-        hf.close()
-
-
-if __name__ == "__main__":
-    create_gridded_data_set(
-        "/net/big-tank/POOL/projects/radio/test_rime/create_dataset.toml"
-    )
diff --git a/pyvisgen/gridding/scripts/grid_data.py b/pyvisgen/gridding/scripts/grid_data.py
new file mode 100644
index 0000000..8e46e7d
--- /dev/null
+++ b/pyvisgen/gridding/scripts/grid_data.py
@@ -0,0 +1,13 @@
+import click
+
+from pyvisgen.gridding.utils import create_gridded_data_set
+
+
+@click.command()
+@click.option("-c" "--config", required=True, type=str, default=None)
+def grid_data(config):
+    create_gridded_data_set(config)
+
+
+if __name__ == "__main__":
+    grid_data()
diff --git a/pyvisgen/gridding/utils.py b/pyvisgen/gridding/utils.py
new file mode 100644
index 0000000..6aeebdd
--- /dev/null
+++ b/pyvisgen/gridding/utils.py
@@ -0,0 +1,154 @@
+import os
+from pathlib import Path
+
+import h5py
+import numpy as np
+from tqdm.autonotebook import tqdm
+
+from pyvisgen.fits.data import fits_data
+from pyvisgen.gridding import convert_amp_phase, convert_real_imag, grid_data
+from pyvisgen.utils.config import read_data_set_conf
+from pyvisgen.utils.data import load_bundles, open_bundles
+
+os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
+
+
+def create_gridded_data_set(config):
+    conf = read_data_set_conf(config)
+    out_path_fits = Path(conf["out_path_fits"])
+    out_path = Path(conf["out_path_gridded"])
+    out_path.mkdir(parents=True, exist_ok=True)
+
+    sky_dist = load_bundles(conf["in_path"])
+    fits_files = fits_data(out_path_fits)
+    size = len(fits_files)
+    print(size)
+
+    # test
+    if conf["num_test_images"] > 0:
+        bundle_test = int(conf["num_test_images"] // conf["bundle_size"])
+        size -= conf["num_test_images"]
+
+        for i in tqdm(range(bundle_test)):
+            (
+                uv_data_test,
+                freq_data_test,
+                gridded_data_test,
+                sky_dist_test,
+            ) = open_data(fits_files, sky_dist, conf, i)
+
+            truth_fft_test = calc_truth_fft(sky_dist_test)
+
+            if conf["amp_phase"]:
+                gridded_data_test = convert_amp_phase(gridded_data_test, sky_sim=False)
+                truth_amp_phase_test = convert_amp_phase(truth_fft_test, sky_sim=True)
+            else:
+                gridded_data_test = convert_real_imag(gridded_data_test, sky_sim=False)
+                truth_amp_phase_test = convert_real_imag(truth_fft_test, sky_sim=True)
+            assert gridded_data_test.shape[1] == 2
+
+            out = out_path / Path("samp_test" + str(i) + ".h5")
+
+            save_fft_pair(out, gridded_data_test, truth_amp_phase_test)
+
+    size_train = int(size // (1 + conf["train_valid_split"]))
+    size_valid = size - size_train
+    print(f"Training size: {size_train}, Validation size: {size_valid}")
+    bundle_train = int(size_train // conf["bundle_size"])
+    bundle_valid = int(size_valid // conf["bundle_size"])
+
+    # train
+    for i in tqdm(range(bundle_train)):
+        i += bundle_test
+        uv_data_train, freq_data_train, gridded_data_train, sky_dist_train = open_data(
+            fits_files, sky_dist, conf, i
+        )
+
+        truth_fft_train = calc_truth_fft(sky_dist_train)
+
+        if conf["amp_phase"]:
+            gridded_data_train = convert_amp_phase(gridded_data_train, sky_sim=False)
+            truth_amp_phase_train = convert_amp_phase(truth_fft_train, sky_sim=True)
+        else:
+            gridded_data_train = convert_real_imag(gridded_data_train, sky_sim=False)
+            truth_amp_phase_train = convert_real_imag(truth_fft_train, sky_sim=True)
+
+        out = out_path / Path("samp_train" + str(i - bundle_test) + ".h5")
+
+        save_fft_pair(out, gridded_data_train, truth_amp_phase_train)
+        train_index_last = i
+
+    # valid
+    for i in tqdm(range(bundle_valid)):
+        i += train_index_last
+        uv_data_valid, freq_data_valid, gridded_data_valid, sky_dist_valid = open_data(
+            fits_files, sky_dist, conf, i
+        )
+
+        truth_fft_valid = calc_truth_fft(sky_dist_valid)
+
+        if conf["amp_phase"]:
+            gridded_data_valid = convert_amp_phase(gridded_data_valid, sky_sim=False)
+            truth_amp_phase_valid = convert_amp_phase(truth_fft_valid, sky_sim=True)
+        else:
+            gridded_data_valid = convert_real_imag(gridded_data_valid, sky_sim=False)
+            truth_amp_phase_valid = convert_real_imag(truth_fft_valid, sky_sim=True)
+
+        out = out_path / Path("samp_valid" + str(i - train_index_last) + ".h5")
+
+        save_fft_pair(out, gridded_data_valid, truth_amp_phase_valid)
+
+
+def open_data(fits_files, sky_dist, conf, i):
+    sky_sim_bundle_size = len(open_bundles(sky_dist[0]))
+    uv_data = [
+        fits_files.get_uv_data(n).copy()
+        for n in np.arange(
+            i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size
+        )
+    ]
+    freq_data = np.array(
+        [
+            fits_files.get_freq_data(n)
+            for n in np.arange(
+                i * sky_sim_bundle_size, (i * sky_sim_bundle_size) + sky_sim_bundle_size
+            )
+        ],
+        dtype="object",
+    )
+    gridded_data = np.array(
+        [grid_data(data, freq, conf).copy() for data, freq in zip(uv_data, freq_data)]
+    )
+    bundle = np.floor_divide(i * sky_sim_bundle_size, sky_sim_bundle_size)
+    gridded_truth = np.array(
+        [
+            open_bundles(sky_dist[bundle])[n]
+            for n in np.arange(
+                i * sky_sim_bundle_size - bundle * sky_sim_bundle_size,
+                (i * sky_sim_bundle_size)
+                + sky_sim_bundle_size
+                - bundle * sky_sim_bundle_size,
+            )
+        ]
+    )
+    return uv_data, freq_data, gridded_data, gridded_truth
+
+
+def save_fft_pair(path, x, y, name_x="x", name_y="y"):
+    """
+    write fft_pairs created in second analysis step to h5 file
+    """
+    half_image = x.shape[2] // 2
+    x = x[:, :, : half_image + 1, :]
+    y = y[:, :, : half_image + 1, :]
+    with h5py.File(path, "w") as hf:
+        hf.create_dataset(name_x, data=x)
+        hf.create_dataset(name_y, data=y)
+        hf.close()
+
+
+def calc_truth_fft(sky_dist):
+    truth_fft = np.fft.fftshift(
+        np.fft.fft2(np.fft.fftshift(sky_dist, axes=(1, 2)), axes=(1, 2)), axes=(1, 2)
+    )
+    return truth_fft

From eb1b4042e115931ce9d1863e5fa865d7bf9e4b40 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:56:24 +0100
Subject: [PATCH 05/34] Catch case where times would be a 0dim array in
 Observation.get_baselines()

---
 pyvisgen/simulation/observation.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pyvisgen/simulation/observation.py b/pyvisgen/simulation/observation.py
index 5f2658e..7d2a851 100644
--- a/pyvisgen/simulation/observation.py
+++ b/pyvisgen/simulation/observation.py
@@ -388,8 +388,9 @@ def calc_time_steps(self):
             for i in range(self.num_scans)
             for j in range(int(self.scan_duration / self.int_time) + 1)
         ]
-        # +1 because t_1 is the stop time of t_0
-        # in order to save computing power we take one time more to complete interval
+        # +1 because t_1 is the stop time of t_0.
+        # In order to save computing power we take
+        # one time more to complete interval
         time = Time(time_lst)
 
         return time, time.mjd * (60 * 60 * 24)
@@ -477,6 +478,10 @@ def get_baselines(self, times):
         dataclass object
             baselines between telescopes with visibility flags
         """
+        # catch rare case where dimension of times is 0
+        if times.ndim == 0:
+            times = Time([times])
+
         # calculate GHA, local HA, and station elevation for all times.
         GHA, ha_local, el_st_all = self.calc_ref_elev(time=times)
 

From 015bdde97d36acdae8d618e45887f0fae595f236 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:57:11 +0100
Subject: [PATCH 06/34] Completely rewrite data set creation routine in
 pyvisgen.simulation.data_set

---
 pyvisgen/simulation/data_set.py | 902 +++++++++++---------------------
 1 file changed, 298 insertions(+), 604 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index f0d464a..b6afb71 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -1,21 +1,28 @@
-from datetime import datetime
+from datetime import datetime, timedelta
 from pathlib import Path
 
 import numpy as np
-import pandas as pd
 import torch
 from astropy import units as un
-from astropy.coordinates import AltAz, Angle, EarthLocation, SkyCoord
+from astropy.coordinates import AltAz, EarthLocation, SkyCoord
 from astropy.time import Time
 from tqdm.autonotebook import tqdm
 
-import pyvisgen.fits.writer as writer
 import pyvisgen.layouts.layouts as layouts
+from pyvisgen.gridding import (
+    calc_truth_fft,
+    convert_amp_phase,
+    convert_real_imag,
+    grid_vis_loop_data,
+    save_fft_pair,
+)
 from pyvisgen.simulation.observation import Observation
 from pyvisgen.simulation.visibility import vis_loop
 from pyvisgen.utils.config import read_data_set_conf
 from pyvisgen.utils.data import load_bundles, open_bundles
 
+DATEFMT = "%d-%m-%Y %H:%M:%S"
+
 
 class SimulateDataSet:
     def __init__(self):
@@ -25,398 +32,364 @@ def __init__(self):
     def from_config(
         cls,
         config: str | Path,
-        slurm=False,
-        job_id=None,
-        n=None,
-        image_key="y",
+        /,
+        image_key: str = "y",
+        *,
+        grid: bool = True,
+        slurm: bool = False,
+        job_id: int | None = None,
+        n: int | None = None,
+        date_fmt: str = DATEFMT,
+        num_images: int | None = None,
     ) -> None:
-        """
+        """Simulates data from parameters in a config file.
+
         Parameters
         ----------
         config : str or Path
-            path to config file
-        slurm : bool
-            True, if slurm is used
-        job_id : int
-            job_id, given by slurm
-        n : int
-            running index
+            Path to the config file.
+        image_key : str, optional
+            Key under which the true sky distributions are saved
+            in the HDF5 file. Default: ``'y'``
+        grid : bool, optional
+            If ``True``, apply gridding to visibility data and
+            save to HDF5 files. Default: ``True``
+        slurm : bool, optional
+            ``True``, if slurm is used, Default: ``False``
+        job_id : int or None, optional
+            ``job_id`` given by slurm. Default: ``None``
+        n : int or None, optional
+            Running index. Default: ``None``
+        date_fmt : str, optional
+            Format string for datetime objects.
+            Default: ``'%d-%m-%Y %H:%M:%S'``
+        num_images : int or None, optional
+            Number of combined total images in the bundles.
+            If not ``None``, will skip counting the images before
+            drawing the random parameters. Default: ``None``
         """
         cls = cls()
+        cls.key = image_key
+        cls.grid = grid
+        cls.slurm = slurm
         cls.job_id = job_id
         cls.n = n
-        cls.key = image_key
+        cls.date_fmt = date_fmt
 
         cls.conf = read_data_set_conf(config)
 
-        cls.out_path = Path(cls.conf["out_path_fits"])
-        cls.out_path.mkdir(parents=True, exist_ok=True)
-
-        cls.data = load_bundles(cls.conf["in_path"])
-
-        if slurm:
-            cls._run_slurm()
+        if grid:
+            cls.out_path = Path(cls.conf["out_path_gridded"])
         else:
-            cls._run()
+            cls.out_path = Path(cls.conf["out_path_fits"])
 
-        return cls
+        if not cls.out_path.is_dir():
+            cls.out_path.mkdir(parents=True)
 
-    @classmethod
-    def from_params(
-        cls,
-        images,
-        ra,
-        dec,
-        layout,
-        img_size,
-        fov,
-        int_time,
-        scan_start,
-        scan_duration,
-        num_scans,
-        scan_sep,
-        ref_freq,
-        freq_offsets,
-        bandwidths,
-        *,
-        corrupted=False,
-        noisy=0,
-        sensitivty_cut=1e-6,
-        mode="full",
-        grid_size=128,
-        grid_fov=100,
-        amp_phase=False,
-        num_test_images=500,
-        bundle_size=100,
-        train_valid_split=0.2,
-        seed=42,
-        device="cuda",
-        input_path="skies/",
-        out_path_fits="build/uvfits",
-        out_path_gridded="build/gridded",
-        slurm=False,
-        job_id=None,
-        n=None,
-    ) -> None:
-        """ """
-        cls.job_id = job_id
-        cls.n = n
+        cls.data = load_bundles(cls.conf["in_path"])
 
-        cls.conf = {
-            "mode": mode,
-            "device": device,
-            "seed": seed,
-            "layout": (layout,),
-            "img_size": (img_size,),
-            "fov_center_ra": ([ra, ra],),
-            "fov_center_dec": ([dec, dec],),
-            "fov_size": fov,
-            "corr_int_time": int_time,
-            "scan_start": scan_start,
-            "scan_duration": scan_duration,
-            "num_scans": num_scans,
-            "scan_separation": scan_sep,
-            "ref_frequency": ref_freq,
-            "frequency_offsets": freq_offsets,
-            "bandwidths": bandwidths,
-            "corrupted": corrupted,
-            "noisy": noisy,
-            "sensitivty_cut": sensitivty_cut,
-            "num_test_images": num_test_images,
-            "bundle_size": bundle_size,
-            "train_valid_split": train_valid_split,
-            "grid_size": grid_size,
-            "grid_fov": grid_fov,
-            "amp_phase": amp_phase,
-            "in_path": input_path,
-            "out_path_fits": out_path_fits,
-            "out_path_gridded": out_path_gridded,
-        }
-
-        cls.out_path = Path(cls.conf["out_path_fits"])
-
-        if not cls.out_path.isdir():
-            cls.out_path.mkdir(parents=True, exist_ok=True)
+        if not num_images:
+            len_data = tqdm(
+                range(len(cls.data)),
+                position=0,
+                leave=False,
+                desc="Counting Number of Images",
+                colour="#754fc9",
+            )
+            # get number of random parameter draws from number of images in data
+            num_draws = np.sum([len(cls.get_images(i)) for i in len_data])
 
-        cls.data = load_bundles(cls.conf["in_path"])
+        # draw parameters beforehand, i.e. outside the simulation loop
+        cls.create_sampling_rc(num_draws)
 
         if slurm:
             cls._run_slurm()
         else:
             cls._run()
 
-    def _run_slurm(self):
-        """ """
-        job_id = int(self.job_id + self.n * 500)
-        out = self.out_path / Path("vis_" + str(job_id) + ".fits")
+        return cls
 
-        imgs_bundle = len(open_bundles(self.data[0]))
-        bundle = torch.div(job_id, imgs_bundle, rounding_mode="floor")
-        image = job_id - bundle * imgs_bundle
+    def _run(self):
+        data = tqdm(
+            range(len(self.data)),
+            position=0,
+            desc="Processing Bundles",
+            colour="#52ba66",
+        )
 
-        SI = torch.tensor(open_bundles(self.data[bundle])[image])
+        samp_ops_idx = 0
+        for i in data:
+            SIs = self.get_images(i)
+            truth_fft = calc_truth_fft(SIs)
+
+            SIs = tqdm(
+                SIs,
+                position=1,
+                desc=f"Bundle {i}",
+                colour="#595cbd",
+                leave=False,
+            )
 
-        if len(SI.shape) == 2:
-            SI = SI.unsqueeze(0)
+            sim_data = []
+            for SI in SIs:
+                obs = self.create_observation(samp_ops_idx)
+                vis = vis_loop(
+                    obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]
+                )
 
-        obs = self.create_observation(self.conf)
-        vis_data = vis_loop(obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"])
-        hdu_list = writer.create_hdu_list(vis_data, obs)
-        hdu_list.writeto(out, overwrite=True)
+                if self.grid:
+                    sim_data.append(
+                        grid_vis_loop_data(
+                            vis.u, vis.v, vis.get_values(), self.freq_bands, self.conf
+                        )
+                    )
+                samp_ops_idx += 1
+
+            sim_data = np.array(sim_data)
+
+            if self.grid:
+                if self.conf["amp_phase"]:
+                    sim_data = convert_amp_phase(sim_data, sky_sim=False)
+                    truth_fft = convert_amp_phase(truth_fft, sky_sim=True)
+                else:
+                    sim_data = convert_real_imag(sim_data, sky_sim=False)
+                    truth_fft = convert_real_imag(truth_fft, sky_sim=True)
+
+                out = self.out_path / Path(
+                    f"samp_{self.conf['file_prefix']}_" + str(i) + ".h5"
+                )
 
-    def _run(self):
-        """ """
-        for i in tqdm(range(len(self.data))):
-            SIs = self.get_images(self.data, i)
+                save_fft_pair(out, sim_data, truth_fft)
 
-            for j, SI in enumerate(tqdm(SIs)):
-                obs = create_observation(self.conf)
-                vis_data = vis_loop(
-                    obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]
-                )
+                path_msg = self.conf["out_path_gridded"]
+            else:
+                path_msg = self.conf["out_path_fits"]
+
+        print(
+            f"Successfully simulated and saved {samp_ops_idx} images "
+            f"to '{path_msg}'!"
+        )
 
-                out = self.out_path / Path("vis_" + str(j + len(SIs) * i) + ".fits")
-                hdu_list = writer.create_hdu_list(vis_data, obs)
-                hdu_list.writeto(out, overwrite=True)
+    def _run_slurm():
+        raise NotImplementedError("Not implememented yet!")
 
-    def get_images(self, bundles, i) -> torch.tensor:
-        """ """
-        SIs = torch.tensor(open_bundles(bundles[i], key=self.key))
+    def get_images(self, i):
+        SIs = torch.tensor(open_bundles(self.data[i], key=self.key))
 
         if len(SIs.shape) == 3:
             SIs = SIs.unsqueeze(1)
 
         return SIs
 
-    def _create_observation(self) -> Observation:
-        rc = create_sampling_rc(self.conf)
+    def create_observation(self, i):
+        rc = self.samp_ops
+
+        # put the respective values inside the
+        # pol_kwargs and field_kwargs dicts.
+        pol_kwargs = dict(
+            delta=rc["delta"][i],
+            amp_ratio=rc["amp_ratio"][i],
+            random_state=self.conf["seed"],
+        )
+        field_kwargs = dict(
+            order=rc["order"][i],
+            scale=rc["scale"][i],
+            threshold=rc["threshold"],
+            random_state=self.conf["seed"],
+        )
 
         dense = False
-        if rc["mode"] == "dense":
+        if self.conf["mode"] == "dense":
             dense = True
 
         obs = Observation(
-            src_ra=rc["fov_center_ra"],
-            src_dec=rc["fov_center_dec"],
-            start_time=rc["scan_start"],
-            scan_duration=rc["scan_duration"],
-            num_scans=rc["num_scans"],
-            scan_separation=rc["scan_separation"],
-            integration_time=rc["corr_int_time"],
-            ref_frequency=rc["ref_frequency"],
-            frequency_offsets=rc["frequency_offsets"],
-            bandwidths=rc["bandwidths"],
-            fov=rc["fov_size"],
-            image_size=rc["img_size"],
-            array_layout=rc["layout"],
-            corrupted=rc["corrupted"],
-            device=rc["device"],
+            **self.samp_ops_const,
+            src_ra=rc["src_ra"][i],
+            src_dec=rc["src_dec"][i],
+            start_time=rc["start_time"][i],
+            scan_duration=int(rc["scan_duration"][i]),
+            num_scans=int(rc["num_scans"][i]),
+            pol_kwargs=pol_kwargs,
+            field_kwargs=field_kwargs,
             dense=dense,
-            sensitivity_cut=rc["sensitivity_cut"],
         )
 
         return obs
 
-    def _create_sampling_rc(self):
-        """
-        Draw sampling options and test if at least half of the
-        telescopes can see the source. If not, then new
-        parameters are drawn.
-
-        Parameters
-        ----------
-        conf : dict
-            simulation options
-
-        Returns
-        -------
-        dict
-            contains the observation parameters
-        """
-
-        global rng
-
-        if self.conf["seed"] is not None:
-            rng = np.random.default_rng(self.conf["seed"])
+    def create_sampling_rc(self, size):
+        if self.conf["seed"]:
+            self.rng = np.random.default_rng(self.conf["seed"])
         else:
-            rng = np.random.default_rng()
-
-        samp_ops = draw_sampling_opts()
-        array_layout = layouts.get_array_layout(self.conf["layout"][0])
-        half_telescopes = array_layout.x.shape[0] // 2
-
-        while test_opts(samp_ops) <= half_telescopes:
-            samp_ops = draw_sampling_opts(self.conf)
-
-        return samp_ops
-
-    def draw_sampling_opts(self):
-        """
-        Draw observation options from given intervals.
+            self.rng = np.random.default_rng()
 
-        Parameters
-        ----------
-        conf : dict
-            simulation options
-
-        Returns
-        -------
-        dict
-            contains randomly drawn observation options
-        """
-
-        if "rng" not in globals():
-            global rng
-            rng = np.random.default_rng(self.conf["seed"])
+        if self.conf["mode"] == "dense":
+            self.freq_bands = np.array(self.conf["ref_frequency"])
+        else:
+            self.freq_bands = np.array(self.conf["ref_frequency"]) + np.array(
+                self.conf["frequency_offsets"]
+            )
 
-        angles_ra = np.arange(
-            self.conf["fov_center_ra"][0][0],
-            self.conf["fov_center_ra"][0][1],
-            step=0.1,
-        )
-        angles_dec = np.arange(
-            self.conf["fov_center_dec"][0][0],
-            self.conf["fov_center_dec"][0][1],
-            step=0.1,
+        # Split sampling options into two dicts:
+        # samps_ops_const is always the same, values in
+        # samps_ops, however, will be drawn randomly.
+        self.samp_ops_const = dict(
+            array_layout=self.conf["layout"][0],
+            image_size=self.conf["img_size"][0],
+            fov=self.conf["fov_size"],
+            integration_time=self.conf["corr_int_time"],
+            scan_separation=self.conf["scan_separation"],
+            ref_frequency=self.conf["ref_frequency"],
+            frequency_offsets=self.conf["frequency_offsets"],
+            bandwidths=self.conf["bandwidths"],
+            corrupted=self.conf["corrupted"],
+            device=self.conf["device"],
+            sensitivity_cut=self.conf["sensitivty_cut"],
+            polarisation=self.conf["polarisation"],
+        )  # NOTE: scan_separation and integration_time may change in the future
+
+        # get second half of the sampling options;
+        # this is the randomly drawn, i.e. non-constant, part
+        self.samp_ops = self.draw_sampling_opts(size)
+
+        test_idx = tqdm(
+            range(self.samp_ops["src_ra"].size),
+            position=0,
+            desc="Pre-drawing and testing sample parameters",
+            colour="#00c1a2",
+            leave=False,
         )
 
-        fov_center_ra = rng.choice(angles_ra)
-        fov_center_dec = rng.choice(angles_dec)
+        for i in test_idx:
+            self.test_rand_opts(i)
 
-        start_time_l = datetime.strptime(
-            self.conf["scan_start"][0], "%d-%m-%Y %H:%M:%S"
-        )
-        start_time_h = datetime.strptime(
-            self.conf["scan_start"][1], "%d-%m-%Y %H:%M:%S"
-        )
-        start_times = pd.date_range(
-            start_time_l,
-            start_time_h,
-            freq="1h",
-        ).strftime("%d-%m-%Y %H:%M:%S")
-
-        scan_start = rng.choice(
-            [datetime.strptime(time, "%d-%m-%Y %H:%M:%S") for time in start_times]
+    def draw_sampling_opts(self, size):
+        ra = self.rng.uniform(
+            self.conf["fov_center_ra"][0][0], self.conf["fov_center_ra"][0][1], size
         )
-        scan_duration = int(
-            rng.integers(self.conf["scan_duration"][0], self.conf["scan_duration"][1])
+        dec = self.rng.uniform(
+            self.conf["fov_center_dec"][0][0], self.conf["fov_center_dec"][0][1], size
         )
 
-        num_scans = int(
-            rng.integers(self.conf["num_scans"][0], self.conf["num_scans"][1])
+        start_time_l = datetime.strptime(self.conf["scan_start"][0], self.date_fmt)
+        start_time_h = datetime.strptime(self.conf["scan_start"][1], self.date_fmt)
+        start_times = np.arange(start_time_l, start_time_h, timedelta(hours=1)).astype(
+            datetime
         )
 
-        # TODO: is this really necessary?
-        opts = np.array(
-            [
-                self.conf["mode"],
-                self.conf["layout"][0],
-                self.conf["img_size"][0],
-                fov_center_ra,
-                fov_center_dec,
-                self.conf["fov_size"],
-                self.conf["corr_int_time"],
-                scan_start,
-                scan_duration,
-                num_scans,
-                self.conf["scan_separation"],
-                self.conf["ref_frequency"],
-                self.conf["frequency_offsets"],
-                self.conf["bandwidths"],
-                self.conf["corrupted"],
-                self.conf["device"],
-                self.conf["sensitivty_cut"],
-            ],
-            dtype="object",
+        scan_start = self.rng.choice(start_times, size)
+        scan_duration = self.rng.integers(
+            self.conf["scan_duration"][0],
+            self.conf["scan_duration"][1],
+            size,
+        )
+        num_scans = self.rng.integers(
+            self.conf["num_scans"][0], self.conf["num_scans"][1], size
         )
 
-        samp_ops = {
-            "mode": opts[0],
-            "layout": opts[1],
-            "img_size": opts[2],
-            "fov_center_ra": opts[3],
-            "fov_center_dec": opts[4],
-            "fov_size": opts[5],
-            "corr_int_time": opts[6],
-            "scan_start": opts[7],
-            "scan_duration": opts[8],
-            "num_scans": opts[9],
-            "scan_separation": opts[10],
-            "ref_frequency": opts[11],
-            "frequency_offsets": opts[12],
-            "bandwidths": opts[13],
-            "corrupted": opts[14],
-            "device": opts[15],
-            "sensitivity_cut": opts[16],
-        }
-        return samp_ops
+        if scan_duration.size == 1:
+            scan_duration = scan_duration.astype(int)
 
-    def test_opts(self, rc):
-        """
-        Compute the number of telescopes that can observe the source given
-        certain randomly drawn parameters.
-
-        Parameters
-        ----------
-        rc : dict
-            randomly drawn observational parameters
+        if num_scans.size == 1:
+            num_scans = num_scans.astype(int)
 
-        Returns
-        -------
+        # if polarisation is None, we don't need to enter the
+        # conditional below, so we set delta, amp_ratio, field_order,
+        # and field_scale to None.
+        delta, amp_ratio, field_order, field_scale = np.full((4, size), None)
 
-        """
-        array_layout = layouts.get_array_layout(rc["layout"])
+        if self.conf["polarisation"]:
+            if self.conf["pol_delta"]:
+                delta = np.repeat(self.conf["pol_delta"], size)
+            else:
+                delta = self.rng.uniform(0, 180, size)
 
-        src_crd = SkyCoord(
-            ra=rc["fov_center_ra"], dec=rc["fov_center_dec"], unit=(un.deg, un.deg)
-        )
+            if self.conf["pol_amp_ratio"]:
+                amp_ratio = np.repeat(self.conf["pol_amp_ratio"], size)
+            else:
+                amp_ratio = self.rng.uniform(0, 1, size)
 
-        time = self._calc_time_steps(rc)
+            if self.conf["field_order"]:
+                field_order = np.repeat(self.conf["field_order"], size).reshape(-1, 2)
+            else:
+                field_order = np.repeat(self.rng.uniform(0, 1, size), 2).reshape(-1, 2)
 
-        _, el_st_0 = self.calc_ref_elev(src_crd, time[0], array_layout)
-        _, el_st_1 = self.calc_ref_elev(src_crd, time[1], array_layout)
+            if self.conf["field_scale"]:
+                field_scale = np.stack(
+                    np.repeat(self.conf["field_scale"], size).reshape(2, -1), axis=1
+                )
+            else:
+                a = self.rng.uniform(0, 1 - 1e-6, size)
+                b = np.repeat(1, size, dtype=float)
+                field_scale = np.stack((a, b), axis=1)
+
+        samp_ops = dict(
+            src_ra=ra,
+            src_dec=dec,
+            start_time=scan_start,
+            scan_duration=scan_duration,
+            num_scans=num_scans,
+            delta=delta,
+            amp_ratio=amp_ratio,
+            order=field_order,
+            scale=field_scale,
+            threshold=self.conf["field_threshold"],
+        )
+        # NOTE: We don't need to draw random values for threshold
+        # as threshold=None should be suitable for almost all cases.
+        # However, since threshold has to be in the field_kwargs dict
+        # later, we need to include it here instead of inside the
+        # samp_ops_const dictionary.
 
-        el_min = 15
-        el_max = 85
+        return samp_ops
 
-        active_telescopes_0 = np.sum((el_st_0 >= el_min) & (el_st_0 <= el_max))
-        active_telescopes_1 = np.sum((el_st_1 >= el_min) & (el_st_1 <= el_max))
+    def test_rand_opts(self, i):
+        array = layouts.get_array_layout(self.samp_ops_const["array_layout"])
 
-        return min(active_telescopes_0, active_telescopes_1)
+        time_steps = self.calc_time_steps(i)
+        src_crd = SkyCoord(
+            self.samp_ops["src_ra"][i], self.samp_ops["src_dec"][i], unit=un.deg
+        )
 
-    def calc_ref_elev(self, src_crd, time, array_layout):
-        if time.shape == ():
-            time = time[None]
+        total_stations = len(array.st_num)
 
-        # Calculate for all times
-        # calculate GHA, Greenwich as reference for EHT
-        ha_all = Angle(
-            [t.sidereal_time("apparent", "greenwich") - src_crd.ra for t in time]
+        locations = EarthLocation.from_geocentric(
+            x=array.x,
+            y=array.y,
+            z=array.z,
+            unit=un.m,
         )
 
-        # calculate elevations
-        el_st_all = src_crd.transform_to(
-            AltAz(
-                obstime=time.reshape(len(time), -1),
-                location=EarthLocation.from_geocentric(
-                    np.repeat([array_layout.x], len(time), axis=0),
-                    np.repeat([array_layout.y], len(time), axis=0),
-                    np.repeat([array_layout.z], len(time), axis=0),
-                    unit=un.m,
-                ),
-            )
-        ).alt.degree
-
-        assert len(ha_all.value) == len(el_st_all)
+        altaz_frames = AltAz(obstime=time_steps[:, None], location=locations[None])
+        src_alt = src_crd.transform_to(altaz_frames).alt.degree
 
-        return ha_all, el_st_all
+        # check which stations can see the source for each time step
+        visible = np.logical_and(
+            array.el_low.numpy() <= src_alt, src_alt <= array.el_high.numpy()
+        )
 
-    def _calc_time_steps(self, conf):
-        start_time = Time(conf["scan_start"].isoformat(), format="isot")
-        scan_separation = conf["scan_separation"]
-        num_scans = conf["num_scans"]
-        scan_duration = conf["scan_duration"]
-        int_time = conf["corr_int_time"]
+        # We want at least half of the telescopes to see the source
+        visible_count_per_t = visible.sum(axis=1)
+        visible_half = visible_count_per_t > total_stations // 2
+
+        # If the source is not seen by half the telescopes half of the observation time,
+        # we redraw the source ra and dec and scan start times, duration,
+        # and number of scans. Then we test again by calling this function recursively.
+        if visible_half.sum() < time_steps.size // 2:
+            redrawn_samp_ops = self.draw_sampling_opts(1)
+            self.samp_ops["src_ra"][i] = redrawn_samp_ops["src_ra"][0]
+            self.samp_ops["src_dec"][i] = redrawn_samp_ops["src_dec"][0]
+            self.samp_ops["start_time"][i] = redrawn_samp_ops["start_time"][0]
+            self.samp_ops["scan_duration"][i] = redrawn_samp_ops["scan_duration"][0]
+            self.samp_ops["num_scans"][i] = redrawn_samp_ops["num_scans"][0]
+
+            self.test_rand_opts(i)
+
+    def calc_time_steps(self, i):
+        start_time = Time(self.samp_ops["start_time"][i].isoformat(), format="isot")
+        scan_separation = self.samp_ops_const["scan_separation"]
+        num_scans = self.samp_ops["num_scans"][i]
+        scan_duration = self.samp_ops["scan_duration"][i]
+        int_time = self.samp_ops_const["integration_time"]
 
         time_lst = [
             start_time
@@ -426,288 +399,9 @@ def _calc_time_steps(self, conf):
             for i in range(num_scans)
             for j in range(int(scan_duration / int_time) + 1)
         ]
-        # +1 because t_1 is the stop time of t_0
-        # in order to save computing power we take one time more to complete interval
+        # +1 because t_1 is the stop time of t_0.
+        # In order to save computing power we take
+        # one time more to complete interval
         time = Time(time_lst)
 
         return time
-
-
-def simulate_data_set(config, slurm=False, job_id=None, n=None):
-    """
-    Wrapper function for simulating visibilities.
-    Distinction between slurm and non-threaded simulation.
-
-    Parameters
-    ----------
-    config : toml file
-        path to config file
-    slurm : bool
-        True, if slurm is used
-    job_id : int
-        job_id, given by slurm
-    n : int
-        running index
-
-    """
-    conf = read_data_set_conf(config)
-    out_path = Path(conf["out_path_fits"])
-    out_path.mkdir(parents=True, exist_ok=True)
-    data = load_bundles(conf["in_path"])
-
-    if slurm:
-        job_id = int(job_id + n * 500)
-        out = out_path / Path("vis_" + str(job_id) + ".fits")
-        imgs_bundle = len(open_bundles(data[0]))
-        bundle = torch.div(job_id, imgs_bundle, rounding_mode="floor")
-        image = job_id - bundle * imgs_bundle
-        SI = torch.tensor(open_bundles(data[bundle])[image])
-        if len(SI.shape) == 2:
-            SI = SI.unsqueeze(0)
-
-        obs = create_observation(conf)
-        vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"])
-        hdu_list = writer.create_hdu_list(vis_data, obs)
-        hdu_list.writeto(out, overwrite=True)
-
-    else:
-        for i in tqdm(range(len(data))):
-            SIs = get_images(data, i)
-
-            for j, SI in enumerate(tqdm(SIs)):
-                obs = create_observation(conf)
-                vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"])
-
-                out = out_path / Path("vis_" + str(j + len(SIs) * i) + ".fits")
-                hdu_list = writer.create_hdu_list(vis_data, obs)
-                hdu_list.writeto(out, overwrite=True)
-
-
-def get_images(bundles, i):
-    SIs = torch.tensor(open_bundles(bundles[i]))
-    if len(SIs.shape) == 3:
-        SIs = SIs.unsqueeze(1)
-    return SIs
-
-
-def create_observation(conf):
-    rc = create_sampling_rc(conf)
-    dense = False
-    if rc["mode"] == "dense":
-        dense = True
-
-    obs = Observation(
-        src_ra=rc["fov_center_ra"],
-        src_dec=rc["fov_center_dec"],
-        start_time=rc["scan_start"],
-        scan_duration=rc["scan_duration"],
-        num_scans=rc["num_scans"],
-        scan_separation=rc["scan_separation"],
-        integration_time=rc["corr_int_time"],
-        ref_frequency=rc["ref_frequency"],
-        frequency_offsets=rc["frequency_offsets"],
-        bandwidths=rc["bandwidths"],
-        fov=rc["fov_size"],
-        image_size=rc["img_size"],
-        array_layout=rc["layout"],
-        corrupted=rc["corrupted"],
-        device=rc["device"],
-        dense=dense,
-        sensitivity_cut=rc["sensitivity_cut"],
-    )
-    return obs
-
-
-def create_sampling_rc(conf):
-    """
-    Draw sampling options and test if atleast half of the telescopes can see the source.
-    If not, then new parameters are drawn.
-
-    Parameters
-    ----------
-    conf : dict
-        simulation options
-
-    Returns
-    -------
-    dict
-        contains the observation parameters
-    """
-
-    global rng
-
-    if conf["seed"] is not None:
-        rng = np.random.default_rng(conf["seed"])
-    else:
-        rng = np.random.default_rng()
-
-    samp_ops = draw_sampling_opts(conf)
-    array_layout = layouts.get_array_layout(conf["layout"][0])
-    half_telescopes = array_layout.x.shape[0] // 2
-
-    while test_opts(samp_ops) <= half_telescopes:
-        samp_ops = draw_sampling_opts(conf)
-
-    return samp_ops
-
-
-def draw_sampling_opts(conf):
-    """
-    Draw observation options from given intervals.
-
-    Parameters
-    ----------
-    conf : dict
-        simulation options
-
-    Returns
-    -------
-    dict
-        contains randomly drawn observation options
-    """
-
-    if "rng" not in globals():
-        global rng
-        rng = np.random.default_rng(conf["seed"])
-
-    angles_ra = np.arange(
-        conf["fov_center_ra"][0][0], conf["fov_center_ra"][0][1], step=0.1
-    )
-    fov_center_ra = rng.choice(angles_ra)
-
-    angles_dec = np.arange(
-        conf["fov_center_dec"][0][0], conf["fov_center_dec"][0][1], step=0.1
-    )
-    fov_center_dec = rng.choice(angles_dec)
-    start_time_l = datetime.strptime(conf["scan_start"][0], "%d-%m-%Y %H:%M:%S")
-    start_time_h = datetime.strptime(conf["scan_start"][1], "%d-%m-%Y %H:%M:%S")
-    start_times = pd.date_range(start_time_l, start_time_h, freq="1h").strftime(
-        "%d-%m-%Y %H:%M:%S"
-    )
-    scan_start = rng.choice(
-        [datetime.strptime(time, "%d-%m-%Y %H:%M:%S") for time in start_times]
-    )
-    scan_duration = int(
-        rng.integers(conf["scan_duration"][0], conf["scan_duration"][1])
-    )
-    num_scans = int(rng.integers(conf["num_scans"][0], conf["num_scans"][1]))
-    opts = np.array(
-        [
-            conf["mode"],
-            conf["layout"][0],
-            conf["img_size"][0],
-            fov_center_ra,
-            fov_center_dec,
-            conf["fov_size"],
-            conf["corr_int_time"],
-            scan_start,
-            scan_duration,
-            num_scans,
-            conf["scan_separation"],
-            conf["ref_frequency"],
-            conf["frequency_offsets"],
-            conf["bandwidths"],
-            conf["corrupted"],
-            conf["device"],
-            conf["sensitivty_cut"],
-        ],
-        dtype="object",
-    )
-    samp_ops = {
-        "mode": opts[0],
-        "layout": opts[1],
-        "img_size": opts[2],
-        "fov_center_ra": opts[3],
-        "fov_center_dec": opts[4],
-        "fov_size": opts[5],
-        "corr_int_time": opts[6],
-        "scan_start": opts[7],
-        "scan_duration": opts[8],
-        "num_scans": opts[9],
-        "scan_separation": opts[10],
-        "ref_frequency": opts[11],
-        "frequency_offsets": opts[12],
-        "bandwidths": opts[13],
-        "corrupted": opts[14],
-        "device": opts[15],
-        "sensitivity_cut": opts[16],
-    }
-    return samp_ops
-
-
-def test_opts(rc):
-    """
-    Compute the number of telescopes that can observe the source given
-    certain randomly drawn parameters.
-
-    Parameters
-    ----------
-    rc : dict
-        randomly drawn observational parameters
-
-    Returns
-    -------
-
-    """
-    array_layout = layouts.get_array_layout(rc["layout"])
-    src_crd = SkyCoord(
-        ra=rc["fov_center_ra"], dec=rc["fov_center_dec"], unit=(un.deg, un.deg)
-    )
-    time = calc_time_steps(rc)
-    _, el_st_0 = calc_ref_elev(src_crd, time[0], array_layout)
-    _, el_st_1 = calc_ref_elev(src_crd, time[1], array_layout)
-    el_min = 15
-    el_max = 85
-    active_telescopes_0 = np.sum((el_st_0 >= el_min) & (el_st_0 <= el_max))
-    active_telescopes_1 = np.sum((el_st_1 >= el_min) & (el_st_1 <= el_max))
-    return min(active_telescopes_0, active_telescopes_1)
-
-
-def calc_ref_elev(src_crd, time, array_layout):
-    if time.shape == ():
-        time = time[None]
-    # Calculate for all times
-    # calculate GHA, Greenwich as reference for EHT
-    ha_all = Angle(
-        [t.sidereal_time("apparent", "greenwich") - src_crd.ra for t in time]
-    )
-
-    # calculate elevations
-    el_st_all = src_crd.transform_to(
-        AltAz(
-            obstime=time.reshape(len(time), -1),
-            location=EarthLocation.from_geocentric(
-                np.repeat([array_layout.x], len(time), axis=0),
-                np.repeat([array_layout.y], len(time), axis=0),
-                np.repeat([array_layout.z], len(time), axis=0),
-                unit=un.m,
-            ),
-        )
-    ).alt.degree
-    assert len(ha_all.value) == len(el_st_all)
-    return ha_all, el_st_all
-
-
-def calc_time_steps(conf):
-    start_time = Time(conf["scan_start"].isoformat(), format="isot")
-    scan_separation = conf["scan_separation"]
-    num_scans = conf["num_scans"]
-    scan_duration = conf["scan_duration"]
-    int_time = conf["corr_int_time"]
-
-    time_lst = [
-        start_time
-        + scan_separation * i * un.second
-        + i * scan_duration * un.second
-        + j * int_time * un.second
-        for i in range(num_scans)
-        for j in range(int(scan_duration / int_time) + 1)
-    ]
-    # +1 because t_1 is the stop time of t_0
-    # in order to save computing power we take one time more to complete interval
-    time = Time(time_lst)
-    return time
-
-
-if __name__ == "__main__":
-    simulate_data_set()

From 3014cdbdf2f605daed669bfd31af7300425a8b81 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:57:42 +0100
Subject: [PATCH 07/34] Rewrite data set creation script

---
 pyvisgen/simulation/scripts/create_dataset.py | 66 ++++++++++++++++---
 1 file changed, 57 insertions(+), 9 deletions(-)

diff --git a/pyvisgen/simulation/scripts/create_dataset.py b/pyvisgen/simulation/scripts/create_dataset.py
index 4af7903..e01242f 100644
--- a/pyvisgen/simulation/scripts/create_dataset.py
+++ b/pyvisgen/simulation/scripts/create_dataset.py
@@ -1,13 +1,13 @@
 import click
 
-from pyvisgen.gridding.gridder import create_gridded_data_set
-from pyvisgen.simulation.data_set import simulate_data_set
+from pyvisgen.simulation.data_set import SimulateDataSet
 
 
 @click.command()
-@click.argument("configuration_path", type=click.Path(exists=True, dir_okay=False))
-@click.option("--job_id", required=False, type=int, default=None)
-@click.option("--n", required=False, type=int, default=None)
+@click.argument(
+    "configuration_path",
+    type=click.Path(exists=True, dir_okay=False),
+)
 @click.option(
     "--mode",
     type=click.Choice(
@@ -20,13 +20,61 @@
     ),
     default="simulate",
 )
-def main(configuration_path, mode, job_id=None, n=None):
+@click.option(
+    "-k",
+    "--key",
+    required=False,
+    type=str,
+    default="y",
+    help="Key under which the images are saved in the HDF5 file",
+)
+@click.option("-j", "--job_id", required=False, type=int, default=None)
+@click.option("--n", required=False, type=int, default=None)
+@click.option(
+    "--num_images",
+    required=False,
+    type=int,
+    default=None,
+    help="""
+        Number of images in all bundles combined.
+        Will skip the automatic count.
+    """,
+)
+def main(
+    configuration_path: str | click.Path,
+    mode: str,
+    key: str = "y",
+    job_id=None,
+    n=None,
+    date_fmt="%d-%m-%Y %H:%M:%S",
+    num_images: int | None = None,
+):
     if mode == "simulate":
-        simulate_data_set(configuration_path)
+        SimulateDataSet.from_config(
+            configuration_path,
+            image_key=key,
+            grid=False,
+            date_fmt=date_fmt,
+            num_images=num_images,
+        )
     if mode == "slurm":
-        simulate_data_set(configuration_path, slurm=True, job_id=job_id, n=n)
+        SimulateDataSet.from_config(
+            configuration_path,
+            image_key=key,
+            slurm=True,
+            job_id=job_id,
+            n=n,
+            date_fmt=date_fmt,
+            num_images=num_images,
+        )
     if mode == "gridding":
-        create_gridded_data_set(configuration_path)
+        SimulateDataSet.from_config(
+            configuration_path,
+            image_key=key,
+            grid=True,
+            date_fmt=date_fmt,
+            num_images=num_images,
+        )
 
 
 if __name__ == "__main__":

From c1fdf71fa3925c405cc8cb2c15266a45a362b940 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:58:10 +0100
Subject: [PATCH 08/34] Update config parser for new data set creation routine

---
 pyvisgen/utils/config.py | 41 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/pyvisgen/utils/config.py b/pyvisgen/utils/config.py
index ebad4e2..64b91ef 100644
--- a/pyvisgen/utils/config.py
+++ b/pyvisgen/utils/config.py
@@ -15,6 +15,7 @@ def read_data_set_conf(conf_toml):
         simulation configuration
     """
     config = toml.load(conf_toml)
+    config = sanitize_conf(config)
     conf = {}
 
     conf["mode"] = config["sampling_options"]["mode"]
@@ -37,6 +38,13 @@ def read_data_set_conf(conf_toml):
     conf["noisy"] = config["sampling_options"]["noisy"]
     conf["sensitivty_cut"] = config["sampling_options"]["sensitivity_cut"]
 
+    conf["polarisation"] = config["polarisation_options"]["mode"]
+    conf["pol_delta"] = config["polarisation_options"]["delta"]
+    conf["pol_amp_ratio"] = config["polarisation_options"]["amp_ratio"]
+    conf["field_order"] = config["polarisation_options"]["field_order"]
+    conf["field_scale"] = config["polarisation_options"]["field_scale"]
+    conf["field_threshold"] = config["polarisation_options"]["field_threshold"]
+
     conf["num_test_images"] = config["bundle_options"]["num_test_images"]
     conf["bundle_size"] = config["bundle_options"]["bundle_size"]
     conf["train_valid_split"] = config["bundle_options"]["train_valid_split"]
@@ -46,4 +54,37 @@ def read_data_set_conf(conf_toml):
     conf["in_path"] = config["bundle_options"]["in_path"]
     conf["out_path_fits"] = config["bundle_options"]["out_path_fits"]
     conf["out_path_gridded"] = config["bundle_options"]["out_path_gridded"]
+    conf["file_prefix"] = config["bundle_options"]["file_prefix"]
+
+    # handle case if file_prefix = None
+    if not conf["file_prefix"]:
+        conf["file_prefix"] = ""
+
     return conf
+
+
+def sanitize_conf(conf: dict) -> dict:
+    """Sanitizes a given dict by replacinginstances of
+    'none' str with None.
+
+    Parameters
+    ----------
+    conf : list
+        Unsanitized config dict.
+
+    Returns
+    -------
+    sanitized_conf : list
+        Sanitized conf dict where all instances of 'none'
+        are replaced with None.
+    """
+    sanitized_conf = {}
+    for key, val in conf.items():
+        if isinstance(val, dict):
+            val = sanitize_conf(val)
+        elif isinstance(val, str) and val == "none":
+            val = None
+
+        sanitized_conf[key] = val
+
+    return sanitized_conf

From 07c2b6a603012eca337d016a766145d96a2387f9 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:58:50 +0100
Subject: [PATCH 09/34] Remove torch.flip in vis_loop

---
 pyvisgen/simulation/visibility.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyvisgen/simulation/visibility.py b/pyvisgen/simulation/visibility.py
index fb15eb0..507780d 100644
--- a/pyvisgen/simulation/visibility.py
+++ b/pyvisgen/simulation/visibility.py
@@ -386,7 +386,7 @@ def vis_loop(
         raise ValueError("Expected batch_size to be 'auto' or type int")
 
     pol = Polarisation(
-        torch.flip(SI, dims=[1]),
+        SI,
         sensitivity_cut=obs.sensitivity_cut,
         polarisation=obs.polarisation,
         device=obs.device,

From 78022889295a9e2d03612ba640b38510d974025e Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 16:59:10 +0100
Subject: [PATCH 10/34] Update pyvisgen.simulation.__init__

---
 pyvisgen/simulation/__init__.py | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/pyvisgen/simulation/__init__.py b/pyvisgen/simulation/__init__.py
index c7fd0c7..4d81ab6 100644
--- a/pyvisgen/simulation/__init__.py
+++ b/pyvisgen/simulation/__init__.py
@@ -1,14 +1,5 @@
 from .array import Array
-from .data_set import (
-    calc_ref_elev,
-    calc_time_steps,
-    create_observation,
-    create_sampling_rc,
-    draw_sampling_opts,
-    get_images,
-    simulate_data_set,
-    test_opts,
-)
+from .data_set import SimulateDataSet
 from .observation import Baselines, Observation, ValidBaselineSubset
 from .scan import angularDistance, calc_beam, calc_fourier, integrate, jinc, rime
 from .visibility import Visibilities, generate_noise, vis_loop
@@ -17,22 +8,16 @@
     "Array",
     "Baselines",
     "Observation",
+    "SimulateDataSet",
     "ValidBaselineSubset",
     "Visibilities",
     "angularDistance",
     "calc_beam",
     "calc_fourier",
-    "calc_ref_elev",
     "calc_time_steps",
-    "create_observation",
-    "create_sampling_rc",
-    "draw_sampling_opts",
     "generate_noise",
-    "get_images",
     "integrate",
     "jinc",
     "rime",
-    "simulate_data_set",
-    "test_opts",
     "vis_loop",
 ]

From aba72f4685a924f8f5e004fd8eab672b26d43dea Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Wed, 29 Jan 2025 17:03:49 +0100
Subject: [PATCH 11/34] Update default data set config

---
 config/default_data_set.toml | 37 ++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/config/default_data_set.toml b/config/default_data_set.toml
index 6310c87..ae72748 100644
--- a/config/default_data_set.toml
+++ b/config/default_data_set.toml
@@ -2,30 +2,39 @@
 mode = "full"
 device = "cpu"
 seed = 1337
-layout = "vla"
-img_size = 128
+layout = "vlba"
+img_size = 1024
 fov_center_ra = [100, 110]
 fov_center_dec = [30, 40]
-fov_size = 100
+fov_size = 0.24
 corr_int_time = 30.0
-scan_start = ["16-01-2020 00:04:01", "16-01-2020 08:59:59"]
-scan_duration = [60, 90]
-num_scans = [1, 2]
+scan_start = ["01-01-1995 00:00:01", "01-01-2025 23:59:59"]
+scan_duration = [20, 600]
+num_scans = [6, 10]
 scan_separation = 360
-ref_frequency = 15.21e9
-frequency_offsets = [0e8, 0.8e8, 1.44e8, 2.08e8]
-bandwidths = [6.4e7, 6.4e7, 6.4e7, 6.4e7]
+ref_frequency = 15.17600e9
+frequency_offsets = [0e8, 1.28e8, 2.56e8, 3.84e8]
+bandwidths = [1.28e8, 1.28e8, 1.28e8, 1.28e8]
 noisy = 0
 corrupted = false
 sensitivity_cut = 1e-6
 
+[polarisation_options]
+mode = "none"  # linear, circular, or "none"
+delta = 45  # phase angle 
+amp_ratio = 0.5  # polarisation amplitude ratio
+field_order = [0.01, 0.01]  # (x, y) orders of the random polarisation field
+field_scale = [0, 1]  # scaling of the intensity of the polarisation field
+field_threshold = "none"
+
 [bundle_options]
-in_path = "skies/"
-out_path_fits = "build/uvfits"
-out_path_gridded = "build/gridded"
+file_prefix="train"  # e.g. train, test, or valid. If "none", no prefix will be applied
+in_path = "/path/to/input/data/"
+out_path_fits = "/output/path/"
+out_path_gridded = "/output/path/for/gridding/"
 num_test_images = 500
 bundle_size = 100
 train_valid_split = 0.2
-grid_size = 128
-grid_fov = 100
+grid_size = 1024
+grid_fov = 0.24
 amp_phase = false

From 5ae214c74a3b0c4fa3ae32685443c46224449507 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Thu, 30 Jan 2025 15:28:08 +0100
Subject: [PATCH 12/34] Test shapes, fix fft truth

---
 pyvisgen/gridding/gridder.py    |  4 ++--
 pyvisgen/gridding/utils.py      | 15 ++++++++++++++-
 pyvisgen/simulation/data_set.py |  3 +++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py
index d7d2ef9..1ab198c 100644
--- a/pyvisgen/gridding/gridder.py
+++ b/pyvisgen/gridding/gridder.py
@@ -194,7 +194,7 @@ def convert_amp_phase(data, sky_sim=False):
     if sky_sim:
         amp = np.abs(data)
         phase = np.angle(data)
-        data = np.stack((amp, phase), axis=1)
+        data = np.concatenate((amp, phase), axis=1)
     else:
         test = data[:, 0] + 1j * data[:, 1]
         amp = np.abs(test)
@@ -208,7 +208,7 @@ def convert_real_imag(data, sky_sim=False):
         real = data.real
         imag = data.imag
 
-        data = np.stack((real, imag), axis=1)
+        data = np.concatenate((real, imag), axis=1)
     else:
         real = data[:, 0]
         imag = data[:, 1]
diff --git a/pyvisgen/gridding/utils.py b/pyvisgen/gridding/utils.py
index 6aeebdd..438385e 100644
--- a/pyvisgen/gridding/utils.py
+++ b/pyvisgen/gridding/utils.py
@@ -141,14 +141,27 @@ def save_fft_pair(path, x, y, name_x="x", name_y="y"):
     half_image = x.shape[2] // 2
     x = x[:, :, : half_image + 1, :]
     y = y[:, :, : half_image + 1, :]
+
+    test_shapes(x)
+    test_shapes(y)
+
     with h5py.File(path, "w") as hf:
         hf.create_dataset(name_x, data=x)
         hf.create_dataset(name_y, data=y)
         hf.close()
 
 
+def test_shapes(array):
+    if array.shape[1] != 2:
+        raise ValueError("Expected array axis 1 to be 2!")
+
+    if len(array.shape) != 4:
+        raise ValueError("Expected array shape to be of len 4!")
+
+
 def calc_truth_fft(sky_dist):
     truth_fft = np.fft.fftshift(
-        np.fft.fft2(np.fft.fftshift(sky_dist, axes=(1, 2)), axes=(1, 2)), axes=(1, 2)
+        np.fft.fft2(np.fft.fftshift(sky_dist, axes=(2, 3)), axes=(2, 3)), axes=(2, 3)
     )
+
     return truth_fft
diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index b6afb71..629f575 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -155,6 +155,9 @@ def _run(self):
                     sim_data = convert_real_imag(sim_data, sky_sim=False)
                     truth_fft = convert_real_imag(truth_fft, sky_sim=True)
 
+                if sim_data.shape[1] != 2:
+                    raise ValueError("Expected sim_data axis 1 to be 2!")
+
                 out = self.out_path / Path(
                     f"samp_{self.conf['file_prefix']}_" + str(i) + ".h5"
                 )

From 6f08598299ef217b829caa3f523dcd08f2852b12 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Thu, 30 Jan 2025 18:46:10 +0100
Subject: [PATCH 13/34] Fix gridder

---
 pyvisgen/gridding/gridder.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py
index 1ab198c..68f9c2d 100644
--- a/pyvisgen/gridding/gridder.py
+++ b/pyvisgen/gridding/gridder.py
@@ -132,6 +132,9 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
     if isinstance(freq_bands, float):
         freq_bands = [freq_bands]
 
+    uu /= const.c
+    vv /= const.c
+
     u = np.array([uu * np.array(freq) for freq in freq_bands]).ravel()
     v = np.array([vv * np.array(freq) for freq in freq_bands]).ravel()
 
@@ -159,6 +162,7 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
             np.concatenate([-imag, imag]),
         ]
     )
+
     # Generate Mask
     N = conf["grid_size"]  # image size
     fov = conf["grid_fov"] * np.pi / (3600 * 180)
@@ -182,7 +186,12 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
     mask_real /= mask
     mask_imag /= mask
 
-    assert mask_real.shape == (conf["grid_size"], conf["grid_size"])
+    if mask_real.shape != (conf["grid_size"], conf["grid_size"]):
+        raise ValueError(
+            "shape mismatch: Expected mask_real to be "
+            f"of shape {(conf['grid_size'], conf['grid_size'])}"
+        )
+
     gridded_vis = np.zeros((2, N, N))
     gridded_vis[0] = mask_real
     gridded_vis[1] = mask_imag

From 3ef8dece65c63cbc87555c2f1d9cf0a5c6ca3980 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Thu, 30 Jan 2025 18:47:33 +0100
Subject: [PATCH 14/34] Update sim data loop

---
 pyvisgen/simulation/data_set.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index 629f575..f9fc8eb 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -138,11 +138,12 @@ def _run(self):
                 )
 
                 if self.grid:
-                    sim_data.append(
-                        grid_vis_loop_data(
-                            vis.u, vis.v, vis.get_values(), self.freq_bands, self.conf
-                        )
+                    gridded = grid_vis_loop_data(
+                        vis.u, vis.v, vis.get_values(), self.freq_bands, self.conf
                     )
+
+                    sim_data.append(gridded)
+
                 samp_ops_idx += 1
 
             sim_data = np.array(sim_data)

From 359db2cf29f965d68291cceeb8e3e51fede0a425 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Thu, 30 Jan 2025 18:48:57 +0100
Subject: [PATCH 15/34] Change u, v, and w in scan, don't integrate over mean

---
 pyvisgen/simulation/scan.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py
index a4a7129..ad977c9 100644
--- a/pyvisgen/simulation/scan.py
+++ b/pyvisgen/simulation/scan.py
@@ -78,9 +78,9 @@ def calc_fourier(img, bas, lm, spw_low, spw_high):
         Return Fourier Kernel for every pixel in lm grid and given baselines.
         Shape is given by lm axes and baseline axis
     """
-    u_cmplt = torch.cat((bas[0], bas[1]))
-    v_cmplt = torch.cat((bas[3], bas[4]))
-    w_cmplt = torch.cat((bas[6], bas[7]))
+    u_cmplt = bas[0]
+    v_cmplt = bas[3]
+    w_cmplt = bas[6]
 
     l = lm[..., 0]  # noqa: E741
     m = lm[..., 1]
@@ -219,10 +219,4 @@ def integrate(X1, X2):
     int_f = 0.5 * torch.sum(int_m, dim=0)
     del int_m
 
-    X_t = torch.stack(torch.split(int_f, int(int_f.shape[0] / 2), dim=0))
-    del int_f
-
-    int_t = 0.5 * torch.sum(X_t, dim=0)
-    del X_t
-
-    return int_t
+    return int_f

From 45c26a5b1f12bdfe22f3b5ba2c1a11febec6c89a Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 31 Jan 2025 14:22:14 +0100
Subject: [PATCH 16/34] Update progress bar descriptions

---
 pyvisgen/simulation/data_set.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index f9fc8eb..4d95280 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -93,7 +93,7 @@ def from_config(
                 range(len(cls.data)),
                 position=0,
                 leave=False,
-                desc="Counting Number of Images",
+                desc="Counting images",
                 colour="#754fc9",
             )
             # get number of random parameter draws from number of images in data
@@ -113,7 +113,7 @@ def _run(self):
         data = tqdm(
             range(len(self.data)),
             position=0,
-            desc="Processing Bundles",
+            desc="Processing bundles",
             colour="#52ba66",
         )
 
@@ -125,7 +125,7 @@ def _run(self):
             SIs = tqdm(
                 SIs,
                 position=1,
-                desc=f"Bundle {i}",
+                desc=f"Bundle {i + 1}",
                 colour="#595cbd",
                 leave=False,
             )

From acfb4407223459ecbb50a81caeb6a913ce26d0d7 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Sat, 1 Feb 2025 16:02:49 +0100
Subject: [PATCH 17/34] Improve readability of the SimulateDataSet class, print
 config

---
 pyvisgen/simulation/data_set.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index 4d95280..872f21a 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -78,6 +78,8 @@ def from_config(
 
         cls.conf = read_data_set_conf(config)
 
+        print("Simulation Config:\n", cls.conf)
+
         if grid:
             cls.out_path = Path(cls.conf["out_path_gridded"])
         else:
@@ -86,11 +88,11 @@ def from_config(
         if not cls.out_path.is_dir():
             cls.out_path.mkdir(parents=True)
 
-        cls.data = load_bundles(cls.conf["in_path"])
+        cls.data_paths = load_bundles(cls.conf["in_path"])
 
         if not num_images:
             len_data = tqdm(
-                range(len(cls.data)),
+                range(len(cls.data_paths)),
                 position=0,
                 leave=False,
                 desc="Counting images",
@@ -111,7 +113,7 @@ def from_config(
 
     def _run(self):
         data = tqdm(
-            range(len(self.data)),
+            range(len(self.data_paths)),
             position=0,
             desc="Processing bundles",
             colour="#52ba66",
@@ -163,7 +165,7 @@ def _run(self):
                     f"samp_{self.conf['file_prefix']}_" + str(i) + ".h5"
                 )
 
-                save_fft_pair(out, sim_data, truth_fft)
+                save_fft_pair(path=out, x=sim_data, y=truth_fft)
 
                 path_msg = self.conf["out_path_gridded"]
             else:
@@ -178,7 +180,7 @@ def _run_slurm():
         raise NotImplementedError("Not implememented yet!")
 
     def get_images(self, i):
-        SIs = torch.tensor(open_bundles(self.data[i], key=self.key))
+        SIs = torch.tensor(open_bundles(self.data_paths[i], key=self.key))
 
         if len(SIs.shape) == 3:
             SIs = SIs.unsqueeze(1)

From 67c83ed68d2c71ebf9620638cc1221096923b0c0 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Mon, 3 Feb 2025 11:32:46 +0100
Subject: [PATCH 18/34] Move convert_phase_amp and convert_real_imag to
 gridding.utils

---
 pyvisgen/gridding/__init__.py | 11 ++++----
 pyvisgen/gridding/gridder.py  | 27 --------------------
 pyvisgen/gridding/utils.py    | 47 ++++++++++++++++++++++++++++++-----
 3 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/pyvisgen/gridding/__init__.py b/pyvisgen/gridding/__init__.py
index c53f9ce..1b4319c 100644
--- a/pyvisgen/gridding/__init__.py
+++ b/pyvisgen/gridding/__init__.py
@@ -1,11 +1,12 @@
-from .gridder import (
+from .gridder import ducc0_gridding, grid_data, grid_vis_loop_data
+from .utils import (
+    calc_truth_fft,
     convert_amp_phase,
     convert_real_imag,
-    ducc0_gridding,
-    grid_data,
-    grid_vis_loop_data,
+    create_gridded_data_set,
+    open_data,
+    save_fft_pair,
 )
-from .utils import calc_truth_fft, create_gridded_data_set, open_data, save_fft_pair
 
 __all__ = [
     "calc_truth_fft",
diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py
index 68f9c2d..873d40c 100644
--- a/pyvisgen/gridding/gridder.py
+++ b/pyvisgen/gridding/gridder.py
@@ -197,30 +197,3 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
     gridded_vis[1] = mask_imag
 
     return gridded_vis
-
-
-def convert_amp_phase(data, sky_sim=False):
-    if sky_sim:
-        amp = np.abs(data)
-        phase = np.angle(data)
-        data = np.concatenate((amp, phase), axis=1)
-    else:
-        test = data[:, 0] + 1j * data[:, 1]
-        amp = np.abs(test)
-        phase = np.angle(test)
-        data = np.stack((amp, phase), axis=1)
-    return data
-
-
-def convert_real_imag(data, sky_sim=False):
-    if sky_sim:
-        real = data.real
-        imag = data.imag
-
-        data = np.concatenate((real, imag), axis=1)
-    else:
-        real = data[:, 0]
-        imag = data[:, 1]
-
-        data = np.stack((real, imag), axis=1)
-    return data
diff --git a/pyvisgen/gridding/utils.py b/pyvisgen/gridding/utils.py
index 438385e..45b57b5 100644
--- a/pyvisgen/gridding/utils.py
+++ b/pyvisgen/gridding/utils.py
@@ -6,7 +6,7 @@
 from tqdm.autonotebook import tqdm
 
 from pyvisgen.fits.data import fits_data
-from pyvisgen.gridding import convert_amp_phase, convert_real_imag, grid_data
+from pyvisgen.gridding import grid_data
 from pyvisgen.utils.config import read_data_set_conf
 from pyvisgen.utils.data import load_bundles, open_bundles
 
@@ -142,8 +142,8 @@ def save_fft_pair(path, x, y, name_x="x", name_y="y"):
     x = x[:, :, : half_image + 1, :]
     y = y[:, :, : half_image + 1, :]
 
-    test_shapes(x)
-    test_shapes(y)
+    test_shapes(x, "x")
+    test_shapes(y, "y")
 
     with h5py.File(path, "w") as hf:
         hf.create_dataset(name_x, data=x)
@@ -151,12 +151,18 @@ def save_fft_pair(path, x, y, name_x="x", name_y="y"):
         hf.close()
 
 
-def test_shapes(array):
+def test_shapes(array, name):
     if array.shape[1] != 2:
-        raise ValueError("Expected array axis 1 to be 2!")
+        raise ValueError(
+            f"Expected array {name} axis 1 to be 2 but got "
+            f"{array.shape} with axis 1: {array.shape[1]}!"
+        )
 
     if len(array.shape) != 4:
-        raise ValueError("Expected array shape to be of len 4!")
+        raise ValueError(
+            f"Expected array {name} shape to be of len 4 but got "
+            f"{array.shape} with len {len(array.shape)}!"
+        )
 
 
 def calc_truth_fft(sky_dist):
@@ -165,3 +171,32 @@ def calc_truth_fft(sky_dist):
     )
 
     return truth_fft
+
+
+def convert_amp_phase(data, sky_sim=False):
+    if sky_sim:
+        amp = np.abs(data)
+        phase = np.angle(data)
+        data = np.concatenate((amp, phase), axis=1)
+    else:
+        test = data[:, 0] + 1j * data[:, 1]
+        amp = np.abs(test)
+        phase = np.angle(test)
+        data = np.stack((amp, phase), axis=1)
+
+    return data
+
+
+def convert_real_imag(data, sky_sim=False):
+    if sky_sim:
+        real = data.real
+        imag = data.imag
+
+        data = np.concatenate((real, imag), axis=1)
+    else:
+        real = data[:, 0]
+        imag = data[:, 1]
+
+        data = np.stack((real, imag), axis=1)
+
+    return data

From 2e7e58721a499ef1a423bec42c68bad88950e0fe Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 15:34:22 +0100
Subject: [PATCH 19/34] Add slurm support, add FITS file support, add
 docstrings

---
 pyvisgen/simulation/data_set.py | 186 ++++++++++++++++++++++++++------
 1 file changed, 152 insertions(+), 34 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index 872f21a..e6e5b57 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -6,8 +6,10 @@
 from astropy import units as un
 from astropy.coordinates import AltAz, EarthLocation, SkyCoord
 from astropy.time import Time
+from joblib import Parallel, delayed
 from tqdm.autonotebook import tqdm
 
+import pyvisgen.fits.writer as writer
 import pyvisgen.layouts.layouts as layouts
 from pyvisgen.gridding import (
     calc_truth_fft,
@@ -37,11 +39,12 @@ def from_config(
         *,
         grid: bool = True,
         slurm: bool = False,
-        job_id: int | None = None,
-        n: int | None = None,
+        slurm_job_id: int | None = None,
+        slurm_n: int | None = None,
         date_fmt: str = DATEFMT,
         num_images: int | None = None,
-    ) -> None:
+        multiprocess: int | str = 1,
+    ):
         """Simulates data from parameters in a config file.
 
         Parameters
@@ -56,9 +59,9 @@ def from_config(
             save to HDF5 files. Default: ``True``
         slurm : bool, optional
             ``True``, if slurm is used, Default: ``False``
-        job_id : int or None, optional
+        slurm_job_id : int or None, optional
             ``job_id`` given by slurm. Default: ``None``
-        n : int or None, optional
+        slurm_n : int or None, optional
             Running index. Default: ``None``
         date_fmt : str, optional
             Format string for datetime objects.
@@ -67,14 +70,23 @@ def from_config(
             Number of combined total images in the bundles.
             If not ``None``, will skip counting the images before
             drawing the random parameters. Default: ``None``
+        multiprocess : int or str, optional
+            Number of jobs to use in multiprocessing during the
+            sampling and testing phase. If -1 or ``'all'``,
+            use all available cores. Default: 1
         """
         cls = cls()
         cls.key = image_key
         cls.grid = grid
         cls.slurm = slurm
-        cls.job_id = job_id
-        cls.n = n
+        cls.job_id = slurm_job_id
+        cls.n = slurm_n
         cls.date_fmt = date_fmt
+        cls.num_images = num_images
+        cls.multiprocess = multiprocess
+
+        if multiprocess in ["all"]:
+            cls.multiprocess = -1
 
         cls.conf = read_data_set_conf(config)
 
@@ -90,8 +102,8 @@ def from_config(
 
         cls.data_paths = load_bundles(cls.conf["in_path"])
 
-        if not num_images:
-            len_data = tqdm(
+        if not cls.num_images:
+            data_bundles = tqdm(
                 range(len(cls.data_paths)),
                 position=0,
                 leave=False,
@@ -99,19 +111,23 @@ def from_config(
                 colour="#754fc9",
             )
             # get number of random parameter draws from number of images in data
-            num_draws = np.sum([len(cls.get_images(i)) for i in len_data])
-
-        # draw parameters beforehand, i.e. outside the simulation loop
-        cls.create_sampling_rc(num_draws)
+            cls.num_images = np.sum(
+                [len(cls.get_images(bundle)) for bundle in data_bundles]
+            )
 
         if slurm:
             cls._run_slurm()
         else:
+            # draw parameters beforehand, i.e. outside the simulation loop
+            cls.create_sampling_rc(cls.num_images)
             cls._run()
 
         return cls
 
-    def _run(self):
+    def _run(self) -> None:
+        """Runs the simulation and saves visibility data either as
+        bundled HDF5 files or as individual FITS files.
+        """
         data = tqdm(
             range(len(self.data_paths)),
             position=0,
@@ -167,19 +183,62 @@ def _run(self):
 
                 save_fft_pair(path=out, x=sim_data, y=truth_fft)
 
-                path_msg = self.conf["out_path_gridded"]
+                path_msg = Path(self.conf["out_path_gridded"]) / Path(
+                    f"samp_{self.conf['file_prefix']}_<id>.h5"
+                )
             else:
-                path_msg = self.conf["out_path_fits"]
+                for i, vis_data in enumerate(sim_data):
+                    out = self.out_path / Path(
+                        f"vis_{self.conf['file_prefix']}_" + str(i) + ".fits"
+                    )
+                    hdu_list = writer.create_hdu_list(vis_data, obs)
+                    hdu_list.writeto(out, overwrite=True)
+
+                path_msg = self.conf["out_path_fits"] / Path(
+                    f"samp_{self.conf['file_prefix']}_<id>.fits"
+                )
 
         print(
             f"Successfully simulated and saved {samp_ops_idx} images "
             f"to '{path_msg}'!"
         )
 
-    def _run_slurm():
-        raise NotImplementedError("Not implememented yet!")
+    def _run_slurm(self) -> None:
+        """Runs the simulation in slurm and saves visibility data
+        as individual FITS files.
+        """
+        job_id = int(self.slurm_job_id + self.slurm_n * 500)
+        out = self.conf["out_path_fits"] / Path("vis_" + str(job_id) + ".fits")
+
+        bundle = torch.div(job_id, self.num_images, rounding_mode="floor")
+        image = job_id - bundle * self.num_images
+
+        SI = torch.tensor(open_bundles(self.data_paths[bundle])[image])
 
-    def get_images(self, i):
+        if len(SI.shape) == 2:
+            SI = SI.unsqueeze(0)
+
+        self.create_sampling_rc(1)
+        obs = self.create_observation(0)
+        vis_data = vis_loop(obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"])
+
+        hdu_list = writer.create_hdu_list(vis_data, obs)
+        hdu_list.writeto(out, overwrite=True)
+
+    def get_images(self, i: int) -> torch.tensor:
+        """Opens bundle with index i and returns :func:`~torch.tensor`
+        of images.
+
+        Parameters
+        ----------
+        i : int
+            Bundle index.
+
+        Returns
+        -------
+        SIs : :func:`~torch.tensor`
+            :func:`~torch.tensor` of images from bundle ``i``.
+        """
         SIs = torch.tensor(open_bundles(self.data_paths[i], key=self.key))
 
         if len(SIs.shape) == 3:
@@ -187,7 +246,21 @@ def get_images(self, i):
 
         return SIs
 
-    def create_observation(self, i):
+    def create_observation(self, i: int) -> Observation:
+        """Creates :class:`~pyvisgen.simulation.Observation`
+        dataclass object for image ``i``.
+
+        Parameters
+        ----------
+        i : int
+            Index of image for which the observation is created.
+
+        Returns
+        -------
+        obs : Observation
+            :class:`~pyvisgen.simulation.Observation` dataclass
+            object for image ``i``.
+        """
         rc = self.samp_ops
 
         # put the respective values inside the
@@ -222,7 +295,15 @@ def create_observation(self, i):
 
         return obs
 
-    def create_sampling_rc(self, size):
+    def create_sampling_rc(self, size: int) -> None:
+        """Creates sampling runtime configuration containing
+        all relevant parameters for the simulation.
+
+        Parameters
+        ----------
+        size : int
+            Number of parameters to draw, equal to number of images.
+        """
         if self.conf["seed"]:
             self.rng = np.random.default_rng(self.conf["seed"])
         else:
@@ -265,10 +346,25 @@ def create_sampling_rc(self, size):
             leave=False,
         )
 
-        for i in test_idx:
-            self.test_rand_opts(i)
+        self.array = layouts.get_array_layout(self.samp_ops_const["array_layout"])
 
-    def draw_sampling_opts(self, size):
+        Parallel(n_jobs=self.multiprocess)(
+            delayed(self.test_rand_opts)(i) for i in test_idx
+        )
+
+    def draw_sampling_opts(self, size: int) -> dict:
+        """Draws randomized sampling parameters for the simulation.
+
+        Parameters
+        ----------
+        size : int
+            Number of parameters to draw, equal to number of images.
+
+        Returns
+        -------
+        samp_opts : dict
+            Sampling options/parameters stored inside a dictionary.
+        """
         ra = self.rng.uniform(
             self.conf["fov_center_ra"][0][0], self.conf["fov_center_ra"][0][1], size
         )
@@ -328,7 +424,7 @@ def draw_sampling_opts(self, size):
                 b = np.repeat(1, size, dtype=float)
                 field_scale = np.stack((a, b), axis=1)
 
-        samp_ops = dict(
+        samp_opts = dict(
             src_ra=ra,
             src_dec=dec,
             start_time=scan_start,
@@ -346,22 +442,31 @@ def draw_sampling_opts(self, size):
         # later, we need to include it here instead of inside the
         # samp_ops_const dictionary.
 
-        return samp_ops
+        return samp_opts
 
-    def test_rand_opts(self, i):
-        array = layouts.get_array_layout(self.samp_ops_const["array_layout"])
+    def test_rand_opts(self, i: int) -> None:
+        """Tests randomized sampling parameters by checking
+        if the source is visible for 50% of the telescopes in
+        the array for 50% of the observation time. If that
+        condition is not fullfilled, the parameters are redrawn
+        and tested again.
 
+        Parameters
+        ----------
+        i : int
+            Index of the current set of sampling parameters.
+        """
         time_steps = self.calc_time_steps(i)
         src_crd = SkyCoord(
             self.samp_ops["src_ra"][i], self.samp_ops["src_dec"][i], unit=un.deg
         )
 
-        total_stations = len(array.st_num)
+        total_stations = len(self.array.st_num)
 
         locations = EarthLocation.from_geocentric(
-            x=array.x,
-            y=array.y,
-            z=array.z,
+            x=self.array.x,
+            y=self.array.y,
+            z=self.array.z,
             unit=un.m,
         )
 
@@ -370,7 +475,7 @@ def test_rand_opts(self, i):
 
         # check which stations can see the source for each time step
         visible = np.logical_and(
-            array.el_low.numpy() <= src_alt, src_alt <= array.el_high.numpy()
+            self.array.el_low.numpy() <= src_alt, src_alt <= self.array.el_high.numpy()
         )
 
         # We want at least half of the telescopes to see the source
@@ -390,7 +495,20 @@ def test_rand_opts(self, i):
 
             self.test_rand_opts(i)
 
-    def calc_time_steps(self, i):
+    def calc_time_steps(self, i: int) -> Time:
+        """Calculates time steps for given sampling
+        parameter set. Used in testing.
+
+        Parameters
+        ----------
+        i : int
+            Index of the current set of sampling parameters.
+
+        See Also
+        --------
+        pyvisgen.simulation.data_set.SimulateDataSet.test_rand_opts :
+            Tests randomized sampling parameters.
+        """
         start_time = Time(self.samp_ops["start_time"][i].isoformat(), format="isot")
         scan_separation = self.samp_ops_const["scan_separation"]
         num_scans = self.samp_ops["num_scans"][i]

From a2798fdd42a73cf10729956595e39cf26e567204 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 15:34:57 +0100
Subject: [PATCH 20/34] Allow HDF5 key to be passed to open_bundle

---
 pyvisgen/utils/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyvisgen/utils/data.py b/pyvisgen/utils/data.py
index d02082a..aa83274 100644
--- a/pyvisgen/utils/data.py
+++ b/pyvisgen/utils/data.py
@@ -48,7 +48,7 @@ def get_bundles(path: str | Path) -> np.array:
     return bundles
 
 
-def open_bundles(path: str | Path) -> np.array:
+def open_bundles(path: str | Path, key: str = "y") -> np.array:
     """Opens a bundle HDF5 file.
 
     Parameters
@@ -63,6 +63,6 @@ def open_bundles(path: str | Path) -> np.array:
         the bundle file.
     """
     f = h5py.File(path, "r")
-    bundle_y = np.array(f["y"])
+    bundle_y = np.array(f[key])
 
     return bundle_y

From 6859f9dd90bd9d1acd227e419df6abd8dbdbb26c Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 16:08:35 +0100
Subject: [PATCH 21/34] Add changelog, minor fixes

---
 docs/changes/53.feature.rst                   |  1 +
 docs/changes/53.maintenance.rst               | 10 ++++++
 pyvisgen/gridding/gridder.py                  | 35 ++++++++++++++++---
 pyvisgen/simulation/scan.py                   |  1 -
 pyvisgen/simulation/scripts/create_dataset.py | 25 +++++++++----
 5 files changed, 60 insertions(+), 12 deletions(-)
 create mode 100644 docs/changes/53.feature.rst
 create mode 100644 docs/changes/53.maintenance.rst

diff --git a/docs/changes/53.feature.rst b/docs/changes/53.feature.rst
new file mode 100644
index 0000000..5fcfd8d
--- /dev/null
+++ b/docs/changes/53.feature.rst
@@ -0,0 +1 @@
+- Add new gridder that can handle vis data returned by the ``vis_loop``
diff --git a/docs/changes/53.maintenance.rst b/docs/changes/53.maintenance.rst
new file mode 100644
index 0000000..290b8a4
--- /dev/null
+++ b/docs/changes/53.maintenance.rst
@@ -0,0 +1,10 @@
+- Complete rewrite of dataset creation routine ``pyvisgen.simulation.data_set.SimulateDataSet``
+  - Accessible using a classmethod to load a config file
+  - Add optional multiprocessing support
+  - Draw and fully test parameters before simulation loop. Previously this was done in the loop and tests were only performed for two time steps
+  - Support for polarization
+- Add new default config file for new dataset creation routine
+- Update CLI tool for dataset creation routine
+- Allow passing HDF5 key in ``pyvisgen.utils.data.open_bundles``
+- Restructure ``pyvisgen.gridding`` module by adding a ``utils`` submodule that contains all utility functions that previously were in the ``gridder`` submodule
+  - Also fix parts of the utility functions
diff --git a/pyvisgen/gridding/gridder.py b/pyvisgen/gridding/gridder.py
index 873d40c..4845441 100644
--- a/pyvisgen/gridding/gridder.py
+++ b/pyvisgen/gridding/gridder.py
@@ -118,8 +118,29 @@ def grid_data(uv_data, freq_data, conf):
     return gridded_vis
 
 
-def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
-    """Grid data coming from the vis_loop."""
+def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0) -> np.array:
+    """Grid data returned by :func:`~pyvisgen.simulation.vis_loop`.
+
+    Parameters
+    ----------
+    uu : :func:`~torch.tensor`
+    vv : :func:`~torch.tensor`
+    vis_data : :class:`~pyvisgen.simulation.Visibilities`
+        :class:`~pyvisgen.simulation.Visibilities` dataclass object
+        containing the visibilities measured by the array.
+    freq_bands : list
+        List of frequency bands of the observation.
+    conf : dict
+        Dictionary containing the configuration of the observation.
+    stokes_comp : int, optional
+        Index of the stokes component to grid. Defaults to stokes I.
+        Default: 0
+
+    Returns
+    -------
+    gridded_vis : :func:`~np.array`
+        Array of gridded visibilities.
+    """
     if vis_data.ndim != 7:
         if vis_data.ndim == 3:
             vis_data = np.stack(
@@ -165,13 +186,17 @@ def grid_vis_loop_data(uu, vv, vis_data, freq_bands, conf, stokes_comp=0):
 
     # Generate Mask
     N = conf["grid_size"]  # image size
-    fov = conf["grid_fov"] * np.pi / (3600 * 180)
-
+    fov = np.deg2rad(conf["grid_fov"] / 3600)
     delta = 1 / fov
 
     # bins are shifted by delta/2 so that maximum in uv space matches maximum
     # in numpy fft
-    bins = np.arange(start=-((N + 1) / 2) * delta, stop=(N / 2) * delta, step=delta)
+    bins = np.arange(
+        start=-((N + 1) / 2) * delta,
+        stop=((N + 1) / 2) * delta,
+        step=delta,
+        dtype=np.float128,
+    )
 
     mask, *_ = np.histogram2d(samps[0], samps[1], bins=[bins, bins], density=False)
     mask[mask == 0] = 1
diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py
index 08f6604..f564f6e 100644
--- a/pyvisgen/simulation/scan.py
+++ b/pyvisgen/simulation/scan.py
@@ -105,7 +105,6 @@ def calc_fourier(
     v_cmplt = bas[5]
     w_cmplt = bas[8]
 
-
     l = lm[..., 0]  # noqa: E741
     m = lm[..., 1]
     n = torch.sqrt(1 - l**2 - m**2)
diff --git a/pyvisgen/simulation/scripts/create_dataset.py b/pyvisgen/simulation/scripts/create_dataset.py
index e01242f..4707ee5 100644
--- a/pyvisgen/simulation/scripts/create_dataset.py
+++ b/pyvisgen/simulation/scripts/create_dataset.py
@@ -28,8 +28,8 @@
     default="y",
     help="Key under which the images are saved in the HDF5 file",
 )
-@click.option("-j", "--job_id", required=False, type=int, default=None)
-@click.option("--n", required=False, type=int, default=None)
+@click.option("--slurm_job_id", required=False, type=int, default=None)
+@click.option("--slurm_n", required=False, type=int, default=None)
 @click.option(
     "--num_images",
     required=False,
@@ -40,14 +40,25 @@
         Will skip the automatic count.
     """,
 )
+@click.option(
+    "-p",
+    "--multiprocess",
+    required=False,
+    help="""
+        Number of processes to run in parallel while
+        sampling and testing parameters. If -1 or
+        'all', will use all available cores.
+    """,
+)
 def main(
     configuration_path: str | click.Path,
     mode: str,
     key: str = "y",
-    job_id=None,
-    n=None,
+    slurm_job_id=None,
+    slurm_n=None,
     date_fmt="%d-%m-%Y %H:%M:%S",
     num_images: int | None = None,
+    multiprocess: int | str = 1,
 ):
     if mode == "simulate":
         SimulateDataSet.from_config(
@@ -56,14 +67,15 @@ def main(
             grid=False,
             date_fmt=date_fmt,
             num_images=num_images,
+            multiprocess=multiprocess,
         )
     if mode == "slurm":
         SimulateDataSet.from_config(
             configuration_path,
             image_key=key,
             slurm=True,
-            job_id=job_id,
-            n=n,
+            slurm_job_id=slurm_job_id,
+            slurm_n=slurm_n,
             date_fmt=date_fmt,
             num_images=num_images,
         )
@@ -74,6 +86,7 @@ def main(
             grid=True,
             date_fmt=date_fmt,
             num_images=num_images,
+            multiprocess=multiprocess,
         )
 
 

From 0a02353623542fdbc22e918699978c4f225d6cd2 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 18:59:03 +0100
Subject: [PATCH 22/34] Change to threading backend, fix variable names

---
 pyvisgen/simulation/data_set.py | 96 ++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 24 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index e6e5b57..9d500b7 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -135,7 +135,7 @@ def _run(self) -> None:
             colour="#52ba66",
         )
 
-        samp_ops_idx = 0
+        samp_opts_idx = 0
         for i in data:
             SIs = self.get_images(i)
             truth_fft = calc_truth_fft(SIs)
@@ -150,7 +150,7 @@ def _run(self) -> None:
 
             sim_data = []
             for SI in SIs:
-                obs = self.create_observation(samp_ops_idx)
+                obs = self.create_observation(samp_opts_idx)
                 vis = vis_loop(
                     obs, SI, noisy=self.conf["noisy"], mode=self.conf["mode"]
                 )
@@ -162,7 +162,7 @@ def _run(self) -> None:
 
                     sim_data.append(gridded)
 
-                samp_ops_idx += 1
+                samp_opts_idx += 1
 
             sim_data = np.array(sim_data)
 
@@ -199,7 +199,7 @@ def _run(self) -> None:
                 )
 
         print(
-            f"Successfully simulated and saved {samp_ops_idx} images "
+            f"Successfully simulated and saved {samp_opts_idx} images "
             f"to '{path_msg}'!"
         )
 
@@ -261,7 +261,7 @@ def create_observation(self, i: int) -> Observation:
             :class:`~pyvisgen.simulation.Observation` dataclass
             object for image ``i``.
         """
-        rc = self.samp_ops
+        rc = self.samp_opts
 
         # put the respective values inside the
         # pol_kwargs and field_kwargs dicts.
@@ -282,7 +282,7 @@ def create_observation(self, i: int) -> Observation:
             dense = True
 
         obs = Observation(
-            **self.samp_ops_const,
+            **self.samp_opts_const,
             src_ra=rc["src_ra"][i],
             src_dec=rc["src_dec"][i],
             start_time=rc["start_time"][i],
@@ -319,7 +319,7 @@ def create_sampling_rc(self, size: int) -> None:
         # Split sampling options into two dicts:
         # samps_ops_const is always the same, values in
         # samps_ops, however, will be drawn randomly.
-        self.samp_ops_const = dict(
+        self.samp_opts_const = dict(
             array_layout=self.conf["layout"][0],
             image_size=self.conf["img_size"][0],
             fov=self.conf["fov_size"],
@@ -336,19 +336,19 @@ def create_sampling_rc(self, size: int) -> None:
 
         # get second half of the sampling options;
         # this is the randomly drawn, i.e. non-constant, part
-        self.samp_ops = self.draw_sampling_opts(size)
+        self.samp_opts = self.draw_sampling_opts(size)
 
         test_idx = tqdm(
-            range(self.samp_ops["src_ra"].size),
+            range(self.samp_opts["src_ra"].size),
             position=0,
             desc="Pre-drawing and testing sample parameters",
             colour="#00c1a2",
             leave=False,
         )
 
-        self.array = layouts.get_array_layout(self.samp_ops_const["array_layout"])
+        self.array = layouts.get_array_layout(self.samp_opts_const["array_layout"])
 
-        Parallel(n_jobs=self.multiprocess)(
+        Parallel(n_jobs=self.multiprocess, backend="threading")(
             delayed(self.test_rand_opts)(i) for i in test_idx
         )
 
@@ -440,7 +440,7 @@ def draw_sampling_opts(self, size: int) -> dict:
         # as threshold=None should be suitable for almost all cases.
         # However, since threshold has to be in the field_kwargs dict
         # later, we need to include it here instead of inside the
-        # samp_ops_const dictionary.
+        # samp_opts_const dictionary.
 
         return samp_opts
 
@@ -458,7 +458,7 @@ def test_rand_opts(self, i: int) -> None:
         """
         time_steps = self.calc_time_steps(i)
         src_crd = SkyCoord(
-            self.samp_ops["src_ra"][i], self.samp_ops["src_dec"][i], unit=un.deg
+            self.samp_opts["src_ra"][i], self.samp_opts["src_dec"][i], unit=un.deg
         )
 
         total_stations = len(self.array.st_num)
@@ -486,12 +486,12 @@ def test_rand_opts(self, i: int) -> None:
         # we redraw the source ra and dec and scan start times, duration,
         # and number of scans. Then we test again by calling this function recursively.
         if visible_half.sum() < time_steps.size // 2:
-            redrawn_samp_ops = self.draw_sampling_opts(1)
-            self.samp_ops["src_ra"][i] = redrawn_samp_ops["src_ra"][0]
-            self.samp_ops["src_dec"][i] = redrawn_samp_ops["src_dec"][0]
-            self.samp_ops["start_time"][i] = redrawn_samp_ops["start_time"][0]
-            self.samp_ops["scan_duration"][i] = redrawn_samp_ops["scan_duration"][0]
-            self.samp_ops["num_scans"][i] = redrawn_samp_ops["num_scans"][0]
+            redrawn_samp_opts = self.draw_sampling_opts(1)
+            self.samp_opts["src_ra"][i] = redrawn_samp_opts["src_ra"][0]
+            self.samp_opts["src_dec"][i] = redrawn_samp_opts["src_dec"][0]
+            self.samp_opts["start_time"][i] = redrawn_samp_opts["start_time"][0]
+            self.samp_opts["scan_duration"][i] = redrawn_samp_opts["scan_duration"][0]
+            self.samp_opts["num_scans"][i] = redrawn_samp_opts["num_scans"][0]
 
             self.test_rand_opts(i)
 
@@ -509,11 +509,11 @@ def calc_time_steps(self, i: int) -> Time:
         pyvisgen.simulation.data_set.SimulateDataSet.test_rand_opts :
             Tests randomized sampling parameters.
         """
-        start_time = Time(self.samp_ops["start_time"][i].isoformat(), format="isot")
-        scan_separation = self.samp_ops_const["scan_separation"]
-        num_scans = self.samp_ops["num_scans"][i]
-        scan_duration = self.samp_ops["scan_duration"][i]
-        int_time = self.samp_ops_const["integration_time"]
+        start_time = Time(self.samp_opts["start_time"][i].isoformat(), format="isot")
+        scan_separation = self.samp_opts_const["scan_separation"]
+        num_scans = self.samp_opts["num_scans"][i]
+        scan_duration = self.samp_opts["scan_duration"][i]
+        int_time = self.samp_opts_const["integration_time"]
 
         time_lst = [
             start_time
@@ -529,3 +529,51 @@ def calc_time_steps(self, i: int) -> Time:
         time = Time(time_lst)
 
         return time
+
+    @classmethod
+    def _get_obs_test(
+        cls,
+        config: str | Path,
+        /,
+        image_key: str = "y",
+        *,
+        date_fmt: str = DATEFMT,
+    ) -> tuple:  # pragma: no cover
+        """Creates an :class:`~pyvisgen.simulation.Observation` class
+        object for tests.
+
+        Parameters
+        ----------
+        config : str or Path
+            Path to the config file.
+        image_key : str, optional
+            Key under which the true sky distributions are saved
+            in the HDF5 file. Default: ``'y'``
+        date_fmt : str, optional
+            Format string for datetime objects.
+            Default: ``'%d-%m-%Y %H:%M:%S'``
+
+        Returns
+        -------
+        self : SimulateDataSet
+            Class object.
+        obs : :class:`~pyvisgen.simulation.Observation`
+            :class:`~pyvisgen.simulation.Observation` class object.
+        """
+        cls = cls()
+        cls.key = image_key
+        cls.date_fmt = date_fmt
+        cls.multiprocess = 1
+
+        if isinstance(config, (str, Path)):
+            cls.conf = read_data_set_conf(config)
+        elif isinstance(config, dict):
+            cls.conf = config
+        else:
+            raise ValueError("Expected config to be one of str, Path or dict!")
+
+        cls.data_paths = load_bundles(cls.conf["in_path"])[0]
+        cls.create_sampling_rc(1)
+        obs = cls.create_observation(0)
+
+        return cls, obs

From 889cbf9e9b04911a18cf7bac3fa9a146c38f57a8 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 18:59:31 +0100
Subject: [PATCH 23/34] Update and fix tests

---
 tests/test_conf.toml     |  11 +-
 tests/test_simulation.py | 333 +++++++++++++++++++--------------------
 tests/test_utils.py      |   9 +-
 3 files changed, 184 insertions(+), 169 deletions(-)

diff --git a/tests/test_conf.toml b/tests/test_conf.toml
index b9b0e36..7502c5b 100644
--- a/tests/test_conf.toml
+++ b/tests/test_conf.toml
@@ -19,8 +19,17 @@ noisy = 380
 corrupted = true
 sensitivity_cut = 1e-6
 
+[polarisation_options]
+mode = "none"  # linear, circular, or "none"
+delta = 45  # phase angle 
+amp_ratio = 0.5  # polarisation amplitude ratio
+field_order = [0.1, 0.1]  # (x, y) orders of the random polarisation field
+field_scale = [0, 1]  # scaling of the intensity of the polarisation field
+field_threshold = "none"
+
 [bundle_options]
-in_path = "./tests/data"
+file_prefix=""
+in_path = "/home/knierim/work/pyvisgen/tests/data"
 out_path_fits = "./tests/build/fits"
 out_path_gridded = "./tests/build/gridded"
 num_test_images = 1000
diff --git a/tests/test_simulation.py b/tests/test_simulation.py
index 066eb71..05a8729 100644
--- a/tests/test_simulation.py
+++ b/tests/test_simulation.py
@@ -6,8 +6,10 @@
 from pyvisgen.utils.config import read_data_set_conf
 
 torch.manual_seed(1)
-config = "tests/test_conf.toml"
-conf = read_data_set_conf(config)
+
+CONFIG = "tests/test_conf.toml"
+
+conf = read_data_set_conf(CONFIG)
 out_path = Path(conf["out_path_fits"])
 out_path.mkdir(parents=True, exist_ok=True)
 
@@ -19,168 +21,162 @@ def test_get_data():
     assert len(data) > 0
 
 
-def test_create_sampling_rc():
-    from pyvisgen.simulation.data_set import create_sampling_rc, test_opts
+class TestSimulateDataSet:
+    """Unit test class for :class:``pyvisgen.simulation.SimulateDataSet``."""
+
+    def setup_class(self):
+        """Set up common objects and variables for the following tests."""
+        from pyvisgen.simulation.data_set import SimulateDataSet
+
+        self.s = SimulateDataSet
+
+    def test_run_no_slurm(self):
+        self.s.from_config(CONFIG)
 
-    samp_ops = create_sampling_rc(conf)
-    assert len(samp_ops) == 17
+    def test_run_no_slurm_multiprocess(self):
+        self.s.from_config(CONFIG, multiprocess="all")
 
-    test_opts(samp_ops)
+    def test_run_no_slurm_num_images(self):
+        self.s.from_config(CONFIG, num_images=50)
 
 
-def test_create_sampling_rc_no_seed():
-    from pyvisgen.simulation.data_set import create_sampling_rc, test_opts
+class TestVisLoop:
+    """Unit test class for :func:``pyvisgen.simulation.vis_loop``."""
 
-    mod_conf = conf.copy()
-    mod_conf["seed"] = None
-
-    samp_ops = create_sampling_rc(mod_conf)
-    assert len(samp_ops) == 17
-
-    test_opts(samp_ops)
-
-
-def test_vis_loop():
-    import torch
-
-    import pyvisgen.fits.writer as writer
-    from pyvisgen.simulation.data_set import create_observation
-    from pyvisgen.simulation.visibility import vis_loop
-    from pyvisgen.utils.data import load_bundles, open_bundles
-
-    bundles = load_bundles(conf["in_path"])
-    obs = create_observation(conf)
-    # num_active_telescopes = test_opts(samp_ops)
-    data = open_bundles(bundles[0])
-    SI = torch.tensor(data[0])[None]
-    vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"])
-
-    assert (vis_data[0].V_11[0]).dtype == torch.complex128
-    assert (vis_data[0].V_22[0]).dtype == torch.complex128
-    assert (vis_data[0].V_12[0]).dtype == torch.complex128
-    assert (vis_data[0].V_21[0]).dtype == torch.complex128
-    assert (vis_data[0].num).dtype == torch.float64
-    assert (vis_data[0].base_num).dtype == torch.float64
-    assert torch.is_tensor(vis_data[0].u)
-    assert torch.is_tensor(vis_data[0].v)
-    assert torch.is_tensor(vis_data[0].w)
-    assert (vis_data[0].date).dtype == torch.float64
-
-    # test num vis for time step 0
-    # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2
-    # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0]
-    # dunno what's going on here
-    # assert num_vis_theory == num_vis_calc
-    #
-
-    out_path = Path(conf["out_path_fits"])
-    out = out_path / Path("vis_0.fits")
-    hdu_list = writer.create_hdu_list(vis_data, obs)
-    hdu_list.writeto(out, overwrite=True)
-
-
-def test_vis_loop_grid():
-    import torch
-
-    import pyvisgen.fits.writer as writer
-    from pyvisgen.simulation.data_set import create_observation
-    from pyvisgen.simulation.visibility import vis_loop
-    from pyvisgen.utils.data import load_bundles, open_bundles
-
-    bundles = load_bundles(conf["in_path"])
-    obs = create_observation(conf)
-    # num_active_telescopes = test_opts(samp_ops)
-    data = open_bundles(bundles[0])
-    SI = torch.tensor(data[0])[None]
-    vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode="grid")
-
-    assert (vis_data[0].V_11[0]).dtype == torch.complex128
-    assert (vis_data[0].V_22[0]).dtype == torch.complex128
-    assert (vis_data[0].V_12[0]).dtype == torch.complex128
-    assert (vis_data[0].V_21[0]).dtype == torch.complex128
-    assert (vis_data[0].num).dtype == torch.float64
-    assert (vis_data[0].base_num).dtype == torch.float64
-    assert torch.is_tensor(vis_data[0].u)
-    assert torch.is_tensor(vis_data[0].v)
-    assert torch.is_tensor(vis_data[0].w)
-    assert (vis_data[0].date).dtype == torch.float64
-
-    # test num vis for time step 0
-    # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2
-    # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0]
-    # dunno what's going on here
-    # assert num_vis_theory == num_vis_calc
-    #
-
-    out_path = Path(conf["out_path_fits"])
-    out = out_path / Path("vis_0.fits")
-    hdu_list = writer.create_hdu_list(vis_data, obs)
-    hdu_list.writeto(out, overwrite=True)
-
-
-def test_vis_loop_batch_size_auto():
-    import torch
-
-    from pyvisgen.simulation.data_set import create_observation
-    from pyvisgen.simulation.visibility import vis_loop
-    from pyvisgen.utils.data import load_bundles, open_bundles
-
-    bundles = load_bundles(conf["in_path"])
-    obs = create_observation(conf)
-    data = open_bundles(bundles[0])
-    SI = torch.tensor(data[0])[None]
-
-    vis_data = vis_loop(
-        obs,
-        SI,
-        noisy=conf["noisy"],
-        mode=conf["mode"],
-        batch_size="auto",
-    )
-
-    assert (vis_data[0].V_11[0]).dtype == torch.complex128
-    assert (vis_data[0].V_22[0]).dtype == torch.complex128
-    assert (vis_data[0].V_12[0]).dtype == torch.complex128
-    assert (vis_data[0].V_21[0]).dtype == torch.complex128
-    assert (vis_data[0].num).dtype == torch.float64
-    assert (vis_data[0].base_num).dtype == torch.float64
-    assert torch.is_tensor(vis_data[0].u)
-    assert torch.is_tensor(vis_data[0].v)
-    assert torch.is_tensor(vis_data[0].w)
-    assert (vis_data[0].date).dtype == torch.float64
-
-
-def test_vis_loop_batch_size_invalid():
-    import torch
-
-    from pyvisgen.simulation.data_set import create_observation
-    from pyvisgen.simulation.visibility import vis_loop
-    from pyvisgen.utils.data import load_bundles, open_bundles
-
-    bundles = load_bundles(conf["in_path"])
-    obs = create_observation(conf)
-    data = open_bundles(bundles[0])
-    SI = torch.tensor(data[0])[None]
-
-    assert_raises(
-        ValueError,
-        vis_loop,
-        obs,
-        SI,
-        noisy=conf["noisy"],
-        mode=conf["mode"],
-        batch_size="abc",
-    )
-
-    assert_raises(
-        ValueError,
-        vis_loop,
-        obs,
-        SI,
-        noisy=conf["noisy"],
-        mode=conf["mode"],
-        batch_size=20.0,
-    )
+    def setup_class(self):
+        """Set up common objects and variables for the following tests."""
+        from pyvisgen.simulation.data_set import SimulateDataSet
+
+        self.s = SimulateDataSet
+
+    def test_vis_loop(self):
+        import pyvisgen.fits.writer as writer
+        from pyvisgen.simulation.visibility import vis_loop
+        from pyvisgen.utils.data import load_bundles, open_bundles
+
+        bundles = load_bundles(conf["in_path"])
+        _, obs = self.s._get_obs_test(CONFIG)
+
+        data = open_bundles(bundles[0])
+        SI = torch.tensor(data[0])[None]
+        vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode=conf["mode"])
+
+        assert (vis_data[0].V_11[0]).dtype == torch.complex128
+        assert (vis_data[0].V_22[0]).dtype == torch.complex128
+        assert (vis_data[0].V_12[0]).dtype == torch.complex128
+        assert (vis_data[0].V_21[0]).dtype == torch.complex128
+        assert (vis_data[0].num).dtype == torch.float64
+        assert (vis_data[0].base_num).dtype == torch.float64
+        assert torch.is_tensor(vis_data[0].u)
+        assert torch.is_tensor(vis_data[0].v)
+        assert torch.is_tensor(vis_data[0].w)
+        assert (vis_data[0].date).dtype == torch.float64
+
+        # test num vis for time step 0
+        # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2
+        # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0]
+        # dunno what's going on here
+        # assert num_vis_theory == num_vis_calc
+        #
+
+        out_path = Path(conf["out_path_fits"])
+        out = out_path / Path("vis_0.fits")
+        hdu_list = writer.create_hdu_list(vis_data, obs)
+        hdu_list.writeto(out, overwrite=True)
+
+    def test_vis_loop_grid(self):
+        import pyvisgen.fits.writer as writer
+        from pyvisgen.simulation.visibility import vis_loop
+        from pyvisgen.utils.data import load_bundles, open_bundles
+
+        bundles = load_bundles(conf["in_path"])
+        _, obs = self.s._get_obs_test(CONFIG)
+
+        data = open_bundles(bundles[0])
+        SI = torch.tensor(data[0])[None]
+        vis_data = vis_loop(obs, SI, noisy=conf["noisy"], mode="grid")
+
+        assert (vis_data[0].V_11[0]).dtype == torch.complex128
+        assert (vis_data[0].V_22[0]).dtype == torch.complex128
+        assert (vis_data[0].V_12[0]).dtype == torch.complex128
+        assert (vis_data[0].V_21[0]).dtype == torch.complex128
+        assert (vis_data[0].num).dtype == torch.float64
+        assert (vis_data[0].base_num).dtype == torch.float64
+        assert torch.is_tensor(vis_data[0].u)
+        assert torch.is_tensor(vis_data[0].v)
+        assert torch.is_tensor(vis_data[0].w)
+        assert (vis_data[0].date).dtype == torch.float64
+
+        # test num vis for time step 0
+        # num_vis_theory = num_active_telescopes * (num_active_telescopes - 1) / 2
+        # num_vis_calc = vis_data.base_num[vis_data.date == vis_data.date[0]].shape[0]
+        # dunno what's going on here
+        # assert num_vis_theory == num_vis_calc
+        #
+
+        out_path = Path(conf["out_path_fits"])
+        out = out_path / Path("vis_0.fits")
+        hdu_list = writer.create_hdu_list(vis_data, obs)
+        hdu_list.writeto(out, overwrite=True)
+
+    def test_vis_loop_batch_size_auto(self):
+        from pyvisgen.simulation.visibility import vis_loop
+        from pyvisgen.utils.data import load_bundles, open_bundles
+
+        bundles = load_bundles(conf["in_path"])
+        _, obs = self.s._get_obs_test(CONFIG)
+
+        data = open_bundles(bundles[0])
+        SI = torch.tensor(data[0])[None]
+
+        vis_data = vis_loop(
+            obs,
+            SI,
+            noisy=conf["noisy"],
+            mode=conf["mode"],
+            batch_size="auto",
+        )
+
+        assert (vis_data[0].V_11[0]).dtype == torch.complex128
+        assert (vis_data[0].V_22[0]).dtype == torch.complex128
+        assert (vis_data[0].V_12[0]).dtype == torch.complex128
+        assert (vis_data[0].V_21[0]).dtype == torch.complex128
+        assert (vis_data[0].num).dtype == torch.float64
+        assert (vis_data[0].base_num).dtype == torch.float64
+        assert torch.is_tensor(vis_data[0].u)
+        assert torch.is_tensor(vis_data[0].v)
+        assert torch.is_tensor(vis_data[0].w)
+        assert (vis_data[0].date).dtype == torch.float64
+
+    def test_vis_loop_batch_size_invalid(self):
+        from pyvisgen.simulation.visibility import vis_loop
+        from pyvisgen.utils.data import load_bundles, open_bundles
+
+        bundles = load_bundles(conf["in_path"])
+        _, obs = self.s._get_obs_test(CONFIG)
+
+        data = open_bundles(bundles[0])
+        SI = torch.tensor(data[0])[None]
+
+        assert_raises(
+            ValueError,
+            vis_loop,
+            obs,
+            SI,
+            noisy=conf["noisy"],
+            mode=conf["mode"],
+            batch_size="abc",
+        )
+
+        assert_raises(
+            ValueError,
+            vis_loop,
+            obs,
+            SI,
+            noisy=conf["noisy"],
+            mode=conf["mode"],
+            batch_size=20.0,
+        )
 
 
 class TestPolarisation:
@@ -188,10 +184,19 @@ class TestPolarisation:
 
     def setup_class(self):
         """Set up common objects and variables for the following tests."""
-        from pyvisgen.simulation.data_set import create_observation
+        from pyvisgen.simulation.data_set import SimulateDataSet
+        from pyvisgen.simulation.observation import (
+            DEFAULT_FIELD_KWARGS,
+            DEFAULT_POL_KWARGS,
+        )
         from pyvisgen.simulation.visibility import Polarisation
 
-        self.obs = create_observation(conf)
+        _, self.obs = SimulateDataSet._get_obs_test(conf)
+
+        # set to default kwargs for tests, otherwise
+        # some parameters of the config would be None
+        self.obs.field_kwargs = DEFAULT_FIELD_KWARGS
+        self.obs.pol_kwargs = DEFAULT_POL_KWARGS
 
         self.SI = torch.zeros((100, 100))
         self.SI[25::25, 25::25] = 1
@@ -408,9 +413,3 @@ def test_polarisation_field_threshold(self):
 
         # expected to raise an AssertionError
         assert_raises(AssertionError, assert_array_equal, pf, pf_ref)
-
-
-def test_simulate_data_set_no_slurm():
-    from pyvisgen.simulation.data_set import simulate_data_set
-
-    simulate_data_set(config)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 35131a6..e47f966 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,7 +3,7 @@ def test_read_config():
 
     conf = read_data_set_conf("config/default_data_set.toml")
 
-    assert type(conf) == dict
+    assert type(conf) is dict
     assert list(conf.keys()) == [
         "mode",
         "device",
@@ -24,6 +24,12 @@ def test_read_config():
         "corrupted",
         "noisy",
         "sensitivty_cut",
+        "polarisation",
+        "pol_delta",
+        "pol_amp_ratio",
+        "field_order",
+        "field_scale",
+        "field_threshold",
         "num_test_images",
         "bundle_size",
         "train_valid_split",
@@ -33,6 +39,7 @@ def test_read_config():
         "in_path",
         "out_path_fits",
         "out_path_gridded",
+        "file_prefix",
     ]
 
 

From ba0ad293da86429c9ffcd97c4cb43d2f2b8bcef6 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 19:00:07 +0100
Subject: [PATCH 24/34] Update changelog

---
 docs/changes/53.maintenance.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/changes/53.maintenance.rst b/docs/changes/53.maintenance.rst
index 290b8a4..bf99a92 100644
--- a/docs/changes/53.maintenance.rst
+++ b/docs/changes/53.maintenance.rst
@@ -1,6 +1,6 @@
 - Complete rewrite of dataset creation routine ``pyvisgen.simulation.data_set.SimulateDataSet``
   - Accessible using a classmethod to load a config file
-  - Add optional multiprocessing support
+  - Add optional multithreading support
   - Draw and fully test parameters before simulation loop. Previously this was done in the loop and tests were only performed for two time steps
   - Support for polarization
 - Add new default config file for new dataset creation routine
@@ -8,3 +8,4 @@
 - Allow passing HDF5 key in ``pyvisgen.utils.data.open_bundles``
 - Restructure ``pyvisgen.gridding`` module by adding a ``utils`` submodule that contains all utility functions that previously were in the ``gridder`` submodule
   - Also fix parts of the utility functions
+- Update and fix tests

From 482d2c2550631612cb5512b943fb55f1d1106e47 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 19:06:28 +0100
Subject: [PATCH 25/34] Add joblib dependency

---
 environment.yml | 1 +
 pyproject.toml  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/environment.yml b/environment.yml
index 05effcf..babb153 100644
--- a/environment.yml
+++ b/environment.yml
@@ -13,6 +13,7 @@ dependencies:
   - pip
   - towncrier
   - jupyter
+  - joblib
   - pytest
   - pytest-cov
   - pytest-runner
diff --git a/pyproject.toml b/pyproject.toml
index 534f4d1..ade495a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
   "click",
   "h5py",
   "ipython",
+  "joblib",
   "jupyter",
   "matplotlib",
   "natsort",

From 89c5a20e562e2df3fab9413c28aedd801ae2f63a Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 28 Feb 2025 19:11:46 +0100
Subject: [PATCH 26/34] Fix test_conf.toml

---
 tests/test_conf.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_conf.toml b/tests/test_conf.toml
index 7502c5b..1cff0e6 100644
--- a/tests/test_conf.toml
+++ b/tests/test_conf.toml
@@ -29,7 +29,7 @@ field_threshold = "none"
 
 [bundle_options]
 file_prefix=""
-in_path = "/home/knierim/work/pyvisgen/tests/data"
+in_path = "./tests/data"
 out_path_fits = "./tests/build/fits"
 out_path_gridded = "./tests/build/gridded"
 num_test_images = 1000

From 50d9612507e713fe8edd28c581c6cabcb8e21020 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Mon, 3 Mar 2025 08:41:18 +0100
Subject: [PATCH 27/34] Allow dicts as config inputs

---
 pyvisgen/simulation/data_set.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index 9d500b7..b161ca5 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -33,7 +33,7 @@ def __init__(self):
     @classmethod
     def from_config(
         cls,
-        config: str | Path,
+        config: str | Path | dict,
         /,
         image_key: str = "y",
         *,
@@ -49,8 +49,9 @@ def from_config(
 
         Parameters
         ----------
-        config : str or Path
-            Path to the config file.
+        config : str or Path or dict
+            Path to the config file or dict containing the configuration
+            parameters.
         image_key : str, optional
             Key under which the true sky distributions are saved
             in the HDF5 file. Default: ``'y'``
@@ -88,7 +89,12 @@ def from_config(
         if multiprocess in ["all"]:
             cls.multiprocess = -1
 
-        cls.conf = read_data_set_conf(config)
+        if isinstance(config, (str, Path)):
+            cls.conf = read_data_set_conf(config)
+        elif isinstance(config, dict):
+            cls.conf = config
+        else:
+            raise ValueError("Expected config to be one of str, Path or dict!")
 
         print("Simulation Config:\n", cls.conf)
 

From 5625b3e02579cec6be8359519a51ad827039c097 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Mon, 3 Mar 2025 08:41:48 +0100
Subject: [PATCH 28/34] Add amp/phase test for dataset creation

---
 tests/test_simulation.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_simulation.py b/tests/test_simulation.py
index 05a8729..759c5b9 100644
--- a/tests/test_simulation.py
+++ b/tests/test_simulation.py
@@ -39,6 +39,11 @@ def test_run_no_slurm_multiprocess(self):
     def test_run_no_slurm_num_images(self):
         self.s.from_config(CONFIG, num_images=50)
 
+    def test_run_no_slurm_amp_phase_false(self):
+        config = conf
+        config["amp_phase"] = False
+        self.s.from_config(config, multiprocess="all")
+
 
 class TestVisLoop:
     """Unit test class for :func:``pyvisgen.simulation.vis_loop``."""

From 35532d74873bc63c8fee65391230e552d2b35616 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Mon, 3 Mar 2025 21:49:53 +0100
Subject: [PATCH 29/34] Update tests, don't cover slurm option

---
 pyvisgen/simulation/data_set.py |  4 ++--
 tests/test_simulation.py        | 20 ++++++++++++++++++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index b161ca5..1d267d5 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -121,7 +121,7 @@ def from_config(
                 [len(cls.get_images(bundle)) for bundle in data_bundles]
             )
 
-        if slurm:
+        if slurm:  # pragma: no cover
             cls._run_slurm()
         else:
             # draw parameters beforehand, i.e. outside the simulation loop
@@ -209,7 +209,7 @@ def _run(self) -> None:
             f"to '{path_msg}'!"
         )
 
-    def _run_slurm(self) -> None:
+    def _run_slurm(self) -> None:  # pragma: no cover
         """Runs the simulation in slurm and saves visibility data
         as individual FITS files.
         """
diff --git a/tests/test_simulation.py b/tests/test_simulation.py
index 759c5b9..56cba15 100644
--- a/tests/test_simulation.py
+++ b/tests/test_simulation.py
@@ -40,9 +40,25 @@ def test_run_no_slurm_num_images(self):
         self.s.from_config(CONFIG, num_images=50)
 
     def test_run_no_slurm_amp_phase_false(self):
-        config = conf
+        config = conf.copy()
         config["amp_phase"] = False
-        self.s.from_config(config, multiprocess="all")
+        self.s.from_config(config)
+
+    def test_raise_valerr_conf_path(self):
+        assert_raises(ValueError, self.s.from_config, 42)
+
+    def test_run_dense(self):
+        config = conf.copy()
+        config["mode"] = "dense"
+        assert_raises(ValueError, self.s.from_config, config)
+
+    def test_run_polarisation(self):
+        config = conf.copy()
+        config["polarisation"] = "linear"
+        self.s.from_config(config)
+
+    def test_run_no_gridding(self):
+        self.s.from_config(CONFIG, grid=False)
 
 
 class TestVisLoop:

From b9106e9f6b2f96d7c5e16f5a2f5c8995547d6d2b Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Mon, 3 Mar 2025 22:26:48 +0100
Subject: [PATCH 30/34] Fix feed rot computation shapes

---
 pyvisgen/simulation/scan.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py
index f564f6e..0c7f4d2 100644
--- a/pyvisgen/simulation/scan.py
+++ b/pyvisgen/simulation/scan.py
@@ -152,8 +152,8 @@ def calc_feed_rotation(
     X2 : :func:`~torch.tensor`
         Fourier kernel with the applied feed rotation.
     """
-    q1 = torch.cat((bas[11], bas[12]))[..., None]
-    q2 = torch.cat((bas[14], bas[15]))[..., None]
+    q1 = bas[13][..., None]
+    q2 = bas[16][..., None]
 
     if polarisation == "linear":
         X1[..., 0, 0] *= torch.cos(q1)

From 29b3d318e7db6abd60405ba94746896d424b6e0d Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Tue, 11 Mar 2025 14:15:14 +0100
Subject: [PATCH 31/34] Capture empty bundles

---
 pyvisgen/simulation/data_set.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index 1d267d5..ee75743 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -121,6 +121,12 @@ def from_config(
                 [len(cls.get_images(bundle)) for bundle in data_bundles]
             )
 
+        if isinstance(cls.num_images, (int, float)):
+            if int(cls.num_images) == 0:
+                raise ValueError(
+                    "No images found in bundles! Please check your input path!"
+                )
+
         if slurm:  # pragma: no cover
             cls._run_slurm()
         else:

From 160aead8db179689faaf9719048d3c4515074f44 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 14 Mar 2025 09:37:59 +0100
Subject: [PATCH 32/34] Optimize random parameter sampling and testing speed

---
 pyproject.toml                  |   1 +
 pyvisgen/simulation/data_set.py | 201 +++++++++++++++++++++++---------
 2 files changed, 144 insertions(+), 58 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ade495a..e5f154c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
   "natsort",
   "numpy",
   "pandas",
+  "rich",
   "scipy",
   "toma",
   "toml",
diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index ee75743..3e62310 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -4,9 +4,9 @@
 import numpy as np
 import torch
 from astropy import units as un
-from astropy.coordinates import AltAz, EarthLocation, SkyCoord
 from astropy.time import Time
 from joblib import Parallel, delayed
+from rich import print
 from tqdm.autonotebook import tqdm
 
 import pyvisgen.fits.writer as writer
@@ -25,6 +25,15 @@
 
 DATEFMT = "%d-%m-%Y %H:%M:%S"
 
+JD_EPOCH = Time("J2000.0").jd  # Reference epoch (J2000.0)
+DAYS_PER_CENTURY = 36525.0  # Number of days in a Julian century
+GST_COEFFS = {
+    "const": 280.4606,
+    "linear": 360.985647366,
+    "quadratic": 0.000387933,
+    "cubic": -2.583e-8,
+}
+
 
 class SimulateDataSet:
     def __init__(self):
@@ -44,6 +53,7 @@ def from_config(
         date_fmt: str = DATEFMT,
         num_images: int | None = None,
         multiprocess: int | str = 1,
+        device="cuda" if torch.cuda.is_available() else "cpu",
     ):
         """Simulates data from parameters in a config file.
 
@@ -75,6 +85,9 @@ def from_config(
             Number of jobs to use in multiprocessing during the
             sampling and testing phase. If -1 or ``'all'``,
             use all available cores. Default: 1
+        device : str, optional
+            Device to allocate :func:`~torch.tensor`s on.
+            Default: ``'cuda'`` if cuda is available, otherwise ``'cpu'``
         """
         cls = cls()
         cls.key = image_key
@@ -85,6 +98,7 @@ def from_config(
         cls.date_fmt = date_fmt
         cls.num_images = num_images
         cls.multiprocess = multiprocess
+        cls.device = device
 
         if multiprocess in ["all"]:
             cls.multiprocess = -1
@@ -129,6 +143,7 @@ def from_config(
 
         if slurm:  # pragma: no cover
             cls._run_slurm()
+            pass
         else:
             # draw parameters beforehand, i.e. outside the simulation loop
             cls.create_sampling_rc(cls.num_images)
@@ -295,8 +310,8 @@ def create_observation(self, i: int) -> Observation:
 
         obs = Observation(
             **self.samp_opts_const,
-            src_ra=rc["src_ra"][i],
-            src_dec=rc["src_dec"][i],
+            src_ra=rc["src_ra"][i].cpu().numpy(),
+            src_dec=rc["src_dec"][i].cpu().numpy(),
             start_time=rc["start_time"][i],
             scan_duration=int(rc["scan_duration"][i]),
             num_scans=int(rc["num_scans"][i]),
@@ -351,14 +366,20 @@ def create_sampling_rc(self, size: int) -> None:
         self.samp_opts = self.draw_sampling_opts(size)
 
         test_idx = tqdm(
-            range(self.samp_opts["src_ra"].size),
+            range(self.samp_opts["src_ra"].size()[0]),
             position=0,
             desc="Pre-drawing and testing sample parameters",
             colour="#00c1a2",
             leave=False,
         )
 
+        # get array for later use and also get lon/lat conversion
         self.array = layouts.get_array_layout(self.samp_opts_const["array_layout"])
+        self.array_lat, self.array_lon = self._geocentric_to_spherical(
+            self.array.x.to(self.device),
+            self.array.y.to(self.device),
+            self.array.z.to(self.device),
+        )
 
         Parallel(n_jobs=self.multiprocess, backend="threading")(
             delayed(self.test_rand_opts)(i) for i in test_idx
@@ -409,7 +430,7 @@ def draw_sampling_opts(self, size: int) -> dict:
         # if polarisation is None, we don't need to enter the
         # conditional below, so we set delta, amp_ratio, field_order,
         # and field_scale to None.
-        delta, amp_ratio, field_order, field_scale = np.full((4, size), None)
+        delta, amp_ratio, field_order, field_scale = np.full((4, size), np.NaN)
 
         if self.conf["polarisation"]:
             if self.conf["pol_delta"]:
@@ -437,15 +458,15 @@ def draw_sampling_opts(self, size: int) -> dict:
                 field_scale = np.stack((a, b), axis=1)
 
         samp_opts = dict(
-            src_ra=ra,
-            src_dec=dec,
+            src_ra=torch.from_numpy(ra).to(self.device),
+            src_dec=torch.from_numpy(dec).to(self.device),
             start_time=scan_start,
-            scan_duration=scan_duration,
-            num_scans=num_scans,
-            delta=delta,
-            amp_ratio=amp_ratio,
-            order=field_order,
-            scale=field_scale,
+            scan_duration=torch.from_numpy(scan_duration).to(self.device),
+            num_scans=torch.from_numpy(num_scans).to(self.device),
+            delta=torch.from_numpy(delta).to(self.device),
+            amp_ratio=torch.from_numpy(amp_ratio).to(self.device),
+            order=torch.from_numpy(field_order).to(self.device),
+            scale=torch.from_numpy(field_scale).to(self.device),
             threshold=self.conf["field_threshold"],
         )
         # NOTE: We don't need to draw random values for threshold
@@ -468,44 +489,48 @@ def test_rand_opts(self, i: int) -> None:
         i : int
             Index of the current set of sampling parameters.
         """
-        time_steps = self.calc_time_steps(i)
-        src_crd = SkyCoord(
-            self.samp_opts["src_ra"][i], self.samp_opts["src_dec"][i], unit=un.deg
-        )
+        # Loop until a valid observation is found
+        while True:
+            time_steps = self.calc_time_steps(i)
+            ra = self.samp_opts["src_ra"][i]
+            dec = self.samp_opts["src_dec"][i]
+
+            # calculate Greenwich sidereal time
+            jd = Time(time_steps).jd
+
+            jd_diff = jd - JD_EPOCH
+            T = jd_diff / DAYS_PER_CENTURY
+            gst = (
+                GST_COEFFS["const"]
+                + GST_COEFFS["linear"] * jd_diff
+                + T * (GST_COEFFS["quadratic"] + T * GST_COEFFS["cubic"])
+            )
+            gst = gst % 360
 
-        total_stations = len(self.array.st_num)
+            # Compute local sidereal time
+            lst = (gst[:, np.newaxis] + self.array_lon.cpu().numpy()) % 360
+            lst = torch.tensor(lst, device=self.device)
 
-        locations = EarthLocation.from_geocentric(
-            x=self.array.x,
-            y=self.array.y,
-            z=self.array.z,
-            unit=un.m,
-        )
+            alt = self._compute_altitude(ra, dec, lst)
 
-        altaz_frames = AltAz(obstime=time_steps[:, None], location=locations[None])
-        src_alt = src_crd.transform_to(altaz_frames).alt.degree
-
-        # check which stations can see the source for each time step
-        visible = np.logical_and(
-            self.array.el_low.numpy() <= src_alt, src_alt <= self.array.el_high.numpy()
-        )
+            # Check visibility
+            visible = torch.logical_and(
+                self.array.el_low.to(self.device) <= alt,
+                alt <= self.array.el_high.to(self.device),
+            )
+            visible_count_per_t = visible.sum(dim=1)
+            visible_half = visible_count_per_t > len(self.array.st_num) // 2
 
-        # We want at least half of the telescopes to see the source
-        visible_count_per_t = visible.sum(axis=1)
-        visible_half = visible_count_per_t > total_stations // 2
+            # Exit the loop if the condition is met
+            if visible_half.sum().item() >= time_steps.size // 2:
+                break
 
-        # If the source is not seen by half the telescopes half of the observation time,
-        # we redraw the source ra and dec and scan start times, duration,
-        # and number of scans. Then we test again by calling this function recursively.
-        if visible_half.sum() < time_steps.size // 2:
+            # Redraw sampling parameters if the condition is not met
             redrawn_samp_opts = self.draw_sampling_opts(1)
-            self.samp_opts["src_ra"][i] = redrawn_samp_opts["src_ra"][0]
-            self.samp_opts["src_dec"][i] = redrawn_samp_opts["src_dec"][0]
-            self.samp_opts["start_time"][i] = redrawn_samp_opts["start_time"][0]
-            self.samp_opts["scan_duration"][i] = redrawn_samp_opts["scan_duration"][0]
-            self.samp_opts["num_scans"][i] = redrawn_samp_opts["num_scans"][0]
+            keys = ["src_ra", "src_dec", "start_time", "scan_duration", "num_scans"]
 
-            self.test_rand_opts(i)
+            for key in keys:
+                self.samp_opts[key][i] = redrawn_samp_opts[key][0]
 
     def calc_time_steps(self, i: int) -> Time:
         """Calculates time steps for given sampling
@@ -516,31 +541,91 @@ def calc_time_steps(self, i: int) -> Time:
         i : int
             Index of the current set of sampling parameters.
 
+        Returns
+        -------
+        time_steps : :class:`~astropy.time.Time`
+            Observation time steps.
+
         See Also
         --------
         pyvisgen.simulation.data_set.SimulateDataSet.test_rand_opts :
             Tests randomized sampling parameters.
         """
         start_time = Time(self.samp_opts["start_time"][i].isoformat(), format="isot")
-        scan_separation = self.samp_opts_const["scan_separation"]
+
         num_scans = self.samp_opts["num_scans"][i]
+        scan_separation = self.samp_opts_const["scan_separation"]
         scan_duration = self.samp_opts["scan_duration"][i]
+
         int_time = self.samp_opts_const["integration_time"]
 
-        time_lst = [
+        time_steps = (
             start_time
-            + scan_separation * i * un.second
-            + i * scan_duration * un.second
-            + j * int_time * un.second
-            for i in range(num_scans)
-            for j in range(int(scan_duration / int_time) + 1)
-        ]
-        # +1 because t_1 is the stop time of t_0.
-        # In order to save computing power we take
-        # one time more to complete interval
-        time = Time(time_lst)
-
-        return time
+            + torch.arange(num_scans)[:, None] * scan_separation * un.second
+            + torch.arange(int(scan_duration / int_time) + 1)[None, :]
+            * int_time
+            * un.second
+        ).flatten()
+
+        return Time(time_steps)
+
+    def _geocentric_to_spherical(
+        self, x: torch.tensor, y: torch.tensor, z: torch.tensor
+    ) -> torch.tensor:
+        """Convert geocentric coordinates to lon/lat.
+
+        Parameters
+        ----------
+        x, y, z : :func:`~torch.tensor`
+            Cartesian coordinates in the geocentric coordinate
+            system.
+
+        Returns
+        -------
+        lon, lat : :func:`~torch.tensor`
+            Longitude and latitude representation of the
+            geocentric coordinates.
+        """
+        r = torch.sqrt(x**2 + y**2 + z**2)
+        lat = torch.rad2deg(torch.arcsin(z / r))
+        lon = torch.rad2deg(torch.atan2(y, x))
+
+        return lat, lon
+
+    def _compute_altitude(
+        self, ra: torch.tensor, dec: torch.tensor, lst: torch.tensor
+    ) -> torch.tensor:
+        """Computes altitude for a given RA/DEC, and local sidereal time (LST).
+
+        Parameters
+        ----------
+        ra, dec : :func:`~torch.tensor`
+            Right ascension and declination of the source.
+        lst : :func:`~torch.tensor`
+            Local sidereal time of the source.
+
+        Returns
+        -------
+        alt_rad : :func:`~torch.tensor`
+            Altitude of the source.
+        """
+        ra_rad = torch.deg2rad(ra)
+        dec_rad = torch.deg2rad(dec)
+        lst_rad = torch.deg2rad(lst)
+        lat_rad = torch.deg2rad(self.array_lat)
+
+        ha_rad = lst_rad - ra_rad
+
+        # Compute altitude using spherical trigonometry
+        sin_alt = torch.sin(dec_rad) * torch.sin(lat_rad) + torch.cos(
+            dec_rad
+        ) * torch.cos(lat_rad) * torch.cos(ha_rad)
+
+        # limit sin_alt to (-1, 1) to ensure numerical stability
+        # in the arcsin below
+        sin_alt = torch.clamp(sin_alt, -1, 1)
+        alt_rad = torch.arcsin(sin_alt)
+        return torch.rad2deg(alt_rad)
 
     @classmethod
     def _get_obs_test(

From e18735d484110f04ea1bd0d79f13b9e916ea57b6 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 14 Mar 2025 09:54:17 +0100
Subject: [PATCH 33/34] Get device from config

---
 pyvisgen/simulation/data_set.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pyvisgen/simulation/data_set.py b/pyvisgen/simulation/data_set.py
index 3e62310..75fe88c 100644
--- a/pyvisgen/simulation/data_set.py
+++ b/pyvisgen/simulation/data_set.py
@@ -53,7 +53,6 @@ def from_config(
         date_fmt: str = DATEFMT,
         num_images: int | None = None,
         multiprocess: int | str = 1,
-        device="cuda" if torch.cuda.is_available() else "cpu",
     ):
         """Simulates data from parameters in a config file.
 
@@ -85,9 +84,6 @@ def from_config(
             Number of jobs to use in multiprocessing during the
             sampling and testing phase. If -1 or ``'all'``,
             use all available cores. Default: 1
-        device : str, optional
-            Device to allocate :func:`~torch.tensor`s on.
-            Default: ``'cuda'`` if cuda is available, otherwise ``'cpu'``
         """
         cls = cls()
         cls.key = image_key
@@ -98,7 +94,6 @@ def from_config(
         cls.date_fmt = date_fmt
         cls.num_images = num_images
         cls.multiprocess = multiprocess
-        cls.device = device
 
         if multiprocess in ["all"]:
             cls.multiprocess = -1
@@ -112,6 +107,8 @@ def from_config(
 
         print("Simulation Config:\n", cls.conf)
 
+        cls.device = cls.conf["device"]
+
         if grid:
             cls.out_path = Path(cls.conf["out_path_gridded"])
         else:

From 56cf15a9ab862ef62b7e1696a3a9b233b9bfbda7 Mon Sep 17 00:00:00 2001
From: Anno Knierim <anno.knierim@tu-dortmund.de>
Date: Fri, 14 Mar 2025 10:56:32 +0100
Subject: [PATCH 34/34] Fix tensor construction and cloning

---
 pyvisgen/simulation/scan.py       |  4 ++--
 pyvisgen/simulation/visibility.py | 25 ++++++++++++++-----------
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/pyvisgen/simulation/scan.py b/pyvisgen/simulation/scan.py
index 0ba21f3..7816c4a 100644
--- a/pyvisgen/simulation/scan.py
+++ b/pyvisgen/simulation/scan.py
@@ -180,8 +180,8 @@ def calc_feed_rotation(
         xb[..., 1, 0] = X2[..., 1, 0] * torch.exp(1j * q2)
         xb[..., 1, 1] = X2[..., 1, 1] * torch.exp(-1j * q2)
 
-    X1 = xa.clone()
-    X2 = xb.clone()
+    X1 = xa.detach().clone()
+    X2 = xb.detach().clone()
 
     del xa, xb
 
diff --git a/pyvisgen/simulation/visibility.py b/pyvisgen/simulation/visibility.py
index 3d3e421..c89acc1 100644
--- a/pyvisgen/simulation/visibility.py
+++ b/pyvisgen/simulation/visibility.py
@@ -161,17 +161,20 @@ def __init__(
                 **field_kwargs,
             )
 
+            if isinstance(delta, float):
+                delta = torch.tensor(delta)
+
             self.delta = delta
 
             if amp_ratio and (amp_ratio >= 0):
                 ax2 = amp_ratio
             else:
-                ax2 = torch.rand(1)
+                ax2 = torch.rand(1).to(self.device)
 
             ay2 = 1 - ax2
 
-            self.ax2 = self.SI[..., 0].clone() * ax2
-            self.ay2 = self.SI[..., 0].clone() * ay2
+            self.ax2 = self.SI[..., 0].detach().clone().to(self.device) * ax2
+            self.ay2 = self.SI[..., 0].detach().clone().to(self.device) * ay2
         else:
             self.ax2 = self.SI[..., 0]
             self.ay2 = torch.zeros_like(self.ax2)
@@ -198,13 +201,13 @@ def linear(self) -> None:
             2
             * torch.sqrt(self.ax2)
             * torch.sqrt(self.ay2)
-            * torch.cos(torch.deg2rad(torch.tensor(self.delta)))
+            * torch.cos(torch.deg2rad(self.delta))
         )
         self.I[..., 3] = (
             -2
             * torch.sqrt(self.ax2)
             * torch.sqrt(self.ay2)
-            * torch.sin(torch.deg2rad(torch.tensor(self.delta)))
+            * torch.sin(torch.deg2rad(self.delta))
         )
 
     def circular(self) -> None:
@@ -225,13 +228,13 @@ def circular(self) -> None:
             2
             * torch.sqrt(self.ax2)
             * torch.sqrt(self.ay2)
-            * torch.cos(torch.deg2rad(torch.tensor(self.delta)))
+            * torch.cos(torch.deg2rad(self.delta))
         )
         self.I[..., 2] = (
             -2
             * torch.sqrt(self.ax2)
             * torch.sqrt(self.ay2)
-            * torch.sin(torch.deg2rad(torch.tensor(self.delta)))
+            * torch.sin(torch.deg2rad(self.delta))
         )
         self.I[..., 3] = self.ax2 - self.ay2
 
@@ -244,13 +247,13 @@ def dop(self) -> None:
         self.I[..., 2] *= self.polarisation_field
         self.I[..., 3] *= self.polarisation_field
 
-        dop_I = self.I[..., 0].real.clone()
+        dop_I = self.I[..., 0].real.detach().clone()
         dop_I[~mask] = float("nan")
-        dop_Q = self.I[..., 1].real.clone()
+        dop_Q = self.I[..., 1].real.detach().clone()
         dop_Q[~mask] = float("nan")
-        dop_U = self.I[..., 2].real.clone()
+        dop_U = self.I[..., 2].real.detach().clone()
         dop_U[~mask] = float("nan")
-        dop_V = self.I[..., 3].real.clone()
+        dop_V = self.I[..., 3].real.detach().clone()
         dop_V[~mask] = float("nan")
 
         self.lin_dop = torch.sqrt(dop_Q**2 + dop_U**2) / dop_I