Skip to content

WeatherXds: make STATION_POSITION required, remove antenna_name + station=>station_name #373

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/xradio/measurement_set/_utils/_msv2/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1214,13 +1214,13 @@ def get_observation_info(in_file, observation_id, intents):
)
# but before, keep the name-id arrays, we need them for the pointing and weather xds
ant_xds_name_ids = ant_xds["antenna_name"].set_xindex("antenna_id")
ant_xds_station_name_ids = ant_xds["station"].set_xindex("antenna_id")
ant_position_xds_with_ids = ant_xds["ANTENNA_POSITION"].set_xindex("antenna_id")
# No longer needed after converting to name.
ant_xds = ant_xds.drop_vars("antenna_id")

# Create weather_xds
start = time.time()
weather_xds = create_weather_xds(in_file, ant_xds_station_name_ids)
weather_xds = create_weather_xds(in_file, ant_position_xds_with_ids)
logger.debug("Time weather " + str(time.time() - start))

# Create pointing_xds
Expand Down Expand Up @@ -1425,7 +1425,7 @@ def antenna_ids_to_names(
"antenna_id",
"antenna_name",
"mount",
"station",
"station_name",
]
for unwanted_coord in unwanted_coords_from_ant_xds:
xds = xds.drop_vars(unwanted_coord)
Expand Down
10 changes: 5 additions & 5 deletions src/xradio/measurement_set/_utils/_msv2/create_antenna_xds.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def extract_antenna_info(

to_new_coords = {
"NAME": ["antenna_name", ["antenna_name"]],
"STATION": ["station", ["antenna_name"]],
"STATION": ["station_name", ["antenna_name"]],
"MOUNT": ["mount", ["antenna_name"]],
# "PHASED_ARRAY_ID": ["phased_array_id", ["antenna_name"]],
"antenna_id": ["antenna_id", ["antenna_name"]],
Expand Down Expand Up @@ -158,9 +158,9 @@ def extract_antenna_info(

# None of the native numpy functions work on the github test runner.
antenna_name = ant_xds["antenna_name"].values
station = ant_xds["station"].values
station_name = ant_xds["station_name"].values
antenna_name = np.array(
list(map(lambda x, y: x + "_" + y, antenna_name, station))
list(map(lambda x, y: x + "_" + y, antenna_name, station_name))
)

ant_xds["antenna_name"] = xr.DataArray(antenna_name, dims=["antenna_name"])
Expand Down Expand Up @@ -376,7 +376,7 @@ def create_gain_curve_xds(

ant_borrowed_coords = {
"antenna_name": ant_xds.coords["antenna_name"],
"station": ant_xds.coords["station"],
"station_name": ant_xds.coords["station_name"],
"mount": ant_xds.coords["mount"],
"telescope_name": ant_xds.coords["telescope_name"],
"receptor_label": ant_xds.coords["receptor_label"],
Expand Down Expand Up @@ -486,7 +486,7 @@ def create_phase_calibration_xds(

ant_borrowed_coords = {
"antenna_name": ant_xds.coords["antenna_name"],
"station": ant_xds.coords["station"],
"station_name": ant_xds.coords["station_name"],
"mount": ant_xds.coords["mount"],
"telescope_name": ant_xds.coords["telescope_name"],
"receptor_label": ant_xds.coords["receptor_label"],
Expand Down
164 changes: 151 additions & 13 deletions src/xradio/measurement_set/_utils/_msv2/msv4_sub_xdss.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,140 @@ def make_taql_where_weather(
return taql_where


def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
def prepare_generic_weather_xds_and_station_name(
generic_weather_xds: xr.Dataset,
in_file: str,
ant_position_with_ids: xr.DataArray,
has_asdm_station_position: bool,
) -> tuple[xr.Dataset, np.ndarray]:
"""
A generic_weather_xds loaded with load_generic_table() might still need to be reloaded
with an additional WHERE condition to constrain the indices of antennas. But this depends on whether
ASDM/importasdm extension columns are present or not.

This also prepares the station_name values:
- if has_asdm_station_ids:
- tries to find from ASDM_STATION the station names,
- otherwise, takes ids (antenna_ids in generic_weather were actually the ASDM_STATION_IDs
- else: get the values from antenna_xds (the stations present)


Parameters
----------
generic_weather_xds : xr.Dataset
generic dataset read from an MSv2 WEATHER subtable
in_file : str
Input MS name.
ant_position_with_ids : xr.DataArray
antenna_position data var from the antenna_xds (expected to still include the initial ANTENNA_ID
coordinate as well as other coordinates from the antenna_xds)
has_asdm_station_position : bool
wHether this generic weatehr_xds should be treated as including the nonstandard extensions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whether?

NS_WX_STATION_ID and NS_WX_STATION_POSITION as created by CASA/importasdm (ALMA and VLA).

Returns
-------
(generic_weather_xds, station_name): tuple[[xarray.Dataset, numpy.ndarray]
Weather Xarray Dataset prepared for generic conversion to MSv4, values for the station_name coordinate
"""

if has_asdm_station_position:
asdm_station_path = os.path.join(in_file, "ASDM_STATION")
if table_exists(asdm_station_path):
asdm_station_xds = load_generic_table(in_file, "ASDM_STATION")
station_name = asdm_station_xds.name.values[
generic_weather_xds["ANTENNA_ID"].values
]
else:
# if no info from ASDM_STATION, use the indices from antenna_id which was actually the NS_WX_STATION_ID
len_antenna_id = generic_weather_xds.sizes["ANTENNA_ID"]
station_name = list(
map(
lambda x, y: x + "_" + y,
["Station"] * len_antenna_id,
generic_weather_xds["ANTENNA_ID"].values.astype(str),
)
)

else:
taql_where = make_taql_where_weather(in_file, ant_position_with_ids)
generic_weather_xds = load_generic_table(
in_file,
"WEATHER",
rename_ids=subt_rename_ids["WEATHER"],
taql_where=taql_where,
)

if not generic_weather_xds.data_vars:
# for example when the weather subtable only has info for antennas/stations
# not present in the MSv4 (no overlap between antennas loaded in ant_xds and weather)
return None, None

stations_present = ant_position_with_ids.sel(
antenna_id=generic_weather_xds["ANTENNA_ID"]
).station_name
station_name = stations_present.values

return generic_weather_xds, station_name


def finalize_station_position(
weather_xds: xr.Dataset, ant_position_with_ids, has_asdm_station_position: bool
) -> xr.Dataset:
"""
For a STATION_POSITION data var being added to a weather_xds, make sure coordinates and dimensions
are conforming to the schema.

Parameters
----------
weather_xds : xr.Dataset
weather_xds where we still need to ensure the right coordinates and attributes
ant_position_with_ids : xr.DataArray
antenna_position data var from the antenna_xds (expected to still include the initial ANTENNA_ID
coordinate as well as other coordinates from the antenna_xds)
has_asdm_station_position : bool
Whether this generic weatehr_xds should be treated as including the nonstandard extensions
NS_WX_STATION_ID and NS_WX_STATION_POSITION as created by CASA/importasdm (ALMA and VLA).

Returns
-------
weather_xds: xarray.Dataset
Weather Xarray Dataset with all coordinates and attributes in STATION_POSITION
"""
if has_asdm_station_position:
# STATION_POSITION has been created but needs prooper dimensions and attrs
# Drop the time dim
weather_xds["STATION_POSITION"] = weather_xds["STATION_POSITION"].sel(
time_weather=0, drop=True, method="nearest"
)
# borrow location frame attributes from antenna position
weather_xds["STATION_POSITION"].attrs = ant_position_with_ids.attrs
else:
# borrow from ant_posision_with_ids but without carrying over other coords
weather_xds = weather_xds.assign(
{
"STATION_POSITION": (
["station_name", "cartesian_pos_label"],
ant_position_with_ids.values,
ant_position_with_ids.attrs,
)
}
)

return weather_xds


def create_weather_xds(in_file: str, ant_position_with_ids: xr.DataArray):
"""
Creates a Weather Xarray Dataset from a MS v2 WEATHER table.

Parameters
----------
in_file : str
Input MS name.
ant_xds_station_name_ids : xr.DataArray
station name data array from antenna_xds, with name/id information
ant_position_with_ids : xr.DataArray
antenna_position data var from the antenna_xds (expected to still including the initial ANTENNA_ID coordinate
as wellas other coordinates from the antenna_xds)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well as?


Returns
-------
Expand All @@ -190,32 +314,32 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
"""

try:
taql_where = make_taql_where_weather(in_file, ant_xds_station_name_ids)
generic_weather_xds = load_generic_table(
in_file,
"WEATHER",
rename_ids=subt_rename_ids["WEATHER"],
taql_where=taql_where,
)
except ValueError as _exc:
return None

if not generic_weather_xds.data_vars:
# for example when the weather subtable only has info for antennas/stations
# not present in the MSv4 (no overlap between antennas loaded in ant_xds and weather)
has_asdm_station_position = (
"NS_WX_STATION_POSITION" in generic_weather_xds.data_vars
)
generic_weather_xds, station_name = prepare_generic_weather_xds_and_station_name(
generic_weather_xds, in_file, ant_position_with_ids, has_asdm_station_position
)
if not generic_weather_xds:
return None

weather_xds = xr.Dataset(attrs={"type": "weather"})
stations_present = ant_xds_station_name_ids.sel(
antenna_id=generic_weather_xds["ANTENNA_ID"]
)
coords = {
"station_name": stations_present.data,
"antenna_name": stations_present.coords["antenna_name"].data,
"station_name": station_name,
"cartesian_pos_label": ["x", "y", "z"],
}
weather_xds = weather_xds.assign_coords(coords)

dims_station_time = ["station_name", "time_weather"]
dims_station_time_position = dims_station_time + ["cartesian_pos_label"]
to_new_data_variables = {
"H20": ["H2O", dims_station_time],
"IONOS_ELECTRON": ["IONOS_ELECTRON", dims_station_time],
Expand All @@ -226,6 +350,15 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
"WIND_DIRECTION": ["WIND_DIRECTION", dims_station_time],
"WIND_SPEED": ["WIND_SPEED", dims_station_time],
}
if has_asdm_station_position:
to_new_data_variables.update(
{
"NS_WX_STATION_POSITION": [
"STATION_POSITION",
dims_station_time_position,
],
}
)

to_new_coords = {
"TIME": ["time_weather", ["time_weather"]],
Expand All @@ -234,6 +367,9 @@ def create_weather_xds(in_file: str, ant_xds_station_name_ids: xr.DataArray):
weather_xds = convert_generic_xds_to_xradio_schema(
generic_weather_xds, weather_xds, to_new_data_variables, to_new_coords
)
weather_xds = finalize_station_position(
weather_xds, ant_position_with_ids, has_asdm_station_position
)

# TODO: option to interpolate to main time

Expand All @@ -256,6 +392,7 @@ def correct_generic_pointing_xds(
and tries to correct several deviations from the MSv2 specs seen in
common test data.
The problems fixed here include wrong dimensions:

- for example transposed dimensions with respect to the MSv2 specs (output
from CASA simulator),
- missing/additional unexpected dimensions when some of the columns are
Expand Down Expand Up @@ -423,6 +560,7 @@ def prepare_generic_sys_cal_xds(generic_sys_cal_xds: xr.Dataset) -> xr.Dataset:
sys_cal_xds dataset, as their structure differs in dimensions and order
of dimensions.
This function performs various prepareation steps, such as:

- filter out dimensions not neeed for an individual MSv4 (SPW, FEED),
- drop variables loaded from columns with all items set to empty array,
- transpose the dimensions frequency,receptor
Expand Down
22 changes: 12 additions & 10 deletions src/xradio/measurement_set/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
AntennaName = Literal["antenna_name"]
""" Antenna name dimension """
StationName = Literal["station_name"]
""" Station identifier dimension """
""" Station name dimension """
ReceptorLabel = Literal["receptor_label"]
""" Receptor label dimension """
ToneLabel = Literal["tone_label"]
Expand Down Expand Up @@ -1443,7 +1443,7 @@ class AntennaXds:
# Coordinates
antenna_name: Coordof[AntennaNameArray]
""" Antenna name """
station: Coord[AntennaName, str]
station_name: Coord[AntennaName, str]
""" Name of the station pad (relevant to arrays with moving antennas). """
mount: Coord[AntennaName, str]
""" Mount type of the antenna. Reserved keywords include: ”EQUATORIAL” - equatorial mount;
Expand Down Expand Up @@ -1527,7 +1527,7 @@ class GainCurveXds:
# Coordinates
antenna_name: Coordof[AntennaNameArray]
""" Antenna name """
station: Coord[AntennaName, str]
station_name: Coord[AntennaName, str]
""" Name of the station pad (relevant to arrays with moving antennas). """
mount: Coord[AntennaName, str]
""" Mount type of the antenna. Reserved keywords include: ”EQUATORIAL” - equatorial mount;
Expand Down Expand Up @@ -1586,7 +1586,7 @@ class PhaseCalibrationXds:
# Coordinates
antenna_name: Coordof[AntennaNameArray]
""" Antenna name """
station: Coord[AntennaName, str]
station_name: Coord[AntennaName, str]
""" Name of the station pad (relevant to arrays with moving antennas). """
mount: Coord[AntennaName, str]
""" Mount type of the antenna. Reserved keywords include: ”EQUATORIAL” - equatorial mount;
Expand Down Expand Up @@ -1670,13 +1670,11 @@ class WeatherXds:

# Coordinates
station_name: Coord[StationName, str]
""" Station identifier """
""" Station name """
time: Optional[Coordof[TimeInterpolatedCoordArray]]
""" Mid-point of the time interval. Labeled 'time' when interpolated to main time axis """
time_weather: Optional[Coordof[TimeWeatherCoordArray]]
""" Mid-point of the time interval. Labeled 'time_weather' when not interpolated to main time axis """
antenna_name: Optional[Coordof[AntennaNameArray]]
""" Antenna identifier """
ellipsoid_pos_label: Optional[Coord[EllipsoidPosLabel, str]] = (
"lon",
"lat",
Expand All @@ -1686,6 +1684,10 @@ class WeatherXds:
cartesian_pos_label: Optional[Coord[CartesianPosLabel, str]] = ("x", "y", "z")
""" Coordinate labels of geocentric earth location data (typically shape 3 and 'x', 'y', 'z')"""

# Station position variable - required
STATION_POSITION: Data[tuple[StationName], LocationArray] = None
""" Position of the weather station """

# Data variables (all optional)
H2O: Optional[
Data[
Expand Down Expand Up @@ -1764,8 +1766,6 @@ class WeatherXds:
]
] = None
""" Average wind speed """
STATION_POSITION: Optional[Data[tuple[StationName], LocationArray]] = None
""" Station position """

# Attributes
type: Attr[Literal["weather"]] = "weather"
Expand Down Expand Up @@ -1881,7 +1881,9 @@ class SystemCalibrationXds:

# Coordinates
antenna_name: Coordof[AntennaNameArray]
""" Antenna identifier """
""" Antenna name """
station_name: Coord[AntennaName, str]
""" Name of the station pad (relevant to arrays with moving antennas). """
receptor_label: Coord[ReceptorLabel, str]
""" Names of receptors """
polarization_type: Coord[tuple[AntennaName, ReceptorLabel], str]
Expand Down
Loading
Loading