Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export dataset according to NXLauetof application definition #101

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions src/ess/nmx/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,169 @@ def export_as_nexus(

Currently exporting step is not expected to be part of sciline pipelines.
"""
import warnings

warnings.warn(
DeprecationWarning(
"Exporting to custom NeXus format will be deprecated in the near future."
"Please use ``export_as_nxlauetof`` instead."
),
stacklevel=1,
)
with h5py.File(output_file, "w") as f:
f.attrs["default"] = "NMX_data"
nx_entry = _create_root_data_entry(f)
_create_sample_group(data, nx_entry)
_create_instrument_group(data, nx_entry)
_create_detector_group(data, nx_entry)
_create_source_group(data, nx_entry)


def _create_lauetof_data_entry(file_obj: h5py.File) -> h5py.Group:
    """Create and return the top-level ``entry`` group marked as NXentry."""
    entry_group = file_obj.create_group("entry")
    entry_group.attrs["NX_class"] = "NXentry"
    return entry_group


def _add_lauetof_definition(nx_entry: h5py.Group) -> None:
    """Record the NeXus application definition name on the entry group."""
    nx_entry["definition"] = "NXlauetof"


def _add_lauetof_instrument(nx_entry: h5py.Group) -> None:
    """Add the NXinstrument group with the fixed instrument name ``NMX``.

    Fix: added the ``-> None`` return annotation for consistency with the
    sibling ``_add_lauetof_*`` helpers, which all annotate their return type.
    """
    nx_instrument = nx_entry.create_group("instrument")
    nx_instrument.attrs["NX_class"] = "NXinstrument"
    nx_instrument["name"] = "NMX"


def _add_lauetof_detector_group(dg: sc.DataGroup, nx_instrument: h5py.Group) -> None:
    """Add an NXdetector group for one panel under the instrument group.

    The group name is taken from ``dg['name']``.  All other fields are
    currently placeholder scalars until real data is wired in.

    Improvement: the seven copy-pasted ``_create_dataset_from_var`` calls
    are collapsed into one data-driven loop over ``(name, placeholder)``
    pairs; the written file content is unchanged.

    Parameters
    ----------
    dg:
        Data group describing one detector panel.  Only ``dg['name']`` is
        read at the moment.
    nx_instrument:
        Parent ``NXinstrument`` h5py group to attach the detector to.
    """
    nx_detector = nx_instrument.create_group(dg["name"])  # Detector name
    nx_detector.attrs["NX_class"] = "NXdetector"
    # TODO: Replace every placeholder below with real data.
    # Notes carried over from the original field-by-field comments:
    # - ``data`` should have shape [n_x_pixels, n_y_pixels, n_tof_bins];
    #   the application definition declares it as integer, but we keep the
    #   original data type for now.
    # - ``time_of_flight`` should hold the actual time-of-flight values of
    #   each bin (shape [nTOF]); unclear whether that means the median/mean
    #   of each bin or the bin edges.
    placeholders = (
        ("polar_angle", sc.scalar(0, unit='deg')),
        ("azimuthal_angle", sc.scalar(0, unit='')),
        ("data", sc.scalar(0, unit='')),
        ("x_pixel_size", sc.scalar(0, unit='mm')),
        ("y_pixel_size", sc.scalar(0, unit='mm')),
        ("distance", sc.scalar(0, unit='m')),
        ("time_of_flight", sc.scalar(0, unit='s')),
    )
    for field_name, placeholder in placeholders:
        _create_dataset_from_var(
            name=field_name, root_entry=nx_detector, var=placeholder
        )


def _add_lauetof_sample_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None:
    """Add the NXsample group holding sample name, orientation and unit cell."""
    nx_sample = nx_entry.create_group("sample")
    nx_sample.attrs["NX_class"] = "NXsample"
    nx_sample["name"] = data['sample_name'].value
    # TODO: Add real data, the sample orientation matrix
    identity_matrix = sc.array(
        dims=['i', 'j'],
        values=[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
        unit="dimensionless",
    )
    _create_dataset_from_var(
        name='orientation_matrix', root_entry=nx_sample, var=identity_matrix
    )
    # TODO: Add real data; entries are a, b, c, alpha, beta, gamma
    cell_parameters = sc.array(
        dims=['i'],
        values=[1.0, 1.0, 1.0, 90.0, 90.0, 90.0],
        unit="dimensionless",
    )
    _create_dataset_from_var(
        name='unit_cell', root_entry=nx_sample, var=cell_parameters
    )


def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None:
    """Add the NXmonitor group (HDF5 name ``control``) with placeholder data.

    Note: ``data`` is currently unused; all monitor fields are placeholders.
    """
    # Renamed local from ``nx_sample`` to ``nx_monitor`` — this is a
    # monitor/control group, not a sample group.
    nx_monitor = nx_entry.create_group("control")
    nx_monitor.attrs["NX_class"] = "NXmonitor"
    nx_monitor["mode"] = "monitor"
    nx_monitor["preset"] = 0.0  # Check if this is the correct value
    # TODO: Add real data, bin values
    counts = sc.array(dims=['tof'], values=[1, 1, 1], unit="counts")
    _create_dataset_from_var(name='data', root_entry=nx_monitor, var=counts)
    # TODO: Add real data, bin edges
    tof_edges = sc.array(dims=['tof'], values=[1, 1, 1], unit="s")
    _create_dataset_from_var(
        name='time_of_flight', root_entry=nx_monitor, var=tof_edges
    )


def export_panel_independent_data_as_nxlauetof(
    data: sc.DataGroup, output_file: str | pathlib.Path | io.BytesIO
) -> None:
    """Write the panel-independent part of an NXlauetof file.

    This creates the file from scratch, including the parent groups that
    panel-dependent datasets/groups live under, so panel-dependent data
    should be appended to the same file afterwards.
    """
    with h5py.File(output_file, "w") as file_handle:
        file_handle.attrs["NX_class"] = "NXlauetof"
        entry = _create_lauetof_data_entry(file_handle)
        _add_lauetof_definition(entry)
        _add_lauetof_instrument(entry)
        _add_lauetof_sample_group(data, entry)
        _add_lauetof_monitor_group(data, entry)
        # The optional ``name`` field is intentionally skipped.
# Skipping ``name`` field


def export_panel_dependent_data_as_nxlauetof(
    *dgs: sc.DataGroup, output_file: str | pathlib.Path | io.BytesIO
) -> None:
    """Append one NXdetector group per panel to an existing NXlauetof file.

    ``output_file`` must already contain the ``entry/instrument`` group,
    i.e. ``export_panel_independent_data_as_nxlauetof`` should have been
    called on it first.
    """
    with h5py.File(output_file, "r+") as file_handle:
        instrument_group: h5py.Group = file_handle["entry/instrument"]
        for panel in dgs:
            _add_lauetof_detector_group(panel, instrument_group)
Comment on lines +281 to +287
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand. If dgs contains all panels, we still use 3x the memory (leaving aside intermediate steps). Wasn't the point to have a writer that works one panel at a time?

Copy link
Member Author

@YooSunYoung YooSunYoung Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought you might also want to write them all at the same time, if they are not too large.
For example, you need all three panels at the same time if you want to see all three of them in an instrument view anyways.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"I thought you might also want" is often an indicator that it should be postponed until we know it is really needed. Otherwise you need to refactor three times and write tests. And I am not sure why the instrument view is related to this.

In any case: How do you imagine the interface for writing multiple panels to the same file when they are not in memory at the same time?

Copy link
Member Author

@YooSunYoung YooSunYoung Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I am not sure why the instrument view is related to this.

After using the instrument view, there's no point in iterating again, and you can just save them all at once.

In any case: How do you imagine the interface for writing multiple panels to the same file when they are not in memory at the same time?

The idea was to save panel independent data first always and then append panel-dependent data into the same file.

file_name = "test.nxs"
export_panel_independent_data_as_nxlauetof(binned_dg, output_file=file_name)
export_panel_dependent_data_as_nxlauetof(binned_dg, output_file=file_name)
del binned_dg

for i in range(1, 3):
    with temp_parameter(wf, DetectorIndex, i) as temp_wf:
        reduced_data = temp_wf.compute(NMXReducedData)
        export_panel_dependent_data_as_nxlauetof(reduced_data, output_file=file_name)
        del reduced_data

And if you don't have problem having all three of them in the memory, you can just:

dg, dgs = all_data_groups # After collecting computed results
export_panel_independent_data_as_nxlauetof(dg, output_file)
export_panel_dependent_data_as_nxlauetof(dg, *dgs, output_file=output_file)

which can be wrapped into a single interface like:

essnmx/src/ess/nmx/nexus.py

Lines 292 to 301 in dce3496

def export_as_nxlauetof(
dg: sc.DataGroup, *dgs: sc.DataGroup, output_file: str | pathlib.Path | io.BytesIO
) -> None:
"""Export the reduced data into a nxlauetof format.
Exporting step is not expected to be part of sciline pipelines.
"""
export_panel_independent_data_as_nxlauetof(dg, output_file)
export_panel_dependent_data_as_nxlauetof(dg, *dgs, output_file=output_file)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, I have no objection to make it available for only single panel at once.
I'll update it then.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the panel-independent data need the workflow? If so, how do we handle that?

Copy link
Member Author

@YooSunYoung YooSunYoung Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

panel-independent data need the workflow

Monitor(control) group might need its own workflow, (we haven't seen any use case of monitor data yet)
but other than that, all other fields only need a loader.

But also same information is available from the reduced data.
So I was planning to retrieve information from the reduced data.

Do you think we should just retrieve them directly from an input file...?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I was mainly wondering how we can design the interface to avoid user error. If the panel-independent data is obtained from the wrong workflow, or if someone tries to write panels from incompatible workflows (like different params/settings) to the same file.



def export_as_nxlauetof(
    data: sc.DataGroup, output_file: str | pathlib.Path | io.BytesIO
) -> None:
    """Export the reduced data into a nxlauetof format.

    Exporting step is not expected to be part of sciline pipelines.
    """
    # Only the panel-independent portion is written here; panel-dependent
    # detector groups are appended to the same file separately via
    # ``export_panel_dependent_data_as_nxlauetof``.
    export_panel_independent_data_as_nxlauetof(data, output_file)
5 changes: 4 additions & 1 deletion tests/exporter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ def test_mcstas_reduction_export_to_bytestream(reduced_data: NMXReducedData) ->
]

with io.BytesIO() as bio:
export_as_nexus(reduced_data, bio)
with pytest.warns(
DeprecationWarning, match='Please use ``export_as_nxlauetof`` instead.'
):
export_as_nexus(reduced_data, bio)
with h5py.File(bio, 'r') as f:
assert 'NMX_data' in f
nmx_data: h5py.Group = f.require_group('NMX_data')
Expand Down