Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to_events when bins in the input have unsorted edges #172

Merged
Merged 3 commits on
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ess/reduce/time_of_flight/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
neutron time-of-arrival at the detectors.
"""

from .toa_to_tof import default_parameters, resample_tof_data, providers, TofWorkflow
from .simulation import simulate_beamline
from .toa_to_tof import default_parameters, resample_tof_data, providers, TofWorkflow
from .to_events import to_events
from .types import (
DistanceResolution,
FrameFoldedTimeOfArrival,
Expand Down Expand Up @@ -56,4 +57,5 @@
"providers",
"resample_tof_data",
"simulate_beamline",
"to_events",
]
20 changes: 11 additions & 9 deletions src/ess/reduce/time_of_flight/to_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,21 @@ def to_events(
edge_sizes = {dim: da.sizes[dim] for dim in edge_dims}
for dim in edge_dims:
coord = da.coords[dim]
low = sc.broadcast(coord[dim, :-1], sizes=edge_sizes).values
high = sc.broadcast(coord[dim, 1:], sizes=edge_sizes).values
left = sc.broadcast(coord[dim, :-1], sizes=edge_sizes).values
right = sc.broadcast(coord[dim, 1:], sizes=edge_sizes).values

# The numpy.random.uniform function below does not support NaNs, so we need to
# replace them with zeros, and then replace them back after the random numbers
# have been generated.
nans = np.isnan(low) | np.isnan(high)
low = np.where(nans, 0.0, low)
high = np.where(nans, 0.0, high)
nans = np.isnan(left) | np.isnan(right)
left = np.where(nans, 0.0, left)
right = np.where(nans, 0.0, right)
# Ensure left <= right
left, right = np.minimum(left, right), np.maximum(left, right)

# In each bin, we generate a number of events with a uniform distribution.
events = rng.uniform(
low, high, size=(events_per_bin, *list(edge_sizes.values()))
left, right, size=(events_per_bin, *list(edge_sizes.values()))
)
events[..., nans] = np.nan
event_coords[dim] = sc.array(
Expand All @@ -77,20 +79,20 @@ def to_events(
data = da.data
if event_masks:
inv_mask = (~reduce(lambda a, b: a | b, event_masks.values())).to(dtype=int)
inv_mask.unit = ''
inv_mask.unit = ""
data = data * inv_mask

# Create the data counts, which are the original counts divided by the number of
# events per bin
sizes = {event_dim: events_per_bin} | da.sizes
val = sc.broadcast(sc.values(data) / float(events_per_bin), sizes=sizes)
kwargs = {'dims': sizes.keys(), 'values': val.values, 'unit': data.unit}
kwargs = {"dims": sizes.keys(), "values": val.values, "unit": data.unit}
if data.variances is not None:
# Note here that all the events are correlated.
# If we later histogram the events with different edges than the original
# histogram, then neighboring bins will be correlated, and the error obtained
# will be too small. It is however not clear what can be done to improve this.
kwargs['variances'] = sc.broadcast(
kwargs["variances"] = sc.broadcast(
sc.variances(data) / float(events_per_bin), sizes=sizes
).values
new_data = sc.array(**kwargs)
Expand Down
15 changes: 15 additions & 0 deletions tests/time_of_flight/to_events_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,18 @@ def test_to_events_two_masks():
assert "m1" not in result.masks
assert sc.identical(hist.masks["m2"], result.masks["m2"])
assert result["x", 2:4].data.sum() == sc.scalar(0.0, unit=table.unit)


def test_to_events_1d_unsorted_bin_edges():
    """to_events must cope with a bin-edge coordinate that is not sorted.

    We histogram random data, swap two interior edges so the "x" coordinate is
    no longer monotonically increasing, convert to events, and re-histogram on
    the sorted edges.
    """
    table = sc.data.table_xyz(1000)
    hist = table.hist(x=10)
    # Swap edges 4 and 5 to make the coordinate unsorted.
    order = list(range(11))
    order[4], order[5] = order[5], order[4]
    hist.coords["x"].values = hist.coords["x"].values[order]
    events = to_events(hist, "event")
    assert "x" not in events.dims
    result = events.hist(x=sc.sort(hist.coords["x"], "x"))
    # Bins away from the swapped edges are unaffected.
    assert sc.allclose(hist.data[:3], result.data[:3])
    assert sc.allclose(hist.data[6:], result.data[6:])
    # The data in the middle gets re-ordered, but the sum should still be the same
    assert sc.isclose(hist.data[3:6].sum(), result.data[3:6].sum())
Loading