Merge pull request #172 from scipp/to-events-fix
Fix `to_events` when bins in the input have unsorted edges
nvaytet authored Jan 30, 2025
2 parents d419d36 + 3553952, commit 5789298
Showing 3 changed files with 29 additions and 10 deletions.
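For context on the bug: numpy.random.Generator.uniform(low, high) does not guarantee meaningful samples when low > high, which is exactly what happens for a bin whose edges are unsorted. The fix swaps the bounds per bin before sampling. A minimal standalone sketch of the idea in plain NumPy, with made-up edge values (not the library code itself):

import numpy as np

rng = np.random.default_rng(seed=1234)

# Bin edges where one adjacent pair is out of order (5.0 before 4.0).
edges = np.array([0.0, 1.0, 2.0, 3.0, 5.0, 4.0, 6.0])
left, right = edges[:-1], edges[1:]

# Swap bounds where needed so that left <= right holds in every bin.
left, right = np.minimum(left, right), np.maximum(left, right)

# Draw a few events per bin; every sample now lies in a valid interval.
events = rng.uniform(left, right, size=(3, left.size))
assert np.all((events >= left) & (events <= right))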
4 changes: 3 additions & 1 deletion src/ess/reduce/time_of_flight/__init__.py
@@ -6,8 +6,9 @@
neutron time-of-arrival at the detectors.
"""

-from .toa_to_tof import default_parameters, resample_tof_data, providers, TofWorkflow
from .simulation import simulate_beamline
+from .toa_to_tof import default_parameters, resample_tof_data, providers, TofWorkflow
+from .to_events import to_events
from .types import (
DistanceResolution,
FrameFoldedTimeOfArrival,
@@ -56,4 +57,5 @@
    "providers",
    "resample_tof_data",
    "simulate_beamline",
+    "to_events",
]
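With the new export in place, to_events can be imported straight from the time-of-flight package. A minimal usage sketch, mirroring the calls used in the tests below (assumes scipp and essreduce are installed):

import scipp as sc
from ess.reduce.time_of_flight import to_events

# Convert a small 1d histogram into a list of synthetic events.
table = sc.data.table_xyz(100)
hist = table.hist(x=10)
events = to_events(hist, "event")
assert "x" not in events.dims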
20 changes: 11 additions & 9 deletions src/ess/reduce/time_of_flight/to_events.py
@@ -45,19 +45,21 @@ def to_events(
edge_sizes = {dim: da.sizes[dim] for dim in edge_dims}
for dim in edge_dims:
coord = da.coords[dim]
-        low = sc.broadcast(coord[dim, :-1], sizes=edge_sizes).values
-        high = sc.broadcast(coord[dim, 1:], sizes=edge_sizes).values
+        left = sc.broadcast(coord[dim, :-1], sizes=edge_sizes).values
+        right = sc.broadcast(coord[dim, 1:], sizes=edge_sizes).values

# The numpy.random.uniform function below does not support NaNs, so we need to
# replace them with zeros, and then replace them back after the random numbers
# have been generated.
-        nans = np.isnan(low) | np.isnan(high)
-        low = np.where(nans, 0.0, low)
-        high = np.where(nans, 0.0, high)
+        nans = np.isnan(left) | np.isnan(right)
+        left = np.where(nans, 0.0, left)
+        right = np.where(nans, 0.0, right)
+        # Ensure left <= right
+        left, right = np.minimum(left, right), np.maximum(left, right)

# In each bin, we generate a number of events with a uniform distribution.
events = rng.uniform(
-            low, high, size=(events_per_bin, *list(edge_sizes.values()))
+            left, right, size=(events_per_bin, *list(edge_sizes.values()))
)
events[..., nans] = np.nan
event_coords[dim] = sc.array(
@@ -77,20 +79,20 @@
data = da.data
if event_masks:
inv_mask = (~reduce(lambda a, b: a | b, event_masks.values())).to(dtype=int)
-        inv_mask.unit = ''
+        inv_mask.unit = ""
data = data * inv_mask

# Create the data counts, which are the original counts divided by the number of
# events per bin
sizes = {event_dim: events_per_bin} | da.sizes
val = sc.broadcast(sc.values(data) / float(events_per_bin), sizes=sizes)
-    kwargs = {'dims': sizes.keys(), 'values': val.values, 'unit': data.unit}
+    kwargs = {"dims": sizes.keys(), "values": val.values, "unit": data.unit}
if data.variances is not None:
# Note here that all the events are correlated.
# If we later histogram the events with different edges than the original
# histogram, then neighboring bins will be correlated, and the error obtained
# will be too small. It is however not clear what can be done to improve this.
-        kwargs['variances'] = sc.broadcast(
+        kwargs["variances"] = sc.broadcast(
sc.variances(data) / float(events_per_bin), sizes=sizes
).values
new_data = sc.array(**kwargs)
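A side note on the weight handling visible in this hunk: dividing each bin's counts by events_per_bin preserves the total weight, so histogramming the generated events with the original edges reproduces the original bin sums. A small numeric sketch with made-up counts:

import numpy as np

counts = np.array([12.0, 6.0])  # original histogram values
events_per_bin = 3

# Each bin turns into `events_per_bin` events carrying counts / events_per_bin,
# i.e. 3 * 4.0 == 12.0 and 3 * 2.0 == 6.0, so the total weight is unchanged.
weights = np.repeat(counts / events_per_bin, events_per_bin)
assert np.isclose(weights.sum(), counts.sum())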
15 changes: 15 additions & 0 deletions tests/time_of_flight/to_events_test.py
@@ -109,3 +109,18 @@ def test_to_events_two_masks():
assert "m1" not in result.masks
assert sc.identical(hist.masks["m2"], result.masks["m2"])
assert result["x", 2:4].data.sum() == sc.scalar(0.0, unit=table.unit)


+def test_to_events_1d_unsorted_bin_edges():
+    table = sc.data.table_xyz(1000)
+    hist = table.hist(x=10)
+    hist.coords["x"].values = hist.coords["x"].values[
+        [0, 1, 2, 3, 5, 4, 6, 7, 8, 9, 10]
+    ]
+    events = to_events(hist, "event")
+    assert "x" not in events.dims
+    result = events.hist(x=sc.sort(hist.coords["x"], "x"))
+    assert sc.allclose(hist.data[:3], result.data[:3])
+    assert sc.allclose(hist.data[6:], result.data[6:])
+    # The data in the middle gets re-ordered, but the sum should still be the same
+    assert sc.isclose(hist.data[3:6].sum(), result.data[3:6].sum())
