Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SANS2D rules #356

Merged
merged 13 commits into from
Jan 28, 2025
6 changes: 3 additions & 3 deletions rundetection/ingestion/extracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def skip_extract(job_request: JobRequest, _: Any) -> JobRequest:
return job_request


def loq_extract(job_request: JobRequest, dataset: Any) -> JobRequest:
def sans_extract(job_request: JobRequest, dataset: Any) -> JobRequest:
"""
Get the sample details and the cycle strings
:param job_request: The job request
Expand Down Expand Up @@ -173,8 +173,8 @@ def get_extraction_function(instrument: str) -> Callable[[JobRequest, Any], JobR
return tosca_extract
case "osiris":
return osiris_extract
case "loq":
return loq_extract
case "loq" | "sans2d":
return sans_extract
case "iris":
return iris_extract
case _:
Expand Down
47 changes: 0 additions & 47 deletions rundetection/rules/common_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,53 +28,6 @@ def verify(self, job_request: JobRequest) -> None:
job_request.will_reduce = self._value


class CheckIfScatterSANS(Rule[bool]):
    """
    Rule that stops reduction for runs whose experiment title does not describe a sample scatter run

    A title passes when it ends with _SANS/TRANS, does not mention empty or direct (in any letter case) and contains
    both an opening and a closing curly brace. Any other title sets will_reduce to False and logs the reason.
    """

    def __init__(self, value: bool):
        super().__init__(value)
        # NOTE(review): presumably read by the rule ordering code so that this check runs first - confirm with caller.
        self.should_be_first = True

    def verify(self, job_request: JobRequest) -> None:
        """
        Set will_reduce to False on the job request when its experiment title fails any of the scatter checks
        :param job_request: The job request whose experiment title is checked
        :return: None
        """
        title = job_request.experiment_title
        if not title.endswith("_SANS/TRANS"):
            job_request.will_reduce = False
            logger.error("Not a scatter run. Does not have _SANS/TRANS at the end of the experiment title.")
            return
        # If it has empty or direct in the title assume it is a direct run file instead of a normal scatter. The
        # comparison is case-insensitive so that spellings such as "Empty" or "Direct" are caught too.
        lowered_title = title.lower()
        if "empty" in lowered_title or "direct" in lowered_title:
            job_request.will_reduce = False
            logger.error(
                "If it is a scatter, contains empty or direct in the title and is assumed to be a scatter "
                "for an empty can run."
            )
            return
        # Both braces are needed to form an {x} section, so a title with only one of them is rejected as well.
        if "{" not in title or "}" not in title:
            job_request.will_reduce = False
            logger.error("If it is a scatter, contains {} in format {x}_{y}_SANS/TRANS. or {x}_SANS/TRANS.")
            return


class SansSliceWavs(Rule[str]):
    """
    Rule that hands the configured SliceWavs string over to the job request
    """

    def verify(self, job_request: JobRequest) -> None:
        """
        Store this rule's value under the slice_wavs key of the job request's additional values
        :param job_request: The job request to update
        :return: None
        """
        slice_wavs = self._value
        job_request.additional_values["slice_wavs"] = slice_wavs


class SansPhiLimits(Rule[str]):
    """
    Rule that hands the configured PhiLimits string over to the job request
    """

    def verify(self, job_request: JobRequest) -> None:
        """
        Store this rule's value under the phi_limits key of the job request's additional values
        :param job_request: The job request to update
        :return: None
        """
        phi_limits = self._value
        job_request.additional_values["phi_limits"] = phi_limits


class MolSpecStitchRule(Rule[bool]):
"""
Enables Tosca, Osiris, and Iris Run stitching
Expand Down
13 changes: 5 additions & 8 deletions rundetection/rules/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@
from typing import Any

from rundetection.rules.common_rules import (
CheckIfScatterSANS,
EnabledRule,
MolSpecStitchRule,
SansPhiLimits,
SansSliceWavs,
)
from rundetection.rules.inter_rules import InterStitchRule
from rundetection.rules.iris_rules import IrisCalibrationRule, IrisReductionRule
from rundetection.rules.loq_rules import LoqFindFiles, LoqUserFile
from rundetection.rules.mari_rules import MariMaskFileRule, MariStitchRule, MariWBVANRule
from rundetection.rules.osiris_rules import (
OsirisDefaultGraniteAnalyser,
Expand All @@ -22,6 +18,7 @@
OsirisReflectionCalibrationRule,
)
from rundetection.rules.rule import MissingRuleError, Rule, T
from rundetection.rules.sans_rules import CheckIfScatterSANS, SansFindFiles, SansPhiLimits, SansSliceWavs, SansUserFile


def rule_factory(key_: str, value: T) -> Rule[Any]: # noqa: C901, PLR0911, PLR0912
Expand Down Expand Up @@ -65,12 +62,12 @@ def rule_factory(key_: str, value: T) -> Rule[Any]: # noqa: C901, PLR0911, PLR0
case "checkifscattersans":
if isinstance(value, bool):
return CheckIfScatterSANS(value)
case "loqfindfiles":
case "loqfindfiles" | "sansfindfiles":
if isinstance(value, bool):
return LoqFindFiles(value)
case "loquserfile":
return SansFindFiles(value)
case "loquserfile" | "sansuserfile":
if isinstance(value, str):
return LoqUserFile(value)
return SansUserFile(value)
case "sansphilimits":
if isinstance(value, str):
return SansPhiLimits(value)
Expand Down
114 changes: 73 additions & 41 deletions rundetection/rules/loq_rules.py → rundetection/rules/sans_rules.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,18 @@
"""
Rules for the SANS instruments (LOQ and SANS2D)
"""

from __future__ import annotations

import logging
import re
import typing
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING

import requests
import xmltodict

from rundetection.rules.common_rules import logger
from rundetection.rules.rule import Rule

if typing.TYPE_CHECKING:
if TYPE_CHECKING:
from rundetection.job_requests import JobRequest

logger = logging.getLogger(__name__)


@dataclass
class SansFileData:
Expand All @@ -28,22 +21,23 @@ class SansFileData:
run_number: str


def _extract_run_number_from_filename(filename: str, prefix: str = "LOQ") -> str:
    """
    Get the run number, without leading zeros, from a file name such as LOQ00100002.nxs
    :param filename: The file name, made up of the instrument prefix, the zero padded run number and an extension
    :param prefix: The instrument prefix to drop from the front of the name, LOQ by default
    :return: The run number as a string with the prefix, the padding zeros and the extension removed
    """
    # Everything from the first dot onwards is treated as the extension.
    stem = filename.split(".")[0]
    # removeprefix drops the exact prefix once, whereas lstrip(prefix) would strip any run of its characters.
    return stem.removeprefix(prefix).lstrip("0")


def _is_sample_transmission_file(sans_file: SansFileData, sample_title: str) -> bool:
    """
    Check whether a file is the transmission run for the given sample
    :param sans_file: The file entry to check
    :param sample_title: The sample title that has to appear in the file's title
    :return: True if the sample title is part of the file's title and the file type is TRANS, otherwise False
    """
    title_mentions_sample = sample_title in sans_file.title
    return title_mentions_sample and sans_file.type == "TRANS"


def _is_sample_direct_file(sans_file: SansFileData) -> bool:
return ("direct" in sans_file.title.lower() or "empty" in sans_file.title.lower()) and sans_file.type == "TRANS"
return (
"direct" in sans_file.title.lower()
or "empty" in sans_file.title.lower()
or "mt " in sans_file.title.lower()
or " mt" in sans_file.title.lower()
or sans_file.title.lower() == "{mt}"
) and sans_file.type == "TRANS"


def _is_can_scatter_file(sans_file: SansFileData, can_title: str) -> bool:
title_contents = re.findall(r"{.*?}", sans_file.title)
return len(title_contents) == 1 and can_title == title_contents[0] and sans_file.type == "SANS/TRANS"
return len(title_contents) == 1 and can_title == title_contents[0] and sans_file.type in {"SANS/TRANS", "SANS"}


def _is_can_transmission_file(sans_file: SansFileData, can_title: str) -> bool:
Expand Down Expand Up @@ -79,24 +73,15 @@ def _find_can_trans_file(sans_files: list[SansFileData], can_title: str) -> Sans
return None


def find_path_for_run_number(cycle_path: str, run_number: int) -> Path | None:
    """
    Find the LOQ nexus file of a run number inside a cycle directory
    :param cycle_path: The directory that holds the files of the cycle
    :param run_number: The run number to look for
    :return: The first existing path of the form LOQ<zero padded run number>.nxs, or None if no such file exists
    """
    run_digits = str(run_number)
    # The amount of zero padding in the file name is not known, so every width from 0 to 10 is tried in turn.
    candidates = (Path(f"{cycle_path}/LOQ{run_digits.zfill(width)}.nxs") for width in range(11))
    return next((candidate for candidate in candidates if candidate.exists()), None)


def grab_cycle_instrument_index(cycle: str) -> str:
def grab_cycle_instrument_index(cycle: str, instrument: str) -> str:
_, cycle_year, cycle_num = cycle.split("_")
url = f"http://data.isis.rl.ac.uk/journals/ndxloq/journal_{cycle_year}_{cycle_num}.xml"
url = f"http://data.isis.rl.ac.uk/journals/ndx{instrument.lower()}/journal_{cycle_year}_{cycle_num}.xml"
return requests.get(url, timeout=5).text


def create_list_of_files(job_request: JobRequest) -> list[SansFileData]:
cycle = job_request.additional_values["cycle_string"]
xml = grab_cycle_instrument_index(cycle=cycle)
xml = grab_cycle_instrument_index(cycle=cycle, instrument=job_request.instrument)
cycle_run_info = xmltodict.parse(xml)
list_of_files = []
for run_info in cycle_run_info["NXroot"]["NXentry"]:
Expand All @@ -121,57 +106,104 @@ def _set_transmission_file(job_request: JobRequest, sample_title: str, sans_file
if not job_request.additional_values["included_trans_as_scatter"]:
trans_file = _find_trans_file(sans_files=sans_files, sample_title=sample_title)
trans_run_number = trans_file.run_number if trans_file is not None else None
logger.info("LOQ trans found %s", trans_run_number)
logger.info("%s trans found %s", job_request.instrument, trans_run_number)
else:
trans_run_number = str(job_request.run_number)
logger.info("LOQ trans set as scatter %s", trans_run_number)
logger.info("%s trans set as scatter %s", job_request.instrument, trans_run_number)
if trans_run_number is not None:
job_request.additional_values["scatter_transmission"] = trans_run_number


def _set_can_files(can_title: str | None, job_request: JobRequest, sans_files: list[SansFileData]) -> None:
if can_title is not None:
can_scatter = _find_can_scatter_file(sans_files=sans_files, can_title=can_title)
logger.info("LOQ can scatter found %s", can_scatter)
logger.info("%s can scatter found %s", job_request.instrument, can_scatter)
if can_scatter is not None:
job_request.additional_values["can_scatter"] = can_scatter.run_number

# If using M4 monitor then can scatter is the transmission
if not job_request.additional_values["included_trans_as_scatter"]:
can_trans = _find_can_trans_file(sans_files=sans_files, can_title=can_title)
logger.info("LOQ can trans found %s", can_trans)
logger.info("%s can trans found %s", job_request.instrument, can_trans)
else:
can_trans = can_scatter
logger.info("LOQ can trans set as scatter %s", can_scatter)
logger.info("%s can trans set as scatter %s", job_request.instrument, can_scatter)
if can_trans is not None and can_scatter is not None:
job_request.additional_values["can_transmission"] = can_trans.run_number


def _set_direct_files(job_request: JobRequest, sans_files: list[SansFileData]) -> None:
direct_file = _find_direct_file(sans_files=sans_files)
logger.info("LOQ direct files found %s", direct_file)
logger.info("%s direct files found %s", job_request.instrument, direct_file)
if direct_file is not None:
if "scatter_transmission" in job_request.additional_values:
job_request.additional_values["scatter_direct"] = direct_file.run_number
if "can_scatter" in job_request.additional_values and "can_transmission" in job_request.additional_values:
job_request.additional_values["can_direct"] = direct_file.run_number


class LoqFindFiles(Rule[bool]):
class CheckIfScatterSANS(Rule[bool]):
def __init__(self, value: bool):
super().__init__(value)
self.should_be_first = True

def verify(self, job_request: JobRequest) -> None:
if not job_request.experiment_title.endswith("_SANS/TRANS") and not job_request.experiment_title.endswith(
"_SANS"
):
job_request.will_reduce = False
logger.error("Not a scatter run. Does not have _SANS or _SANS/TRANS at the end of the experiment title.")
return
Pasarus marked this conversation as resolved.
Show resolved Hide resolved
# A direct or empty run must not be reduced as a scatter. _is_sample_direct_file also requires the type to be TRANS,
# so the type is hard coded to TRANS here in order to apply only the title part of that check.
if _is_sample_direct_file(
SansFileData(title=job_request.experiment_title, type="TRANS", run_number=str(job_request.run_number))
):
job_request.will_reduce = False
logger.error(
"If it is a scatter, contains empty or direct in the title and is assumed to be a scatter "
"for an empty can run."
Pasarus marked this conversation as resolved.
Show resolved Hide resolved
)
return
if "{" not in job_request.experiment_title and "}" not in job_request.experiment_title:
job_request.will_reduce = False
logger.error("If it is a scatter, contains {} in format {x}_{y}_SANS/TRANS. or {x}_SANS/TRANS.")
return
Pasarus marked this conversation as resolved.
Show resolved Hide resolved


class SansSliceWavs(Rule[str]):
    """
    Rule that passes the SliceWavs setting of the specification on to the job request
    """

    def verify(self, job_request: JobRequest) -> None:
        """
        Write the value of this rule into the additional values of the job request as slice_wavs
        :param job_request: The job request that receives the value
        :return: None
        """
        configured_slices = self._value
        job_request.additional_values["slice_wavs"] = configured_slices


class SansPhiLimits(Rule[str]):
    """
    Rule that passes the PhiLimits setting of the specification on to the job request
    """

    def verify(self, job_request: JobRequest) -> None:
        """
        Write the value of this rule into the additional values of the job request as phi_limits
        :param job_request: The job request that receives the value
        :return: None
        """
        configured_limits = self._value
        job_request.additional_values["phi_limits"] = configured_limits


class SansFindFiles(Rule[bool]):
def __init__(self, value: bool):
super().__init__(value)
self._should_be_last = True

def verify(self, job_request: JobRequest) -> None:
title = job_request.experiment_title
logger.info("LOQ title is %s", title)
logger.info("%s title is %s", job_request.instrument, title)
# Find all of the "titles" [0] is the scatter, [1] is the background
title_parts = re.findall(r"{.*?}", title)
sample_title = title_parts[0]
logger.info("LOQ sample title is %s", sample_title)
logger.info("%s sample title is %s", job_request.instrument, sample_title)
# If background was defined in the title set can title
can_title = title_parts[1] if len(title_parts) > 1 else None
logger.info("LOQ can title is %s from list %s", can_title, title_parts)
logger.info("%s can title is %s from list %s", job_request.instrument, can_title, title_parts)

# Get the file lists
sans_files = create_list_of_files(job_request)
Expand All @@ -188,8 +220,8 @@ def verify(self, job_request: JobRequest) -> None:
_set_direct_files(job_request, sans_files)


class LoqUserFile(Rule[str]):
class SansUserFile(Rule[str]):
def verify(self, job_request: JobRequest) -> None:
# If M4 in user file then the transmission and scatter files are the same.
job_request.additional_values["included_trans_as_scatter"] = "_M4" in self._value
job_request.additional_values["user_file"] = f"/extras/loq/{self._value}"
job_request.additional_values["user_file"] = f"/extras/{job_request.instrument.lower()}/{self._value}"
9 changes: 5 additions & 4 deletions test/ingestion/test_extracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from rundetection.ingestion.extracts import (
get_cycle_string_from_path,
get_extraction_function,
loq_extract,
mari_extract,
osiris_extract,
sans_extract,
skip_extract,
tosca_extract,
)
Expand Down Expand Up @@ -60,7 +60,8 @@ def test_skip_extract(caplog: LogCaptureFixture):
("mari", "mari_extract"),
("tosca", "tosca_extract"),
("osiris", "osiris_extract"),
("loq", "loq_extract"),
("loq", "sans_extract"),
("sans2d", "sans_extract"),
],
)
def test_get_extraction_function(input_value, expected_function_name):
Expand Down Expand Up @@ -238,7 +239,7 @@ def test_osiris_extract_raises_on_bad_frequencies(job_request):
osiris_extract(job_request, dataset)


def test_loq_extract(job_request):
def test_sans_extract(job_request):
dataset = {
"sample": {
"thickness": [1.0],
Expand All @@ -248,7 +249,7 @@ def test_loq_extract(job_request):
}
}
with patch("rundetection.ingestion.extracts.get_cycle_string_from_path", return_value="some string"):
loq_extract(job_request, dataset)
sans_extract(job_request, dataset)

assert job_request.additional_values["cycle_string"] == "some string"
assert job_request.additional_values["sample_thickness"] == 1.0
Expand Down
8 changes: 4 additions & 4 deletions test/rules/test_common_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@

from rundetection.ingestion.ingest import JobRequest
from rundetection.rules.common_rules import (
CheckIfScatterSANS,
EnabledRule,
SansPhiLimits,
SansSliceWavs,
is_y_within_5_percent_of_x,
)
from rundetection.rules.sans_rules import CheckIfScatterSANS, SansPhiLimits, SansSliceWavs


@pytest.fixture
Expand Down Expand Up @@ -49,7 +47,7 @@ def test_enabled_rule_when_not_enabled(job_request) -> None:
assert job_request.will_reduce is False


@pytest.mark.parametrize("end_of_title", ["_TRANS", "_SANS", "COOL", "_sans/trans"])
@pytest.mark.parametrize("end_of_title", ["_TRANS", "COOL", "_sans/trans"])
def test_checkifscattersans_verify_raises_for_no_sans_trans(end_of_title) -> None:
job_request = mock.MagicMock()
job_request.experiment_title = "{fancy chemical}" + end_of_title
Expand All @@ -62,6 +60,8 @@ def test_checkifscattersans_verify_raises_for_no_sans_trans(end_of_title) -> Non
def test_checkifscattersans_verify_raises_for_direct_or_empty_in_title(to_raise) -> None:
job_request = mock.MagicMock()
job_request.experiment_title = "{fancy chemical " + to_raise + "}_SANS/TRANS"
job_request.will_reduce = True
job_request.run_number = 223312
CheckIfScatterSANS(True).verify(job_request)

assert job_request.will_reduce is False
Expand Down
Loading
Loading