Skip to content

Commit 12e31c9

Browse files
Pasarusgithub-actionsDagonite
authored
Implement LOQ rundetection rules (#327)
* Add closer to final LOQ rules changes * Formatting and linting commit * Update OSIRIS Specification * Finalise LOQ tests * Formatting and linting commit * Undo change to ingest.py as un-needed. * Fix issues with the test_common_rules * Fixup mypy * Adjust the e2e tests to make them work * Update rundetection/rules/loq_rules.py Co-authored-by: Samuel <[email protected]> * Update rundetection/rules/loq_rules.py Co-authored-by: Samuel <[email protected]> * Fix up the quick change to simplify _extract_run_number_from_filename * Simplify find_path_for_run_number * Update rundetection/rules/loq_rules.py Co-authored-by: Samuel <[email protected]> * Review responses * Add extra needed extracts for script changes (adding extra metadata) * Remove un-needed test * Test a missed line in extracts selection function --------- Co-authored-by: github-actions <[email protected]> Co-authored-by: Samuel <[email protected]>
1 parent be1d924 commit 12e31c9

File tree

14 files changed

+696
-13
lines changed

14 files changed

+696
-13
lines changed

pyproject.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ version = "0.0.1"
55
requires-python = ">= 3.11"
66
dependencies = [
77
"pika==1.3.2",
8-
"h5py==3.11.0"
8+
"h5py==3.11.0",
9+
"xmltodict==0.13.0",
10+
"requests==2.32.3"
911
]
1012

1113

@@ -19,7 +21,9 @@ run-detection = "rundetection.run_detection:main"
1921
formatting = [
2022
"ruff==0.4.8",
2123
"mypy==1.10.0",
22-
"run-detection[test]"
24+
"run-detection[test]",
25+
"types-requests==2.32.0.20240914",
26+
"types-xmltodict==0.14.0.20241009"
2327
]
2428

2529
test = [

rundetection/ingestion/extracts.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@ def skip_extract(job_request: JobRequest, _: Any) -> JobRequest:
3434
return job_request
3535

3636

37+
def loq_extract(job_request: JobRequest, dataset: Any) -> JobRequest:
38+
"""
39+
Get the sample details and the cycle strings
40+
:param job_request: The job request
41+
:param dataset: The nexus file dataset
42+
:return: The updated job request
43+
"""
44+
job_request.additional_values["cycle_string"] = get_cycle_string_from_path(job_request.filepath)
45+
job_request.additional_values["sample_thickness"] = dataset.get("sample").get("thickness")
46+
job_request.additional_values["sample_geometry"] = dataset.get("sample").get("shape")
47+
job_request.additional_values["sample_height"] = dataset.get("sample").get("height")
48+
job_request.additional_values["sample_width"] = dataset.get("sample").get("width")
49+
50+
return job_request
51+
52+
3753
def tosca_extract(job_request: JobRequest, _: Any) -> JobRequest:
3854
"""
3955
Add the cycle_string to the job request
@@ -137,6 +153,8 @@ def get_extraction_function(instrument: str) -> Callable[[JobRequest, Any], JobR
137153
return tosca_extract
138154
case "osiris":
139155
return osiris_extract
156+
case "loq":
157+
return loq_extract
140158
case _:
141159
return skip_extract
142160

rundetection/ingestion/ingest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,6 @@ def get_run_title(nexus_path: Path) -> str:
102102
:param nexus_path: Path - the nexus file path
103103
:return: str - The title of the files run
104104
"""
105+
# Instead of using Ingest here and reusing code, we won't bother with loading too much of the file every time and
106+
# JUST load the title instead of everything.
105107
return ingest(nexus_path).experiment_title

rundetection/rules/common_rules.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
Module containing rule implementations for instrument shared rules
33
"""
44

5+
import logging
6+
57
from rundetection.job_requests import JobRequest
68
from rundetection.rules.rule import Rule
79

10+
logger = logging.getLogger(__name__)
11+
812

913
class EnabledRule(Rule[bool]):
1014
"""
@@ -14,3 +18,26 @@ class EnabledRule(Rule[bool]):
1418

1519
def verify(self, job_request: JobRequest) -> None:
1620
job_request.will_reduce = self._value
21+
22+
23+
class NotAScatterFileError(Exception):
24+
pass
25+
26+
27+
class CheckIfScatterSANS(Rule[bool]):
28+
def verify(self, job_request: JobRequest) -> None:
29+
if "_SANS/TRANS" not in job_request.experiment_title:
30+
job_request.will_reduce = False
31+
logger.error("Not a scatter run. Does not have _SANS/TRANS in the experiment title.")
32+
# If it has empty or direct in the title assume it is a direct run file instead of a normal scatter.
33+
if (
34+
"empty" in job_request.experiment_title
35+
or "EMPTY" in job_request.experiment_title
36+
or "direct" in job_request.experiment_title
37+
or "DIRECT" in job_request.experiment_title
38+
):
39+
job_request.will_reduce = False
40+
logger.error(
41+
"If it is a scatter, contains empty or direct in the title and is assumed to be a scatter "
42+
"for an empty can run."
43+
)

rundetection/rules/factory.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
from typing import Any
66

7-
from rundetection.rules.common_rules import EnabledRule
7+
from rundetection.rules.common_rules import CheckIfScatterSANS, EnabledRule
88
from rundetection.rules.inter_rules import InterStitchRule
9+
from rundetection.rules.loq_rules import LoqFindFiles, LoqUserFile
910
from rundetection.rules.mari_rules import MariMaskFileRule, MariStitchRule, MariWBVANRule
1011
from rundetection.rules.osiris_rules import (
1112
OsirisDefaultGraniteAnalyser,
@@ -59,6 +60,15 @@ def rule_factory(key_: str, value: T) -> Rule[Any]: # noqa: C901, PLR0911, PLR0
5960
case "osirisreductionmode":
6061
if isinstance(value, bool):
6162
return OsirisReductionModeRule(value)
63+
case "checkifscattersans":
64+
if isinstance(value, bool):
65+
return CheckIfScatterSANS(value)
66+
case "loqfindfiles":
67+
if isinstance(value, bool):
68+
return LoqFindFiles(value)
69+
case "loquserfile":
70+
if isinstance(value, str):
71+
return LoqUserFile(value)
6272
case _:
6373
raise MissingRuleError(f"Implementation of Rule: {key_} does not exist.")
6474

rundetection/rules/loq_rules.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""
2+
Rules for LOQ
3+
"""
4+
5+
from __future__ import annotations
6+
7+
import logging
8+
import typing
9+
from dataclasses import dataclass
10+
from pathlib import Path
11+
12+
import requests
13+
import xmltodict
14+
15+
from rundetection.rules.rule import Rule
16+
17+
if typing.TYPE_CHECKING:
18+
from rundetection.job_requests import JobRequest
19+
20+
logger = logging.getLogger(__name__)
21+
22+
23+
@dataclass
24+
class SansFileData:
25+
title: str
26+
type: str
27+
run_number: str
28+
29+
30+
def _extract_run_number_from_filename(filename: str) -> str:
31+
# Assume filename looks like so: LOQ00100002.nxs, then strip.
32+
return filename.split(".")[0].lstrip("LOQ").lstrip("0")
33+
34+
35+
def _is_sample_transmission_file(sans_file: SansFileData, sample_title: str) -> bool:
36+
return sample_title in sans_file.title and sans_file.type == "TRANS"
37+
38+
39+
def _is_sample_direct_file(sans_file: SansFileData) -> bool:
40+
return ("direct" in sans_file.title.lower() or "empty" in sans_file.title.lower()) and sans_file.type == "TRANS"
41+
42+
43+
def _is_can_scatter_file(sans_file: SansFileData, can_title: str) -> bool:
44+
return can_title == sans_file.title.split("_")[0] and sans_file.type == "SANS/TRANS"
45+
46+
47+
def _is_can_transmission_file(sans_file: SansFileData, can_title: str) -> bool:
48+
return can_title in sans_file.title and sans_file.type == "TRANS"
49+
50+
51+
def _find_trans_file(sans_files: list[SansFileData], sample_title: str) -> SansFileData | None:
52+
for sans_file in sans_files:
53+
if _is_sample_transmission_file(sans_file=sans_file, sample_title=sample_title):
54+
return sans_file
55+
return None
56+
57+
58+
def _find_direct_file(sans_files: list[SansFileData]) -> SansFileData | None:
59+
reversed_files = reversed(sans_files)
60+
for sans_file in reversed_files:
61+
if _is_sample_direct_file(sans_file=sans_file):
62+
return sans_file
63+
return None
64+
65+
66+
def _find_can_scatter_file(sans_files: list[SansFileData], can_title: str) -> SansFileData | None:
67+
for sans_file in sans_files:
68+
if _is_can_scatter_file(sans_file=sans_file, can_title=can_title):
69+
return sans_file
70+
return None
71+
72+
73+
def _find_can_trans_file(sans_files: list[SansFileData], can_title: str) -> SansFileData | None:
74+
for sans_file in sans_files:
75+
if _is_can_transmission_file(sans_file=sans_file, can_title=can_title):
76+
return sans_file
77+
return None
78+
79+
80+
def find_path_for_run_number(cycle_path: str, run_number: int) -> Path | None:
81+
# 10 is just a magic number, but we needed an unrealistic value for the maximum
82+
for padding in range(11):
83+
potential_path = Path(f"{cycle_path}/LOQ{str(run_number).zfill(padding)}.nxs")
84+
if potential_path.exists():
85+
return potential_path
86+
return None
87+
88+
89+
def grab_cycle_instrument_index(cycle: str) -> str:
90+
_, cycle_year, cycle_num = cycle.split("_")
91+
url = f"http://data.isis.rl.ac.uk/journals/ndxloq/journal_{cycle_year}_{cycle_num}.xml"
92+
return requests.get(url, timeout=5).text
93+
94+
95+
def create_list_of_files(job_request: JobRequest) -> list[SansFileData]:
96+
cycle = job_request.additional_values["cycle_string"]
97+
xml = grab_cycle_instrument_index(cycle=cycle)
98+
cycle_run_info = xmltodict.parse(xml)
99+
list_of_files = []
100+
for run_info in cycle_run_info["NXroot"]["NXentry"]:
101+
title_contents = run_info["title"]["#text"].split("_")
102+
run_number = run_info["run_number"]["#text"]
103+
if len(title_contents) in {2, 3}:
104+
file_type = title_contents[-1]
105+
else:
106+
job_request.will_reduce = False
107+
logger.error(f"Run {run_info} either doesn't contain a _ or is not an expected experiment title format.")
108+
return []
109+
list_of_files.append(SansFileData(title=run_info["title"]["#text"], type=file_type, run_number=run_number))
110+
return list_of_files
111+
112+
113+
def strip_excess_files(sans_files: list[SansFileData], scatter_run_number: int) -> list[SansFileData]:
114+
new_list_of_files: list[SansFileData] = []
115+
for sans_file in sans_files:
116+
if int(sans_file.run_number) >= scatter_run_number:
117+
return new_list_of_files
118+
new_list_of_files.append(sans_file)
119+
return new_list_of_files
120+
121+
122+
class LoqFindFiles(Rule[bool]):
123+
def verify(self, job_request: JobRequest) -> None:
124+
# Expecting 3 values
125+
title_parts = job_request.experiment_title.split("_")
126+
if len(title_parts) != 3: # noqa: PLR2004
127+
job_request.will_reduce = False
128+
logger.error(
129+
f"Less or more than 3 sections to the experiment_title, probably missing Can Scatter title: "
130+
f"{job_request.experiment_title}"
131+
)
132+
return
133+
sample_title, can_title, ___ = title_parts
134+
sans_files = create_list_of_files(job_request)
135+
if sans_files == []:
136+
job_request.will_reduce = False
137+
logger.error("No files found for this cycle excluding this run.")
138+
return
139+
sans_files = strip_excess_files(sans_files, scatter_run_number=job_request.run_number)
140+
141+
job_request.additional_values["run_number"] = job_request.run_number
142+
143+
trans_file = _find_trans_file(sans_files=sans_files, sample_title=sample_title)
144+
if trans_file is not None:
145+
job_request.additional_values["scatter_transmission"] = trans_file.run_number
146+
147+
can_scatter = _find_can_scatter_file(sans_files=sans_files, can_title=can_title)
148+
if can_scatter is not None:
149+
job_request.additional_values["can_scatter"] = can_scatter.run_number
150+
151+
can_trans = _find_can_trans_file(sans_files=sans_files, can_title=can_title)
152+
if can_trans is not None and can_scatter is not None:
153+
job_request.additional_values["can_transmission"] = can_trans.run_number
154+
155+
direct_file = _find_direct_file(sans_files=sans_files)
156+
if direct_file is not None:
157+
if trans_file is not None:
158+
job_request.additional_values["scatter_direct"] = direct_file.run_number
159+
if can_scatter is not None and can_trans is not None:
160+
job_request.additional_values["can_direct"] = direct_file.run_number
161+
162+
163+
class LoqUserFile(Rule[str]):
164+
def verify(self, job_request: JobRequest) -> None:
165+
job_request.additional_values["user_file"] = f"/extras/loq/{self._value}"

rundetection/run_detection.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,12 @@ def process_notifications(notification_queue: SimpleQueue[JobRequest]) -> None:
130130
:param notification_queue: The notification queue
131131
:return: None
132132
"""
133-
logger.info("Checking notification queue...")
134133
while not notification_queue.empty():
135134
detected_run = notification_queue.get()
136135
logger.info("Sending notification for run: %s", detected_run.run_number)
137136

138137
with producer() as channel:
139138
channel.basic_publish(EGRESS_QUEUE_NAME, "", detected_run.to_json_string().encode())
140-
logger.info("Notification queue empty. Continuing...")
141139

142140

143141
def write_readiness_probe_file() -> None:
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
{
2-
"enabled": false
2+
"enabled": true,
3+
"checkifscattersans": true,
4+
"loqfindfiles": true,
5+
"loquserfile": "USER_LOQ_243B_M3_Changer_Xpress_Okesola__MERGED_log.toml"
36
}

rundetection/specifications/osiris_specification.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"enabled": true,
3-
"osiriscalibfilesandreflection": {"002": "00148587", "004": "00148587"},
3+
"osiriscalibfilesandreflection": {"002": "00149059", "004": "00149060"},
44
"osirisreductionmode": false,
55
"osirisdefaultspectroscopy": true,
66
"osirisdefaultgraniteanalyser": true,

test/ingestion/test_extracts.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from rundetection.ingestion.extracts import (
1212
get_cycle_string_from_path,
1313
get_extraction_function,
14+
loq_extract,
1415
mari_extract,
1516
osiris_extract,
1617
skip_extract,
@@ -59,6 +60,7 @@ def test_skip_extract(caplog: LogCaptureFixture):
5960
("mari", "mari_extract"),
6061
("tosca", "tosca_extract"),
6162
("osiris", "osiris_extract"),
63+
("loq", "loq_extract"),
6264
],
6365
)
6466
def test_get_extraction_function(input_value, expected_function_name):
@@ -236,6 +238,25 @@ def test_osiris_extract_raises_on_bad_frequencies(job_request):
236238
osiris_extract(job_request, dataset)
237239

238240

241+
def test_loq_extract(job_request):
242+
dataset = {
243+
"sample": {
244+
"thickness": 1.0,
245+
"shape": "Disc",
246+
"height": 8.0,
247+
"width": 8.0,
248+
}
249+
}
250+
with patch("rundetection.ingestion.extracts.get_cycle_string_from_path", return_value="some string"):
251+
loq_extract(job_request, dataset)
252+
253+
assert job_request.additional_values["cycle_string"] == "some string"
254+
assert job_request.additional_values["sample_thickness"] == 1.0
255+
assert job_request.additional_values["sample_geometry"] == "Disc"
256+
assert job_request.additional_values["sample_height"] == 8.0 # noqa: PLR2004
257+
assert job_request.additional_values["sample_width"] == 8.0 # noqa: PLR2004
258+
259+
239260
def test_get_cycle_string_from_path_valid():
240261
"""
241262
Test get cycle string returns correct string

0 commit comments

Comments
 (0)