-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft: Introduce REDUCTION and ERRATA file sections #473
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,6 +146,8 @@ def __init__(self, filename, _cache_info=None): | |
self.train_ids = _cache_info['train_ids'] | ||
self.control_sources = _cache_info['control_sources'] | ||
self.instrument_sources = _cache_info['instrument_sources'] | ||
self.reduction_data = _cache_info['reduction_data'] | ||
self.errata = _cache_info['errata'] | ||
self.validity_flag = _cache_info.get('flag', None) | ||
else: | ||
try: | ||
|
@@ -155,7 +157,8 @@ def __init__(self, filename, _cache_info=None): | |
|
||
self.train_ids = tid_data[tid_data != 0] | ||
|
||
self.control_sources, self.instrument_sources = self._read_data_sources() | ||
(self.control_sources, self.instrument_sources, | ||
self.reduction_data, self.errata) = self._read_data_sources() | ||
|
||
self.validity_flag = None | ||
|
||
|
@@ -295,6 +298,7 @@ def format_version(self): | |
|
||
def _read_data_sources(self): | ||
control_sources, instrument_sources = set(), set() | ||
reduction_info, errata = set(), set() | ||
|
||
# The list of data sources moved in file format 1.0 | ||
if self.format_version == '0.5': | ||
|
@@ -320,14 +324,19 @@ def _read_data_sources(self): | |
# TODO: Do something with groups? | ||
elif category == 'CONTROL': | ||
control_sources.add(h5_source) | ||
elif category == 'REDUCTION': | ||
reduction_info.add(h5_source) | ||
elif category == 'ERRATA': | ||
errata.add(h5_source) | ||
elif category == 'Karabo_TimerServer': | ||
# Ignore virtual data source used only in file format | ||
# version 1.1 / pclayer-1.10.3-2.10.5. | ||
pass | ||
else: | ||
raise ValueError("Unknown data category %r" % category) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this bit is probably the big sore point for this proposal - pretty much all prior versions of EXtra-data (or even Since this only affects the
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I don't see any way around it though, except if we add them below METADATA after all. Generally one should possibly make this less restrictive in my view, as it could hit us again in the future. |
||
|
||
return frozenset(control_sources), frozenset(instrument_sources) | ||
return (frozenset(control_sources), frozenset(instrument_sources), | ||
frozenset(reduction_info), frozenset(errata)) | ||
|
||
def _guess_valid_trains(self): | ||
# File format version 1.0 includes a flag which is 0 if a train ID | ||
|
@@ -418,6 +427,10 @@ def index_groups(self, source): | |
return {''} | ||
elif source in self.instrument_sources: | ||
return set(self.file[f'/INDEX/{source}'].keys()) | ||
elif source in self.reduction_data: | ||
return set(self.file[f'/INDEX/{source}'].keys()) | ||
elif source in self.errata: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand from our example that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's nothing concrete implemented in the DAQ, but some form of source identification is required. Think of a train that came outside the buffer range, and couldn't be stored. You'd want to know what the data was, so it would be prefixed by the source in the path. Also, technically all this additional data is FAST data for the DAQ - i.e. it will be stored only if a train contains it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Certainly, my question was indeed more concrete in how you would pick the source name. Say Furthermore I had a look into your example file for data reduction. There are
with a corresponding entry in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So in the reduction case, I prefix all source names with Hence, for what you observe in the index section:
|
||
return set(self.file[f'/INDEX/{source}'].keys()) | ||
else: | ||
raise SourceNameError(source) | ||
|
||
|
@@ -454,6 +467,10 @@ def get_keys(self, source): | |
group = '/CONTROL/' + source | ||
elif source in self.instrument_sources: | ||
group = '/INSTRUMENT/' + source | ||
elif source in self.reduction_data: | ||
group = '/REDUCTION/' + source | ||
elif source in self.errata: | ||
group = '/ERRATA/' + source | ||
else: | ||
raise SourceNameError(source) | ||
|
||
|
@@ -478,6 +495,10 @@ def get_one_key(self, source): | |
group = '/CONTROL/' + source | ||
elif source in self.instrument_sources: | ||
group = '/INSTRUMENT/' + source | ||
elif source in self.reduction_data: | ||
group = '/REDUCTION/' + source | ||
elif source in self.errata: | ||
group = '/ERRATA/' + source | ||
else: | ||
raise SourceNameError(source) | ||
|
||
|
@@ -527,6 +548,10 @@ def has_source_key(self, source, key): | |
path = '/CONTROL/{}/{}'.format(source, key.replace('.', '/')) | ||
elif source in self.instrument_sources: | ||
path = '/INSTRUMENT/{}/{}'.format(source, key.replace('.', '/')) | ||
elif source in self.reduction_data: | ||
path = '/REDUCTION/{}/{}'.format(source, key.replace('.', '/')) | ||
elif source in self.errata: | ||
path = '/ERRATA/{}/{}'.format(source, key.replace('.', '/')) | ||
else: | ||
raise SourceNameError(source) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should either not be cached at all or use conditional access like
validity_flag
further below. Since it's only auxiliary information and may often be comparatively small, it may be fine to always generate it on demand. If this changes in the future, we can add machinery to augment existing indices, or simply re-create them. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense. I'll remove it from caching.