From 623390b493eb63cbe03f1d7f299c6777d5bfe1e4 Mon Sep 17 00:00:00 2001 From: Rakib Hassan Date: Wed, 29 Jan 2025 18:20:50 +1100 Subject: [PATCH] Miscellaneous Changes * Extended response-factory to be able to retrieve responses from a database * Minor cleanups in associated files --- .../analytics/station_analytics.py | 5 +- seismic/ASDFdatabase/analytics/utils.py | 2 +- seismic/ASDFdatabase/analytics/wa.py | 7 +- seismic/ASDFdatabase/analytics/wa_asdf.py | 10 +- seismic/ASDFdatabase/utils.py | 23 +-- seismic/inventory/fdsnxml_convert.py | 2 +- seismic/inventory/response.py | 189 ++++++++++++------ .../ASDFdatabase/test_waveform_analytics.py | 29 +-- 8 files changed, 154 insertions(+), 113 deletions(-) diff --git a/seismic/ASDFdatabase/analytics/station_analytics.py b/seismic/ASDFdatabase/analytics/station_analytics.py index f0ce0634..53042ec9 100644 --- a/seismic/ASDFdatabase/analytics/station_analytics.py +++ b/seismic/ASDFdatabase/analytics/station_analytics.py @@ -140,7 +140,7 @@ def get_period_details(self, sampling_rate: float): class ResponseAmplitudesCache(): class ResponseAmplitudes(): - def __init__(self, sampling_rate:float, response:Response): + def __init__(self, sampling_rate: float, response: Response): self.response = response self.sampling_rate = sampling_rate @@ -208,8 +208,7 @@ def analyse_data(self, network: str, station: str, location: str, - channel: str, - sampling_rate: int): + channel: str): # collate timespans to be allocated to each parallel process st, et = self.get_time_range_func(network, diff --git a/seismic/ASDFdatabase/analytics/utils.py b/seismic/ASDFdatabase/analytics/utils.py index b46cf5f2..6b683f96 100644 --- a/seismic/ASDFdatabase/analytics/utils.py +++ b/seismic/ASDFdatabase/analytics/utils.py @@ -76,7 +76,7 @@ def get_response(input_file, network=None, station=None, location=None, channel= if(resp_inv is not None): rf = ResponseFactory() - rf.CreateFromInventory(resp_name, resp_inv) + rf.createFromInventory(resp_name, 
resp_inv) result = rf.getResponse(resp_name) # end if diff --git a/seismic/ASDFdatabase/analytics/wa.py b/seismic/ASDFdatabase/analytics/wa.py index ce162ae4..89d435dc 100644 --- a/seismic/ASDFdatabase/analytics/wa.py +++ b/seismic/ASDFdatabase/analytics/wa.py @@ -184,7 +184,6 @@ def get_time_range_func(net, sta, loc, cha): # account for the possibility of each combination of net, sta, loc, cha to have # recordings under different sampling rates meta_dict = select_channel(mseed_index, sd, ed) - sr_dict = mseed_index.get_channel_sampling_rates() # instantiate progress tracker manager = Manager() @@ -199,11 +198,7 @@ def get_time_range_func(net, sta, loc, cha): """.format(cha)) for meta in meta_list: net, sta, loc, cha = meta - - nslc = '.'.join(meta) - sampling_rate = sr_dict[nslc] - - sa.analyse_data(net, sta, loc, cha, sampling_rate) + sa.analyse_data(net, sta, loc, cha) # end for ofn = os.path.join(output_folder, '{}.pdf'.format(cha)) diff --git a/seismic/ASDFdatabase/analytics/wa_asdf.py b/seismic/ASDFdatabase/analytics/wa_asdf.py index 09cdab6c..ecdbe694 100644 --- a/seismic/ASDFdatabase/analytics/wa_asdf.py +++ b/seismic/ASDFdatabase/analytics/wa_asdf.py @@ -30,22 +30,18 @@ type=str) @click.argument('station', required=True, type=str) -@click.argument('location', required=True, - type=str) @click.argument('channel', required=True, type=str) -@click.argument('instrument-response', required=True, +@click.argument('response-database', required=True, type=click.Path(exists=True)) -@click.argument('sampling-rate', required=True, - type=int) @click.argument('output-folder', required=True, type=click.Path(exists=True)) @click.option('--start-date', type=str, default=None, show_default=True, help="Start date in UTC format for processing data") @click.option('--end-date', type=str, default=None, show_default=True, help="End date in UTC format for processing data") -def process_asdf(asdf_source, network, station, location, channel, instrument_response, - sampling_rate, 
output_folder, start_date, end_date): +def process_asdf(asdf_source, network, station, channel, response_database, + output_folder, start_date, end_date): """ ASDF_SOURCE: Path to text file containing paths to ASDF files\n NETWORK: network code diff --git a/seismic/ASDFdatabase/utils.py b/seismic/ASDFdatabase/utils.py index 6ed75519..832c59ba 100644 --- a/seismic/ASDFdatabase/utils.py +++ b/seismic/ASDFdatabase/utils.py @@ -186,7 +186,6 @@ def __init__(self, mseed_folder, pattern): self.tree = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))) self.stream_cache = MseedIndex.StreamCache() self.mseed_files = np.array(sorted(glob(os.path.join(self.mseed_folder, pattern)))) - self.sr_dict = defaultdict(list) # sampling rates for each nslc combination self.coverage_dict = defaultdict(float) # dict keyed by nslc, with coverage as values fc = len(self.mseed_files) @@ -211,10 +210,10 @@ def __init__(self, mseed_folder, pattern): # end try for tr in st: - nc, sc, lc, cc, st, et, sr = \ + nc, sc, lc, cc, st, et = \ tr.stats.network, tr.stats.station, tr.stats.location, \ tr.stats.channel, tr.stats.starttime.timestamp, \ - tr.stats.endtime.timestamp, tr.stats.sampling_rate + tr.stats.endtime.timestamp # skip bogus traces if(nc == sc == lc == cc == ''): continue @@ -223,25 +222,11 @@ def __init__(self, mseed_folder, pattern): nslc = '.'.join((nc, sc, lc, cc)) # store coverage for unique channels and sampling rates cov = float(et - st) # coverage in seconds - self.sr_dict[nslc].append(sr) self.coverage_dict[nslc] += cov # end for # if (i > 0): break # end for - # take median of sampling rates found - for k, v in self.sr_dict.items(): - vals = np.array(self.sr_dict[k]) - - unique_sr = set(vals) - chosen_sr = np.median(vals) - if(len(unique_sr) > 1): - print('Multiple sampling rates ({}) found under {}. 
' - 'Selecting median sampling rate ({}).'.format(unique_sr, k, chosen_sr)) - # end if - self.sr_dict[k] = chosen_sr - # end for - print('\nCreating metadata index for a total of {} traces found..'.format(len(self.meta_list))) for row in tqdm(self.meta_list): @@ -254,10 +239,6 @@ def __init__(self, mseed_folder, pattern): # end for # end func - def get_channel_sampling_rates(self) -> defaultdict: - return self.sr_dict - # end func - def get_channel_coverages(self) -> defaultdict: return self.coverage_dict # end func diff --git a/seismic/inventory/fdsnxml_convert.py b/seismic/inventory/fdsnxml_convert.py index bdd0d2b0..a3d2b116 100644 --- a/seismic/inventory/fdsnxml_convert.py +++ b/seismic/inventory/fdsnxml_convert.py @@ -52,7 +52,7 @@ def toSc3ml(src_path, dst_path, response_fdsnxml=None): response = None if response_fdsnxml is not None: rf = ResponseFactory() - rf.CreateFromStationXML('resp', response_fdsnxml) + rf.createFromStationXML('resp', response_fdsnxml) response = rf.getResponse('resp') if os.path.isfile(src_path): diff --git a/seismic/inventory/response.py b/seismic/inventory/response.py index 2227f7a5..67c545f6 100644 --- a/seismic/inventory/response.py +++ b/seismic/inventory/response.py @@ -20,33 +20,64 @@ from obspy.core import UTCDateTime from obspy import read_inventory - +import sqlite3 +import atexit +from io import BytesIO +from types import SimpleNamespace class ResponseFactory: """ - The ResponseFactory class encapsulates the generation of a collection of named Instrument Response Objects from a - variety of sources. Currently it provides the facility to create Response objects from two sources, namely, - Poles and Zeroes supplied by the user and from StationXML files generated from RESP files using the PDCC tool - (link below). The conversion of RESP files into a corresponding StationXML file, at this stage, must take place - externally, because ObsPy lacks that functionality. 
- The intended usage of this class during the creation of an ASDF dataset is as follows:: - - 1. User creates a number of uniquely named Response objects (see associated tests as well) pertaining to different - channels in a given survey. - 2. User fetches these Response objects from an instance of ResponseFactory as needed, while creating ObsPy Channel - objects, during which Response objects can be passed in as an argument. - 3. User builds a hierarchy of channel->station->network inventories, with the appropriate instrument response - information embedded - 4. The master FDSN StaionXML file output after step 3 can then be converted into an SC3ML file (which can be - ingested by SeisComp3) using the fdsnxml2inv tool. - - PDCC tool: https://ds.iris.edu/ds/nodes/dmc/software/downloads/pdcc/ + The ResponseFactory class encapsulates the loading and retrieval of Instrument Response Objects from a + variety of sources: + i) from a stationXML file or obspy.core.Inventory containing a single Response Object + ii) from Poles and Zeros + iii) from a database, in which each row identified by net, sta, cha contains a response-level inventory """ def __init__(self): - self.m_responseInventory = defaultdict(list) + self.response_cache = defaultdict(list) + self.db_source = None # end func + class ResponseFromDB(object): + def __init__(self, db_fn): + try: + # Connect to SQLite database + self.conn = sqlite3.connect(db_fn) + self.cursor = self.conn.cursor() + + except sqlite3.Error as e: + print("An error occurred: {}".format(e)) + # end try + + atexit.register(lambda: self.conn.close()) + # end func + + def getResponse(self, net, sta, loc, cha): + try: + q = "select sta_xml from responses where net='{}' and sta='{}' and cha='{}';".format(net, sta, cha) + self.cursor.execute(q) + + row = self.cursor.fetchall() + sta_xml = row[0][0] + inv = read_inventory(BytesIO(sta_xml)) + + # select desired location + inv = inv.select(network=net, station=sta, location=loc, channel=cha) + 
if(len(inv)): + try: + return inv.networks[0].stations[0].channels[0].response + except: + return None + # end try + # end func + except sqlite3.Error as e: + print("An error occurred: {}".format(e)) + # end try + return None + # end func + # end class + class ResponseFromInventory(object): """Helper class to get Obspy Response object from an Inventory @@ -58,8 +89,8 @@ def __init__(self, source_inventory): :param source_inventory: Inventory from which to extract response :type source_inventory: obspy.core.inventory.inventory.Inventory """ - self.m_inventory = source_inventory - self.m_response = None + self.inventory = source_inventory + self.response = None self._get_response_from_inventory() #end func @@ -69,8 +100,8 @@ def _get_response_from_inventory(self): c = None found = 0 # Extract network, station and channel codes - if self.m_inventory.networks: - n = self.m_inventory.networks[0] + if self.inventory.networks: + n = self.inventory.networks[0] found += 1 if n.stations: s = n.stations[0] @@ -86,8 +117,8 @@ def _get_response_from_inventory(self): msg = 'Network, station or channel information missing in RESP file.' 
raise RuntimeError(msg) else: - seedid = self.m_inventory.get_contents()['channels'][0] - self.m_response = self.m_inventory.get_response(seedid, c.start_date) + seedid = self.inventory.get_contents()['channels'][0] + self.response = self.inventory.get_response(seedid, c.start_date) # end if #end func # end class @@ -105,6 +136,19 @@ def __init__(self, respFileName): super(ResponseFactory.ResponseFromStationXML, self).__init__(xml_inventory) # end class + class ResponseFromResp(ResponseFromInventory): + """Helper class to get Obspy Response object from a station xml file + """ + def __init__(self, respFileName): + """Constructor + + :param respFileName: XML file to load + :type respFileName: str + """ + resp_inventory = read_inventory(respFileName) + super(ResponseFactory.ResponseFromResp, self).__init__(resp_inventory) + # end class + class ResponseFromPAZ: def __init__(self, pzTransferFunctionType='LAPLACE (RADIANS/SECOND)', normFactor=8e4, @@ -113,7 +157,7 @@ def __init__(self, pzTransferFunctionType='LAPLACE (RADIANS/SECOND)', stageGainFreq=1e-2, poles=[0 + 0j], zeros=[0 + 0j]): - self.m_base = ''' + self.base = ''' -i @@ -205,34 +249,34 @@ def __init__(self, pzTransferFunctionType='LAPLACE (RADIANS/SECOND)', ''' - self.m_response = None - self.m_pzTransferFunctionType = pzTransferFunctionType - self.m_normFactor = normFactor - self.m_normFreq = normFreq - self.m_stageGain = stageGain - self.m_stageGainFreq = stageGainFreq - self.m_poles = poles - self.m_zeros = zeros + self.response = None + self.pzTransferFunctionType = pzTransferFunctionType + self.normFactor = normFactor + self.normFreq = normFreq + self.stageGain = stageGain + self.stageGainFreq = stageGainFreq + self.poles = poles + self.zeros = zeros # Generate a valid response inventory based on the fdsnstationxml file, which # was downloaded from IRIS as an example. 
- inv = read_inventory(StringIO(self.m_base)) + inv = read_inventory(StringIO(self.base)) datetime = UTCDateTime("2002-11-19T21:07:00.000") # Fetch the response object and adapt its parameters based on user-input. - self.m_response = inv.get_response('IU.ANMO.00.BHZ', datetime) - - self.m_response.response_stages[0].pz_transfer_function_type = self.m_pzTransferFunctionType - self.m_response.response_stages[0].normalization_factor = self.m_normFactor - self.m_response.response_stages[0].normalization_frequency = self.m_normFreq - self.m_response.response_stages[0].stage_gain = self.m_stageGain - self.m_response.response_stages[0].stage_gain_frequency = self.m_stageGainFreq - self.m_response.response_stages[0].poles = poles - self.m_response.response_stages[0].zeros = zeros + self.response = inv.get_response('IU.ANMO.00.BHZ', datetime) + + self.response.response_stages[0].pz_transfer_function_type = self.pzTransferFunctionType + self.response.response_stages[0].normalization_factor = self.normFactor + self.response.response_stages[0].normalization_frequency = self.normFreq + self.response.response_stages[0].stage_gain = self.stageGain + self.response.response_stages[0].stage_gain_frequency = self.stageGainFreq + self.response.response_stages[0].poles = poles + self.response.response_stages[0].zeros = zeros # end func # end class - def CreateFromInventory(self, name, obspy_inventory): + def createFromInventory(self, name, obspy_inventory): """Create response from an Inventory :param name: Name of the response for later retrieval @@ -240,21 +284,32 @@ def CreateFromInventory(self, name, obspy_inventory): :param obspy_inventory: Inventory from which to extract response :type obspy_inventory: obspy.core.inventory.inventory.Inventory """ - self.m_responseInventory[name] = ResponseFactory.ResponseFromInventory(obspy_inventory) + self.response_cache[name] = ResponseFactory.ResponseFromInventory(obspy_inventory) # end func - def CreateFromStationXML(self, name, 
respFileName): """Create response from an XML file :param name: Name of the response for later retrieval :type name: str - :param respFileName: XML file to load + :param stationXMLFileName: XML file to load + :type stationXMLFileName: str + """ + self.response_cache[name] = ResponseFactory.ResponseFromStationXML(stationXMLFileName) + # end func + + def createFromRespFile(self, name, respFileName): + """Create response from a RESP file + + :param name: Name of the response for later retrieval + :type name: str + :param respFileName: RESP file to load :type respFileName: str """ - self.m_responseInventory[name] = ResponseFactory.ResponseFromStationXML(respFileName) + self.response_cache[name] = ResponseFactory.ResponseFromResp(respFileName) # end func - def CreateFromPAZ(self, name, pzTransferFunctionType, + def createFromPAZ(self, name, pzTransferFunctionType, normFactor, normFreq, stageGain, @@ -262,27 +317,39 @@ def CreateFromPAZ(self, name, pzTransferFunctionType, poles, zeros): - self.m_responseInventory[name] = ResponseFactory.ResponseFromPAZ(pzTransferFunctionType, - normFactor, - normFreq, - stageGain, - stageGainFreq, - poles, - zeros) - + self.response_cache[name] = ResponseFactory.ResponseFromPAZ(pzTransferFunctionType, + normFactor, + normFreq, + stageGain, + stageGainFreq, + poles, + zeros) #end func + def createFromDB(self, db_fn): + self.db_source = self.ResponseFromDB(db_fn) + # end func + def getResponse(self, name): """Retrieve response by name - :param name: Name given to response at creation time + :param name: Name given to response at creation time, or a string containing + 'network.station.location.channel' to query a database source, if it exists :type name: str :raises RuntimeError: Raises error if name is not recognized :return: The requested response :rtype: obspy.core.inventory.response.Response """ - if (name in self.m_responseInventory.keys()): - return 
self.m_responseInventory[name].m_response + if (name in self.response_cache.keys()): + return self.response_cache[name].response + elif(self.db_source): + # try breaking down name into net, sta, loc, cha + net, sta, loc, cha = name.split('.') + resp = self.db_source.getResponse(net, sta, loc, cha) + + if(resp): self.response_cache[name] = SimpleNamespace(**{'response': resp}) + + return resp else: msg = "Response with name: %s not found.." % (name) raise RuntimeError(msg) diff --git a/tests/test_seismic/ASDFdatabase/test_waveform_analytics.py b/tests/test_seismic/ASDFdatabase/test_waveform_analytics.py index f6efd306..86dbf6b2 100644 --- a/tests/test_seismic/ASDFdatabase/test_waveform_analytics.py +++ b/tests/test_seismic/ASDFdatabase/test_waveform_analytics.py @@ -19,7 +19,7 @@ from obspy.core import Trace, Stream from obspy.signal.spectral_estimation import PPSD from obspy import UTCDateTime -from seismic.ASDFdatabase.analytics.station_analytics import StationAnalytics +from seismic.ASDFdatabase.analytics.station_analytics import StationAnalytics, PeriodDetailsCache from seismic.inventory.response import ResponseFactory from shutil import rmtree from scipy.interpolate import interp1d @@ -53,14 +53,16 @@ def get_waveforms_func(net, sta, loc, cha, st, et): # create a flat response rf = ResponseFactory() - rf.CreateFromPAZ('flat_response', 'LAPLACE (RADIANS/SECOND)', normFactor=1, - normFreq=1, - stageGain=1, - stageGainFreq=1, - poles=[0 + 1j], - zeros=[0 + 1j]) + rf.createFromPAZ('flat_response', 'LAPLACE (RADIANS/SECOND)', normFactor=1, + normFreq=1, + stageGain=1, + stageGainFreq=1, + poles=[0 + 1j], + zeros=[0 + 1j]) resp = rf.getResponse('flat_response') + pdc = PeriodDetailsCache() + pd = pdc.get_period_details(sampling_rate) # generate spectrum for fast_ppsd sa = StationAnalytics(get_time_range_func, get_waveforms_func, @@ -71,14 +73,15 @@ def get_waveforms_func(net, sta, loc, cha, st, et): end_time = None, nproc=1) - sa.analyse_data(network, station, 
location, channel, sampling_rate) + sa.analyse_data(network, station, location, channel) # create an interpolation object for the spectrum from fast_ppsd fast_ppsd_io = None for start_time, end_time in zip(sa.st_list, sa.et_list): - output_fn_stem = '{}.{}.{}.{}.{}.{}'.format(network, + output_fn_stem = '{}.{}.{}.{}.{}.{}.{}'.format(network, station, location, channel, + sampling_rate, start_time, end_time) @@ -88,7 +91,7 @@ def get_waveforms_func(net, sta, loc, cha, st, et): results = np.load(output_fn_npz) spec = results['sparse_spec'] - fast_ppsd_io = interp1d(sa.sparse_periods, spec) + fast_ppsd_io = interp1d(pd.sparse_periods, spec) # end for # generate spectrum using obspy ppsdf, with a flat response @@ -104,9 +107,9 @@ def get_waveforms_func(net, sta, loc, cha, st, et): ppsd_io = interp1d(periods, spec) # check conformity of spectra - common_periods = np.linspace(np.max([sa.sparse_periods[0], periods[0]]), - np.min([sa.sparse_periods[-1], periods[-1]]), 100) + common_periods = np.linspace(np.max([pd.sparse_periods[0], periods[0]]), + np.min([pd.sparse_periods[-1], periods[-1]]), 100) corr = np.corrcoef(fast_ppsd_io(common_periods), ppsd_io(common_periods))[0,1] assert corr > 0.9 -# end func \ No newline at end of file +# end func