1414import obspyh5
1515from obspyh5 import dataset2trace , is_obspyh5 , trace2group
1616from os .path import splitext
17-
17+ from typing import DefaultDict
1818from seismic .units_utils import KM_PER_DEG
1919from rf .rfstream import rfstats , obj2stats
2020from collections import defaultdict
21- from pandas import DataFrame
2221# pylint: disable=invalid-name
2322
2423
3433
3534def safe_iter_event_data (events , inventory , get_waveforms , use_rfstats = True , phase = 'P' ,
3635 request_window = None , pad = 10 , pbar = None ,
37- status :DataFrame = None , ** kwargs ):
36+ status : DefaultDict [ str , int ] = None , log = None , ** kwargs ):
3837 """
3938 Return iterator yielding three component streams per station and event.
4039
@@ -48,7 +47,8 @@ def safe_iter_event_data(events, inventory, get_waveforms, use_rfstats=True, pha
4847 :param request_window: requested time window around the onset of the phase
4948 :param float pad: padding in seconds around request window
5049 :param pbar: tqdm_ instance for displaying a progressbar
51- :param status: an empty pandas DataFrame for retrieving statistics
50+ :param status: a defaultdict (seed id -> int) for retrieving statistics; the count for a seed id is incremented each time a stream is yielded for it
51+ :param log: a python logging instance; when given, a header row and per event/station status rows are written to it via ``log.info``
5252 :param kwargs: all other kwargs are passed to `~rf.rfstream.rfstats()`
5353
5454 :return: three component streams with raw data
@@ -82,19 +82,18 @@ def _get_stations(inventory):
8282 pbar .total = len (events ) * len (stations )
8383 # end if
8484
85+ fmt = "{:<15} {:<30} {:>9} {:>9} {:>7} {:>5} {:>15}"
86+ if (log is not None ):
87+ log .info (fmt .format ("seed_id" , "origin_time" , "lon" , "lat" , "depth" , "mag" , "status" )+ '\n ' ,
88+ extra = {'simple' : True })
89+ # end if
8590 for i , (event , seedid ) in enumerate (itertools .product (events , stations )):
8691 if pbar is not None : pbar .update (1 )
8792 origin = (event .preferred_origin () or event .origins [0 ])
8893 magnitude = (event .preferred_magnitude () or event .magnitudes [0 ])
8994 origin_time , elon , elat , edepth , eMw = origin ['time' ], origin ['longitude' ], \
9095 origin ['latitude' ], origin ['depth' ], magnitude .mag
91- row_items = [seedid , origin_time , elon , elat , edepth / 1e3 , eMw ]
92-
93- # initialize status data-frame
94- if (i == 0 and status is not None ):
95- cols = ['seed_id' , 'origin_time' , 'lon' , 'lat' , 'depth' , 'magnitude' , 'status' ]
96- for col in cols : status [col ] = None
97- # end if
96+ row_items = [seedid , origin_time .strftime ('%Y-%m-%dT%H:%M:%S.%f' ), elon , elat , edepth / 1e3 , eMw ]
9897
9998 try :
10099 # exclude datetime from call to get_coordinates to ensure incorrect
@@ -103,7 +102,7 @@ def _get_stations(inventory):
103102 args = (seedid [:- 1 ] + stations [seedid ], None )
104103 coords = inventory .get_coordinates (* args )
105104 except Exception : # station not available at that time
106- if (status is not None ): status . loc [ i ] = [ * row_items , 'Invalid inventory' ]
105+ if (log is not None ): log . info ( fmt . format ( * row_items , 'Invalid inventory' ), extra = { 'simple' : True })
107106 continue
108107 # end try
109108
@@ -115,11 +114,11 @@ def _get_stations(inventory):
115114 from warnings import warn
116115 warn ('Error "%s" in rfstats call for event %s, station %s.'
117116 % (exception , event .resource_id , seedid ))
118- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Invalid rfstats' ]
117+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Invalid rfstats' ), extra = { 'simple' : True })
119118 continue
120119 # end try
121120 if not stats :
122- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Invalid rfstats' ]
121+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Invalid rfstats' ), extra = { 'simple' : True })
123122 continue
124123 # end if
125124 # end if
@@ -146,11 +145,11 @@ def _get_stations(inventory):
146145 stream .merge ()
147146
148147 if (len (stream ) == 0 ):
149- if ( status is not None ): status . loc [ i ] = [ * row_items , 'No data' ]
148+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'No data' ), extra = { 'simple' : True })
150149 continue
151150 # end if
152151 except Exception : # no data available
153- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Bad data' ]
152+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'No data' ), extra = { 'simple' : True })
154153 continue
155154 # end try
156155
@@ -178,7 +177,7 @@ def _get_stations(inventory):
178177 warn ('Need 3 component seismograms. %d components '
179178 'detected for event %s, station %s.'
180179 % (len (stream ), event .resource_id , seedid ))
181- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Missing components' ]
180+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Missing components' ), extra = { 'simple' : True })
182181 continue
183182 # end if
184183
@@ -200,7 +199,7 @@ def has_masked_values(data_stream):
200199 from warnings import warn
201200 warn ('Gaps or overlaps detected for event %s, station %s.'
202201 % (event .resource_id , seedid ))
203- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Patchy data' ]
202+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Patchy data' ), extra = { 'simple' : True })
204203 continue
205204 else :
206205 for tr in stream : tr .data = np .array (tr .data )
@@ -211,7 +210,8 @@ def has_masked_values(data_stream):
211210 tr .stats .update (stats )
212211 # end for
213212
214- if (status is not None ): status .loc [i ] = [* row_items , 'Good data' ]
213+ if (log is not None ): log .info (fmt .format (* row_items , 'Good data' ), extra = {'simple' : True })
214+ if (status is not None ): status [seedid ] += 1
215215 yield RFStream (stream )
216216 # end for
217217# end func
0 commit comments