1
- #!/usr/bin/python
1
+ #!/usr/bin/python3
2
2
3
3
import sys
4
4
import json
@@ -76,32 +76,52 @@ def print(self, count_parent=None):
76
76
val_stat .print (self .count )
77
77
78
78
class DataItemStatisticsCalculator:
    """Accumulate data item statistics over processed records.

    Statistics are stored in a two-level mapping: the outer key is the
    ``(sac, sic)`` pair of the record, the inner key is the category number
    as a zero-padded three-character string.  When ``per_source`` is false,
    every record is filed under the single outer key ``(None, None)``.
    """

    def __init__(self, per_source):
        """Create an empty calculator.

        :param per_source: if true, statistics are grouped by the values
            found under '010.SAC' and '010.SIC' in each record; otherwise
            all records share one group.
        """
        self._per_source = per_source

        self.__num_records = 0
        self.__statistics = {}  # type: Dict[Tuple[Optional[int], Optional[int]], Dict[str, DataItemStatistic]]

    @property
    def num_records(self):
        """Number of records passed to :meth:`process_record` so far."""
        return self.__num_records

    def process_record(self, cat, record):
        """Add one record to the statistics of its category (and source).

        :param cat: category number of the record; converted with ``str``
            and zero-padded to three characters to form the category key.
        :param record: the record, handed unchanged to ``find_value`` and
            to ``DataItemStatistic.process_object``.
        :raises ValueError: in per-source mode, if the record has no value
            for '010.SAC' or '010.SIC'.
        """
        sac = None
        sic = None

        if self._per_source:
            sac = find_value('010.SAC', record)
            sic = find_value('010.SIC', record)

            # Explicit check instead of ``assert``: asserts are removed under
            # ``python -O``, which would let a ``None`` key into the mapping
            # and make the sort in print() fail later with a TypeError.
            if sac is None or sic is None:
                raise ValueError(
                    'record of category {} has no 010.SAC/010.SIC, '
                    'cannot analyse per source'.format(cat))

        # Only count records that are actually added to the statistics.
        self.__num_records += 1

        cat_str = str(cat).zfill(3)

        # Fetch (or create) the per-source mapping once, then the
        # per-category statistic inside it.
        source_stats = self.__statistics.setdefault((sac, sic), {})

        if cat_str not in source_stats:
            source_stats[cat_str] = DataItemStatistic(cat_str)

        source_stats[cat_str].process_object(record)

    def print(self):
        """Print the record count, then the statistics of every group.

        Groups are printed in ascending ``(sac, sic)`` order, and the
        categories within each group in ascending category-string order.
        """
        print('num records {}'.format(self.__num_records))

        # Sort on the (sac, sic) key only; the dict values are not orderable.
        for (sac, sic), stat_dict in sorted(self.__statistics.items(),
                                            key=lambda item: item[0]):

            if self._per_source:
                print('data items for {}/{}'.format(sac, sic))
            else:
                print('data items')

            for cat, stat in sorted(stat_dict.items()):
                print()
                stat.print()

            # Blank lines separating one source group from the next.
            # NOTE(review): nesting level and exact literal were reconstructed
            # from a whitespace-mangled listing -- confirm against the file.
            print('\n\n')
105
125
106
126
107
127
# filter functions return True if record should be skipped
@@ -122,32 +142,32 @@ def filter_cats(cat, record):
122
142
def main (argv ):
123
143
124
144
parser = argparse .ArgumentParser (description = 'ASTERIX data item analysis' )
125
- parser .add_argument ('--framing' , help = 'Framing True or False' , required = True )
145
+ parser .add_argument ('--framing' , help = 'Framing' , default = False , action = 'store_true' , required = False )
126
146
parser .add_argument ('--cats' , help = 'ASTERIX categories to be analyzed as CSV' , required = False )
147
+ parser .add_argument ('--per_source' , help = 'Whether to do analysis per SAC/SIC' , default = False , action = 'store_true' , required = False )
127
148
128
149
args = parser .parse_args ()
129
150
130
- assert args .framing is not None
131
- assert args .framing == 'True' or args .framing == 'False'
132
- framing = args .framing == 'True'
151
+ # assert args.framing is not None
152
+ # assert args.framing == 'True' or args.framing == 'False'
153
+ print ( ' framing {} ' . format ( args .framing ))
133
154
134
155
global cat_list
135
156
if args .cats is not None :
136
157
cat_list = args .cats .split ("," )
137
158
cat_list = [int (i ) for i in cat_list ]
138
159
139
- print ('framing {}' .format (framing ))
140
160
print ('cats {}' .format (cat_list ))
161
+ print ('per-source {} ' .format (args .per_source ))
141
162
142
163
num_blocks = 0
143
164
144
- statistics_calc = DataItemStatisticsCalculator () # type: DataItemStatisticsCalculator
165
+ statistics_calc = DataItemStatisticsCalculator (args . per_source ) # type: DataItemStatisticsCalculator
145
166
146
167
if cat_list is None : # without filtering
147
- record_extractor = RecordExtractor (framing , statistics_calc .process_record ) # type: RecordExtractor
168
+ record_extractor = RecordExtractor (args . framing , statistics_calc .process_record ) # type: RecordExtractor
148
169
else : # with filtering lambda
149
- record_extractor = RecordExtractor (framing , statistics_calc .process_record , filter_cats ) # type: RecordExtractor
150
-
170
+ record_extractor = RecordExtractor (args .framing , statistics_calc .process_record , filter_cats ) # type: RecordExtractor
151
171
152
172
start_time = time .time ()
153
173
0 commit comments