forked from mfacorcoran/ogip
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathogip_check_dir.py
346 lines (275 loc) · 15.3 KB
/
ogip_check_dir.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
import os
import sys
from ogip_check import ogip_check
from ogip_generic_lib import *
from itertools import chain
from ogip_dictionary import ogip_dictionary
from datetime import datetime
import gc
import inspect
def dict_add(dict,dir,file,statobj):
    # Record statobj under dict[dir][file], creating the per-directory
    # sub-dictionary the first time this directory is seen.
    dict.setdefault(dir, {})[file] = statobj
def dict_incr(dict,key):
    # Add one to dict[key], starting from zero for a previously unseen key.
    dict[key] = dict.get(key, 0) + 1
class ogip_collect:
    """
    Accumulator for per-file check results.

    Files are binned into three dictionaries (bad / verified / checked),
    each keyed by directory and then by filename, holding the status
    object produced by ogip_check().  Running tallies of recognized file
    types and extension names are kept alongside, and the count_*()
    methods summarize the collected results for the final report.

    (Uses dict.values() rather than the Python-2-only itervalues();
    the iterated items are identical on both Python 2 and 3.)
    """
    def __init__(self):
        # Files that could not be checked (unopenable, unrecognized, ...),
        # as {directory: {filename: statobj}}.
        self.bad={}
        # Files that were only run through fverify, same layout.
        self.verified={}
        # Files that went through the full OGIP check, same layout.
        self.checked={}
        # Running count of files found of each recognized type.
        self.count_types={}
        # Per-type dictionaries counting extension names encountered.
        self.count_extnames={}
        self.count_unrec_extnames={}
        # Unrecognized extension names -> occurrence counts.
        self.unrec_extnames={}

    def update(self,dir=None,file=None,statobj=None):
        """Record the status object for one file and update the running
        type and extension-name tallies."""
        if statobj.status != 0:
            # Could not be checked: failed to open or to be recognized.
            dict_add(self.bad, dir, file, statobj)
        elif statobj.vonly == True:
            # Only fverify was even attempted.
            dict_add(self.verified, dir, file, statobj)
        else:
            # The full OGIP check was attempted.
            dict_add(self.checked, dir, file, statobj)

        if statobj.otype != 'unknown':
            dict_incr(self.count_types,statobj.otype)
            # Another level of dictionaries: extension names per type.
            per_type = self.count_extnames.setdefault(statobj.otype, {})
            for extn in statobj.extns:
                dict_incr(per_type, extn)
            for extn in statobj.unrec_extns:
                dict_incr(self.unrec_extnames,extn)
        else:
            # For unrecognized files, statobj.extns is a plain list of
            # extension names rather than a dictionary of statinfo.
            for extn in statobj.extns:
                # Unopened files carry 'none' placeholders; don't count
                # those as unrecognized extension names.
                if extn != 'none':
                    dict_incr(self.unrec_extnames,extn)

    def count_bad(self):
        """Total number of files that could not be checked."""
        return sum(len(files) for files in self.bad.values())

    def count_unrecognized(self):
        """Total files whose type could not be recognized."""
        return sum(f.unrec for files in self.bad.values() for f in files.values())

    def count_fopen(self):
        """Total files that could not be opened as FITS."""
        return sum(f.fopen for files in self.bad.values() for f in files.values())

    def count_fver_bad(self):
        """Files that failed fverify unfixably (fver==2).  These are
        always in the bad bin, but not everything bad has fver==2."""
        return sum(1 for files in self.bad.values() for f in files.values() if f.fver == 2)

    def count_checksum(self):
        """Checksum issues among files that otherwise passed ftverify."""
        return sum(1 for files in self.checked.values() for f in files.values() if f.checksum == 1)

    def count_wcsval_err(self):
        """Images that failed wcs.validate() but could still be checked."""
        return sum(1 for files in self.checked.values() for f in files.values() if f.wcsval == 1)

    def count_fver_fixed(self):
        """Files with fixable fverify problems (fver==1, as opposed to
        0==no problem or 2==unfixable), whether subsequently checked or
        not ('bad')."""
        fixed = 0
        for bin in (self.checked, self.bad):
            for files in bin.values():
                fixed += sum(1 for f in files.values() if f.fver == 1)
        return fixed

    def count_failed(self):
        """Checked files that violate FITS or OGIP standards (errors)."""
        return sum(
            1 for files in self.checked.values() for f in files.values()
            if f.vonly == False and f.tot_errors() > 0)

    def count_good(self):
        """Checked files with no OGIP errors and no warnings at all."""
        return sum(
            1 for files in self.checked.values() for f in files.values()
            if f.vonly == False and f.tot_errors() == 0 and f.tot_warnings() == 0)

    def count_warned(self,level=None):
        """Checked files with no errors but at least one extension with a
        warning at the given level; each file counts at most once."""
        count=0
        for files in self.checked.values():
            for f in files.values():
                if f.tot_errors() != 0:
                    continue
                if any(e.WARNINGS[level] > 0 for e in f.extns.values()):
                    count+=1
        return count

    def count_checked(self):
        """Total number of files that were fully checked."""
        return sum(len(files) for files in self.checked.values())

    def count_verified(self):
        """Total number of files that were verified only."""
        return sum(len(files) for files in self.verified.values())

    def count_missing_key(self,otype,extname,key):
        """Count checked files of the given type whose extension
        *extname* is missing the required keyword *key* (a missing
        keyword is an error case, hence only the checked bin matters)."""
        count=0
        for files in self.checked.values():
            for fstat in files.values():
                if fstat.otype != otype:
                    continue
                if extname not in fstat.extns:
                    continue
                if key in fstat.extns[extname].MISKEYS:
                    count+=1
        return count

    def count_missing_col(self,otype,extname,col):
        """Count checked files of the given type whose extension
        *extname* is missing the required column *col*."""
        count=0
        for files in self.checked.values():
            for fstat in files.values():
                if fstat.otype != otype:
                    continue
                if extname not in fstat.extns:
                    continue
                if col in fstat.extns[extname].MISCOLS:
                    count+=1
        return count
def ogip_check_dir(basedir,logdir,meta_key,default_type,verbosity):
    """
    Traverses the given base directory and for all files found beneath:
    - checks if FITS type (simply by trying to open it as a FITS file), and if FITS, then
    - runs FITS verify (pyfits), then
    - runs ogip_check, capturing the output, then
    - collects statistics on the results, and
    - writes a set of reports.

    basedir:      root directory to walk (symlinks are followed).
    logdir:       where per-file logs are written, mirroring the source
                  tree; if falsy, log output goes to stdout instead.
    meta_key:     key handed to ogip_get_meta() for the ignore lists.
    default_type: default OGIP type forwarded to ogip_check().
    verbosity:    0 = quiet, >0 = per-directory, >1 = per-file progress.

    Returns None; the summary report is printed to stdout.
    """
    # Get from meta data lists of suffixes and directories to ignore:
    meta=ogip_get_meta(meta_key)
    ignore={
        'suffixes':tuple(meta["ignore"]["suffixes"]),
        'directories':tuple(meta["ignore"]["directories"]),
    }

    # Object that holds summary data and methods.
    summary=ogip_collect()

    # Echo the arguments this run was invoked with:
    frame = inspect.currentframe()
    args, _, _, values = inspect.getargvalues(frame)
    print("Running %s with" % inspect.getframeinfo(frame)[2])
    for i in args:
        print(" %s = %s" % (i, values[i]))

    # Goes through all contents of root directory
    for dir, subdirs, files in os.walk(basedir, topdown=False,followlinks=True):
        # Skip hidden directories outright.
        if os.path.split(dir)[1].startswith('.'): continue
        # Directories on the ignore list get FITS verification only:
        verify_only = dir.endswith(tuple(ignore['directories']))
        cnt_check=cnt_tot=0
        if (verbosity > 0 and len(files) > 0):
            print("\n\n************************************************")
            if (verify_only): print("Now on directory %s for FITS verification ONLY." % dir)
            else: print("Now on directory %s" % dir)
            print("************************************************")
        # Skip files with ignored suffixes (including .gz-compressed and
        # upper-cased variants) and hidden files:
        for name in [x for x in files if not
                     ( x.endswith(ignore['suffixes'])
                       or x.endswith(tuple([y+".gz" for y in ignore['suffixes'] ]))
                       or x.endswith(tuple([y.upper() for y in ignore['suffixes'] ]))
                       or x.endswith(tuple([(y+".gz").upper() for y in ignore['suffixes'] ]))
                       or x.startswith('.')
                     ) ]:
            cnt_tot+=1
            one=os.path.join(dir, name)
            if (verbosity > 1): print("\nTIMESTAMP: " + datetime.now().strftime("%Y-%m-%d %X"))
            if logdir:
                # Mirror the source tree structure under logdir and log
                # each file individually:
                logpath= os.path.join(logdir,os.path.relpath(dir,basedir))
                if not os.path.isdir(logpath): os.makedirs(logpath)
                logfile=os.path.join(logpath,name+".check.log")
                if (verbosity > 1 and verify_only==False): print("CHECKING %s; see log in %s" % (one, logfile) )
                elif (verbosity > 1 and verify_only==True): print("Verifying %s; see log in %s" % (one, logfile) )
                sys.stdout.flush()
            else:
                logfile=sys.stdout
                if (verbosity > 1 and verify_only==False): print("CHECKING %s" % one)
                elif (verbosity > 1 and verify_only==True): print("Verifying %s" % one)
                sys.stdout.flush()
            # Returns status that contains both the counts of errors,
            # warnings, and the logged reports.
            status=ogip_check(one,None,logfile,verbosity=verbosity,dtype=default_type,vonly=verify_only,meta_key=meta_key)
            if status.status == 0:
                if (verbosity > 1 and verify_only==False):
                    print("Done. Found file of type %s with %s errors and %s (level=2) and %s (level=3) warnings." % (status.otype, status.tot_errors(),status.tot_warnings(2),status.tot_warnings(3) ) )
                cnt_check+=1
            sys.stdout.flush()
            # Store the retstat info for the file
            summary.update(dir=dir,file=name,statobj=status)
            gc.collect() # Should be unnecessary but just in case.
        if (verbosity > 1 and len(files) > 0 and verify_only==False): print("\n********\nFound %s files in this directory that could be checked out of %s that were examined and %s that were ignored (for %s total in the directory)." % (cnt_check,cnt_tot,len(files)-cnt_tot,len(files)) )

    # Overall totals:
    print("\n***************************************************")
    print("Done checking. Now to summarize:\n")
    print("The total number of files found: %s" % int(summary.count_bad()+summary.count_verified()+summary.count_checked()) )
    print("The total number of files that could not be opened as FITS: %s" % summary.count_fopen() )
    print("The total number of files whose type could not be recognized: %s" % summary.count_unrecognized() )
    print("The total number of files that failed FITS verify and could NOT be 'fixed': %s" % summary.count_fver_bad() )
    print("The total number of files that failed FITS verify but 'fixed': %s" % summary.count_fver_fixed() )
    print("The total number of files that have FITS checksum issues: %s" % summary.count_checksum() )
    print("The total number of files that could not be checked for other reasons: %s" % int(summary.count_bad()-summary.count_fopen()-summary.count_unrecognized()-summary.count_fver_bad() ) )
    print("The total number of files verified only: %s" % summary.count_verified() )
    print("The total number of files checked: %s" % summary.count_checked() )
    print("The total number of files with no OGIP warnings or errors: %s" % summary.count_good() )
    print("The total number of files with no errors but with OGIP level 3 warnings: %s" % summary.count_warned(3) )
    print("The total number of files with no errors but with OGIP level 2 warnings: %s" % summary.count_warned(2) )
    print("The total number of files with OGIP errors: %s" % summary.count_failed() )
    print("The total number of images that failed wcs.validate(): %s" % summary.count_wcsval_err() )
    print("")
    for k in sorted(summary.count_types):
        print("Checked %s files of type %s" % (summary.count_types[k],k) )
    print("")

    # Summarize required keywords for each type:
    types=[ 'TIMING', 'SPECTRAL', 'RMF', 'ARF', 'CALDB', 'IMAGE' ]
    for t in sorted(types):
        print("")
        if t not in summary.count_types:
            # Don't bother to summarize types for which no files were found
            print("Found no files of type %s" % t)
            continue
        print("Summary of missing required keywords and columns for type %s:" % t)
        cnt=0
        # (Renamed from 'dict' to avoid shadowing the builtin.)
        type_dict=ogip_dictionary(t)
        if t == 'CALDB':
            # CALDB types don't have required extnames. Any files
            # checked as CALDB types will store the extn as CALFILE.
            check_extns=['CALFILE']
        elif t == 'IMAGE':
            check_extns=['IMAGE']
        else:
            check_extns=type_dict['EXTENSIONS']['REQUIRED']+ type_dict['EXTENSIONS']['OPTIONAL']
        for extn in check_extns:
            if extn in summary.count_extnames[t]:
                for k in type_dict[extn]['KEYWORDS']:
                    # Only level-1 (required) keywords are reported:
                    if type_dict[extn]['KEYWORDS'][k]['level']!=1: continue
                    if summary.count_missing_key(t,extn,k) > 0:
                        cnt+=1
                        print(" Found %s (out of %s) files have at least one extension %s missing key %s." % (summary.count_missing_key(t,extn,k), summary.count_extnames[t][extn], extn, k) )
                for c in type_dict[extn]['COLUMNS']:
                    if type_dict[extn]['COLUMNS'][c]['level']!=1: continue
                    if summary.count_missing_col(t,extn,c) > 0:
                        cnt+=1
                        print(" Found %s (out of %s) files have at least one extension %s missing column %s." % (summary.count_missing_col(t,extn,c), summary.count_extnames[t][extn], extn, c) )
        if cnt==0: print(" None of these are missing required keywords or columns.")

    if len(summary.unrec_extnames.keys()) > 0:
        print("\nFound the following unrecognized extensions (with total number of each):")
        for k in sorted(summary.unrec_extnames):
            print (" %s (%s)" % (k,summary.unrec_extnames[k]) )
        print("")
    else: print("\n(No unrecognized extensions.)")

    print("\nDone\n")
    return
if __name__== "__main__":
    # Minimal command-line entry point: check the current directory.
    # NOTE(review): the original called ogip_check_dir(dir) with one
    # argument (a TypeError, since five are required) and then
    # dereferenced its return value, which is always None.
    # Defaults below (no log dir, no meta key, no default type,
    # verbosity 1) are a best guess -- confirm meta_key handling
    # against ogip_get_meta().
    basedir = "."
    ogip_check_dir(basedir, None, None, None, 1)
    exit(0)