2424import subprocess
2525import zipfile
2626import tarfile
27+ import statistics
28+ from pathlib import Path
2729from threading import Thread
28- from typing import List , Any , Optional , Dict
30+ from typing import List , Any , Optional , Dict , Tuple , Set
2931
3032import constants
3133import oss_fuzz
3436DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
3537DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
3638DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
37- DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
39+ DB_JSON_ALL_CURRENT = 'all-project-current.json'
3840DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
3941DB_BUILD_STATUS_JSON = 'build-status.json'
4042#DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
4446 DB_JSON_ALL_PROJECT_TIMESTAMP ,
4547 DB_JSON_ALL_FUNCTIONS ,
4648 DB_JSON_ALL_CONSTRUCTORS ,
47- DB_JSON_ALL_CURRENT_FUNCS ,
49+ DB_JSON_ALL_CURRENT ,
4850]
4951
5052INTROSPECTOR_WEBAPP_ZIP = (
5355FI_EXCLUDE_ALL_NON_MUSTS = bool (int (os .getenv ('FI_EXCLUDE_ALL_NON_MUSTS' ,
5456 '0' )))
5557
58+ NUM_RECENT_DAYS = 30
59+ FUZZER_COVERAGE_IS_DEGRADED = 5 # 5% or more is a degradation
60+
5661MUST_INCLUDES = set ()
5762MUST_INCLUDE_WITH_LANG : List [Any ] = []
5863
@@ -283,9 +288,8 @@ def extract_and_refine_functions(all_function_list, date_str):
283288 return refined_proj_list
284289
285290
286- def extract_code_coverage_data (code_coverage_summary , project_name , date_str ,
287- project_language ) -> Optional [Dict [str , Any ]]:
288- """Gets coverage URL and line coverage total of a project"""
291+ def extract_code_coverage_data (code_coverage_summary ):
292+ """Extract the coverage data from a loaded coverage summary.json"""
289293 # Extract data from the code coverage reports
290294 if code_coverage_summary is None :
291295 return None
@@ -307,8 +311,19 @@ def extract_code_coverage_data(code_coverage_summary, project_name, date_str,
307311 except :
308312 pass
309313
314+ return line_total_summary
315+
316+
317+ def prepare_code_coverage_dict (
318+ code_coverage_summary , project_name : str , date_str : str ,
319+ project_language : str ) -> Optional [Dict [str , Any ]]:
320+ """Gets coverage URL and line coverage total of a project"""
321+ line_total_summary = extract_code_coverage_data (code_coverage_summary )
322+ if line_total_summary is None :
323+ return None
324+
310325 coverage_url = oss_fuzz .get_coverage_report_url (project_name ,
311- date_str .replace ("-" , "" ),
326+ date_str .replace ('-' , '' ),
312327 project_language )
313328 code_coverage_data_dict = {
314329 'coverage_url' : coverage_url ,
@@ -442,7 +457,7 @@ def extract_local_project_data(project_name, oss_fuzz_path,
442457 project_name
443458 }
444459
445- code_coverage_data_dict = extract_code_coverage_data (
460+ code_coverage_data_dict = prepare_code_coverage_dict (
446461 code_coverage_summary , project_name , '' , project_language )
447462
448463 if cov_fuzz_stats is not None :
@@ -465,8 +480,8 @@ def extract_local_project_data(project_name, oss_fuzz_path,
465480 dictionary_key = '%s###%s' % (project_name , '' )
466481 manager_return_dict [dictionary_key ] = {
467482 'project_timestamp' : project_timestamp ,
468- " introspector-data-dict" : introspector_data_dict ,
469- " coverage-data-dict" : code_coverage_data_dict ,
483+ ' introspector-data-dict' : introspector_data_dict ,
484+ ' coverage-data-dict' : code_coverage_data_dict ,
470485 'all-header-files' : all_header_files ,
471486 }
472487
@@ -704,20 +719,30 @@ def extract_project_data(project_name, date_str, should_include_details,
704719 'project_name' : project_name
705720 }
706721
707- code_coverage_data_dict = extract_code_coverage_data (
722+ code_coverage_data_dict = prepare_code_coverage_dict (
708723 code_coverage_summary , project_name , date_str , project_language )
709724
725+ per_fuzzer_cov = {}
710726 if cov_fuzz_stats is not None :
711727 all_fuzzers = cov_fuzz_stats .split ("\n " )
712728 if all_fuzzers [- 1 ] == '' :
713729 all_fuzzers = all_fuzzers [0 :- 1 ]
714730 amount_of_fuzzers = len (all_fuzzers )
731+ for ff in all_fuzzers :
732+ try :
733+ fuzzer_cov = oss_fuzz .get_fuzzer_code_coverage_summary (
734+ project_name , date_str .replace ("-" , "" ), ff )
735+ fuzzer_cov_data = extract_code_coverage_data (fuzzer_cov )
736+ per_fuzzer_cov [ff ] = fuzzer_cov_data
737+ except :
738+ pass
715739
716740 project_timestamp = {
717741 "project_name" : project_name ,
718742 "date" : date_str ,
719743 'language' : project_language ,
720744 'coverage-data' : code_coverage_data_dict ,
745+ 'per-fuzzer-coverage-data' : per_fuzzer_cov ,
721746 'introspector-data' : introspector_data_dict ,
722747 'fuzzer-count' : amount_of_fuzzers ,
723748 'project_repository' : project_repository ,
@@ -878,11 +903,105 @@ def extend_db_timestamps(db_timestamp, output_directory):
878903 json .dump (existing_timestamps , f )
879904
880905
881- def extend_db_json_files (project_timestamps , output_directory ):
def per_fuzzer_coverage_analysis(project_name: str,
                                 coverages: Dict[str, List[Tuple[int, str]]],
                                 lost_fuzzers):
    """Combine recent per-fuzzer coverage results into a short summary.

    Includes an assessment of whether each fuzzer got worse over time.

    Args:
        project_name: OSS-Fuzz project the fuzzers belong to.
        coverages: maps fuzzer name to a list of (coverage percentage, date
            string) tuples, ordered oldest to newest.
        lost_fuzzers: fuzzer names that were present earlier in the window
            but are missing from the most recent results.

    Returns:
        Dict mapping fuzzer name to a summary dict containing the latest
        report URL/date, the coverage history, max/avg/current values and
        the 'has_degraded' / 'got_lost' flags.
    """
    # TODO This might not be a good metric when coverage is not meaningful,
    # for example for very small projects or projects that have low coverage
    # already. Though, this might not be super bad as we are taking a look
    # at per fuzzer coverage, which should already be normalized to what
    # can be reached.
    # TODO What would be a good percentage to mark as coverage degradation?
    # Taking 5% for now, but this should be observed; maybe it should be
    # configurable per project as well.
    results = {}
    for ff, data in coverages.items():
        if len(data) > 0:
            values = [dd[0] for dd in data]
            dates = [dd[1] for dd in data]
            # Bug fix: use a default of None so that a history containing no
            # usable values does not raise StopIteration here (the original
            # bare next() made the None-check below unreachable).
            latest_date_with_value = next(
                (dd[1] for dd in reversed(data) if dd[0] is not None), None)
            if latest_date_with_value is not None:
                report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
                    project_name, latest_date_with_value.replace('-', ''), ff)
                report_url = report_url[:-len('summary.json')] + 'index.html'
            else:
                report_url = None
            # Max over the history excluding the most recent value, so the
            # degradation check compares "today" against the past.
            max_cov = max(values[:-1], default=0)
            avg_cov = round(statistics.fmean(values), 2)
            current = values[-1]
            results[ff] = {
                'report_url': report_url,
                'report_date': latest_date_with_value,
                'coverages_values': values,
                'coverages_dates': dates,
                'max': max_cov,
                'avg': avg_cov,
                'current': current,
                'has_degraded':
                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
                'got_lost': ff in lost_fuzzers,
            }
    return results
950+
951+
def calculate_recent_results(projects_with_new_results, timestamps,
                             num_days: int):
    """Analyse recent project data to detect degraded fuzzer efficiency.

    Args:
        projects_with_new_results: set of project names that received new
            results in this run; only these are analysed.
        timestamps: list of per-project, per-date timestamp dicts.
        num_days: how many days back from today to inspect.

    Returns:
        Dict mapping project name to its per-fuzzer coverage analysis
        (see per_fuzzer_coverage_analysis).
    """
    from collections import defaultdict

    # Index the timestamps of the projects of interest by date.
    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
    for pt in timestamps:
        project_name = pt['project_name']
        if project_name in projects_with_new_results:
            data[project_name][pt['date']] = pt

    results = {}
    for project_name, project_data in data.items():
        fuzzers_past = set()
        fuzzers_current: Set[str] = set()
        per_fuzzer_coverages = defaultdict(list)

        for do in (get_date_at_offset_as_str(ii)
                   for ii in range(-num_days, 0, 1)):
            # Narrowed from a bare except: a missing day or a timestamp
            # without per-fuzzer data is expected and simply skipped;
            # anything else should surface as an error.
            try:
                per_fuzzer_coverage_data = project_data[do][
                    'per-fuzzer-coverage-data']
                fuzzers_past |= fuzzers_current
                fuzzers_current = set(per_fuzzer_coverage_data.keys())
            except (KeyError, AttributeError):
                continue

            for ff, cov_data in per_fuzzer_coverage_data.items():
                try:
                    perc = round(
                        100 * cov_data['covered'] / cov_data['count'], 2)
                except (KeyError, TypeError, ZeroDivisionError):
                    # cov_data may be None or incomplete; count as 0%.
                    perc = 0
                per_fuzzer_coverages[ff].append((perc, do))

        # Fuzzers seen earlier in the window but absent from the latest day.
        fuzzer_diff = fuzzers_past - fuzzers_current
        results[project_name] = per_fuzzer_coverage_analysis(
            project_name, per_fuzzer_coverages, fuzzer_diff)

    return results
997+
998+
999+ def extend_db_json_files (project_timestamps , output_directory ,
1000+ should_include_details ):
8821001 """Extends a set of DB .json files."""
8831002
8841003 existing_timestamps = []
885- logging .info ('Loading existing timestamps 1 ' )
1004+ logging .info ('Loading existing timestamps' )
8861005 if os .path .isfile (
8871006 os .path .join (output_directory , DB_JSON_ALL_PROJECT_TIMESTAMP )):
8881007 with open (
@@ -901,10 +1020,11 @@ def extend_db_json_files(project_timestamps, output_directory):
9011020 existing_timestamp_mapping = dict ()
9021021
9031022 for es in existing_timestamps :
904- if not es ['project_name' ] in existing_timestamp_mapping :
1023+ if es ['project_name' ] not in existing_timestamp_mapping :
9051024 existing_timestamp_mapping [es ['project_name' ]] = set ()
9061025 existing_timestamp_mapping [es ['project_name' ]].add (es ['date' ])
9071026
1027+ projects_with_new_results = set ()
9081028 for new_ts in project_timestamps :
9091029 to_add = True
9101030
@@ -914,24 +1034,44 @@ def extend_db_json_files(project_timestamps, output_directory):
9141034 to_add = False
9151035 if to_add :
9161036 existing_timestamps .append (new_ts )
1037+ projects_with_new_results .add (new_ts ['project_name' ])
9171038 have_added = True
9181039
9191040 if FI_EXCLUDE_ALL_NON_MUSTS :
920- new_timestamps = []
# Filter existing timestamps to only those in MUST_INCLUDES.
1042+ kept_timestamps = []
9211043 for ts in existing_timestamps :
9221044 if ts ['project_name' ] in MUST_INCLUDES :
923- new_timestamps .append (ts )
924- existing_timestamps = new_timestamps
1045+ kept_timestamps .append (ts )
1046+ existing_timestamps = kept_timestamps
9251047
926- new_project_stamps = []
1048+ # Also filter the current project results.
1049+ kept_project_stamps = []
9271050 for project_stamp in project_timestamps :
9281051 if project_stamp ['project_name' ] in MUST_INCLUDES :
929- new_project_stamps .append (project_stamp )
930- project_timestamps = new_project_stamps
1052+ kept_project_stamps .append (project_stamp )
1053+ project_timestamps = kept_project_stamps
1054+
1055+ if should_include_details :
1056+ recent_results = calculate_recent_results (projects_with_new_results ,
1057+ existing_timestamps ,
1058+ NUM_RECENT_DAYS )
1059+ # TODO these results might detect issues that should be communicated with
1060+ # project maintainers. The best approach might be to load the
1061+ # project_timestamps file (all-project-current.json)
1062+ # separately and load recent results there and maybe issue warnings.
1063+ for pt in project_timestamps :
1064+ try :
1065+ pt ['recent_results' ] = recent_results .get (pt ['project_name' ])
1066+ except Exception as exc :
1067+ logger .warning (
1068+ f'Could not get recent results for { pt ["project_name" ]} : { exc } '
1069+ )
1070+ else :
1071+ recent_results = None
9311072
932- logging .info ('Dumping all current projects' )
933- with open (os .path .join (output_directory , DB_JSON_ALL_CURRENT_FUNCS ),
934- 'w' ) as f :
1073+ logging .info ('Dumping current project data' )
1074+ with open (os .path .join (output_directory , DB_JSON_ALL_CURRENT ), 'w' ) as f :
9351075 json .dump (project_timestamps , f )
9361076
9371077 # Remove any light-introspector files because they should not be saved in the
@@ -999,7 +1139,8 @@ def update_db_files(db_timestamp,
9991139 f .write (json .dumps (all_header_files ))
10001140
10011141 logging .info ('Extending DB json files' )
1002- extend_db_json_files (project_timestamps , output_directory )
1142+ extend_db_json_files (project_timestamps , output_directory ,
1143+ should_include_details )
10031144
10041145 logging .info ('Extending DB time stamps' )
10051146 extend_db_timestamps (db_timestamp , output_directory )
0 commit comments