2424import subprocess
2525import zipfile
2626import tarfile
27+ import statistics
28+ from pathlib import Path
2729from threading import Thread
28- from typing import List , Any , Optional , Dict
30+ from typing import List , Any , Optional , Dict , Tuple , Set
2931
3032import constants
3133import oss_fuzz
3436DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
3537DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
3638DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
37- DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
39+ DB_JSON_ALL_CURRENT = 'all-project-current.json'
3840DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
3941DB_BUILD_STATUS_JSON = 'build-status.json'
4042#DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
4446 DB_JSON_ALL_PROJECT_TIMESTAMP ,
4547 DB_JSON_ALL_FUNCTIONS ,
4648 DB_JSON_ALL_CONSTRUCTORS ,
47- DB_JSON_ALL_CURRENT_FUNCS ,
49+ DB_JSON_ALL_CURRENT ,
4850]
4951
5052INTROSPECTOR_WEBAPP_ZIP = (
5355FI_EXCLUDE_ALL_NON_MUSTS = bool (int (os .getenv ('FI_EXCLUDE_ALL_NON_MUSTS' ,
5456 '0' )))
5557
58+ NUM_RECENT_DAYS = 30
59+ FUZZER_COVERAGE_IS_DEGRADED = 5 # 5% or more is a degradation
60+
5661MUST_INCLUDES = set ()
5762MUST_INCLUDE_WITH_LANG : List [Any ] = []
5863
@@ -896,11 +901,105 @@ def extend_db_timestamps(db_timestamp, output_directory):
896901 json .dump (existing_timestamps , f )
897902
898903
899- def extend_db_json_files (project_timestamps , output_directory ):
def per_fuzzer_coverage_analysis(project_name: str,
                                 coverages: Dict[str, List[Tuple[int, str]]],
                                 lost_fuzzers):
    """Combine recent per-fuzzer coverage results into a short summary.

    Includes an assessment of whether each fuzzer got worse over time.

    Args:
      project_name: OSS-Fuzz project the coverage data belongs to.
      coverages: maps a fuzzer name to a chronologically ordered list of
        (coverage-percentage, date) tuples.
      lost_fuzzers: fuzzer names seen earlier in the window but absent from
        the most recent data.

    Returns:
      Dict mapping fuzzer name to a summary dict holding the latest report
      URL/date, the raw value/date series, max/avg/current coverage, and
      flags for degradation and disappearance.
    """

    # TODO This might not be a good metric when coverage is not meaningful,
    # for example for very small projects or projects that have low coverage
    # already. Though, this might not be super bad as we are taking a look
    # at per fuzzer coverage, which should already be normalized to what
    # can be reached.
    # TODO What would be a good percentage to mark as coverage degradation;
    # taking 5% for now but it should be observed, and maybe it should be
    # configurable per project as well.
    results = {}
    for ff, data in coverages.items():
        if not data:
            continue
        values = [dd[0] for dd in data]
        dates = [dd[1] for dd in data]
        # Walk backwards for the newest date that actually has a value.
        # A default of None prevents StopIteration on an all-None series.
        latest_date_with_value = next(
            (dd[1] for dd in reversed(data) if dd[0] is not None), None)
        if latest_date_with_value is not None:
            report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
                project_name, latest_date_with_value.replace('-', ''), ff)
            # Point at the human-readable report, not the raw summary JSON.
            report_url = report_url[:-len('summary.json')] + 'index.html'
        else:
            report_url = None
        # Compare today's value against the best of the *previous* days.
        max_cov = max(values[:-1], default=0)
        avg_cov = round(statistics.fmean(values), 2)
        current = values[-1]
        results[ff] = {
            'report_url': report_url,
            'report_date': latest_date_with_value,
            'coverages_values': values,
            'coverages_dates': dates,
            'max': max_cov,
            'avg': avg_cov,
            'current': current,
            'has_degraded':
                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
            'got_lost': ff in lost_fuzzers,
        }
    return results
948+
949+
def calculate_recent_results(projects_with_new_results, timestamps,
                             num_days: int):
    """Analyse recent project data to detect degraded fuzzer efficiency.

    Args:
      projects_with_new_results: set of project names that received new
        results in this run; only these are analysed.
      timestamps: list of per-project, per-date timestamp dicts.
      num_days: how many days back from today to inspect.

    Returns:
      Dict mapping project name to the per-fuzzer summary produced by
      per_fuzzer_coverage_analysis().
    """
    from collections import defaultdict

    # project -> date -> timestamp entry, restricted to projects with news.
    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
    for pt in timestamps:
        project_name = pt['project_name']
        if project_name in projects_with_new_results:
            data[project_name][pt['date']] = pt

    results = {}
    for project_name, project_data in data.items():
        fuzzers_past = set()
        fuzzers_current: Set[str] = set()
        per_fuzzer_coverages = defaultdict(list)

        for do in (get_date_at_offset_as_str(ii)
                   for ii in range(-num_days, 0, 1)):
            try:
                per_fuzzer_coverage_data = project_data[do][
                    'per-fuzzer-coverage-data']
            except KeyError:
                # No data recorded for this date; skip it.
                continue

            fuzzers_past |= fuzzers_current
            fuzzers_current = set(per_fuzzer_coverage_data.keys())

            for ff, cov_data in per_fuzzer_coverage_data.items():
                try:
                    perc = round(
                        100 * cov_data['covered'] / cov_data['count'], 2)
                except (KeyError, TypeError, ZeroDivisionError):
                    # Malformed or empty coverage data counts as 0%.
                    perc = 0

                per_fuzzer_coverages[ff].append((perc, do))

        # Fuzzers seen earlier in the window but gone from the latest data.
        fuzzer_diff = fuzzers_past - fuzzers_current
        results[project_name] = per_fuzzer_coverage_analysis(
            project_name, per_fuzzer_coverages, fuzzer_diff)

    return results
995+
996+
997+ def extend_db_json_files (project_timestamps , output_directory ,
998+ should_include_details ):
900999 """Extends a set of DB .json files."""
9011000
9021001 existing_timestamps = []
903- logging .info ('Loading existing timestamps 1 ' )
1002+ logging .info ('Loading existing timestamps' )
9041003 if os .path .isfile (
9051004 os .path .join (output_directory , DB_JSON_ALL_PROJECT_TIMESTAMP )):
9061005 with open (
@@ -919,10 +1018,11 @@ def extend_db_json_files(project_timestamps, output_directory):
9191018 existing_timestamp_mapping = dict ()
9201019
9211020 for es in existing_timestamps :
922- if not es ['project_name' ] in existing_timestamp_mapping :
1021+ if es ['project_name' ] not in existing_timestamp_mapping :
9231022 existing_timestamp_mapping [es ['project_name' ]] = set ()
9241023 existing_timestamp_mapping [es ['project_name' ]].add (es ['date' ])
9251024
1025+ projects_with_new_results = set ()
9261026 for new_ts in project_timestamps :
9271027 to_add = True
9281028
@@ -932,24 +1032,44 @@ def extend_db_json_files(project_timestamps, output_directory):
9321032 to_add = False
9331033 if to_add :
9341034 existing_timestamps .append (new_ts )
1035+ projects_with_new_results .add (new_ts ['project_name' ])
9351036 have_added = True
9361037
9371038 if FI_EXCLUDE_ALL_NON_MUSTS :
938- new_timestamps = []
1039+ # Filter existing timestamps to only those in MUST_INCLUDES.
1040+ kept_timestamps = []
9391041 for ts in existing_timestamps :
9401042 if ts ['project_name' ] in MUST_INCLUDES :
941- new_timestamps .append (ts )
942- existing_timestamps = new_timestamps
1043+ kept_timestamps .append (ts )
1044+ existing_timestamps = kept_timestamps
9431045
944- new_project_stamps = []
1046+ # Also filter the current project results.
1047+ kept_project_stamps = []
9451048 for project_stamp in project_timestamps :
9461049 if project_stamp ['project_name' ] in MUST_INCLUDES :
947- new_project_stamps .append (project_stamp )
948- project_timestamps = new_project_stamps
1050+ kept_project_stamps .append (project_stamp )
1051+ project_timestamps = kept_project_stamps
1052+
1053+ if should_include_details :
1054+ recent_results = calculate_recent_results (projects_with_new_results ,
1055+ existing_timestamps ,
1056+ NUM_RECENT_DAYS )
1057+ # TODO these results might detect issues that should be communicated with
1058+ # project maintainers. The best approach might be to load the
1059+ # project_timestamps file (all-project-current.json)
1060+ # separately and load recent results there and maybe issue warnings.
1061+ for pt in project_timestamps :
1062+ try :
1063+ pt ['recent_results' ] = recent_results .get (pt ['project_name' ])
1064+ except Exception as exc :
1065+ logger .warning (
1066+ f'Could not get recent results for { pt ["project_name" ]} : { exc } '
1067+ )
1068+ else :
1069+ recent_results = None
9491070
950- logging .info ('Dumping all current projects' )
951- with open (os .path .join (output_directory , DB_JSON_ALL_CURRENT_FUNCS ),
952- 'w' ) as f :
1071+ logging .info ('Dumping current project data' )
1072+ with open (os .path .join (output_directory , DB_JSON_ALL_CURRENT ), 'w' ) as f :
9531073 json .dump (project_timestamps , f )
9541074
9551075 # Remove any light-introspector files because they should not be saved in the
@@ -1017,7 +1137,8 @@ def update_db_files(db_timestamp,
10171137 f .write (json .dumps (all_header_files ))
10181138
10191139 logging .info ('Extending DB json files' )
1020- extend_db_json_files (project_timestamps , output_directory )
1140+ extend_db_json_files (project_timestamps , output_directory ,
1141+ should_include_details )
10211142
10221143 logging .info ('Extending DB time stamps' )
10231144 extend_db_timestamps (db_timestamp , output_directory )
0 commit comments