import subprocess
import zipfile
import tarfile
+import statistics
+from pathlib import Path
from threading import Thread
from typing import List, Any, Optional, Dict

DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
-DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
+DB_JSON_ALL_CURRENT = 'all-project-current.json'
DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
DB_BUILD_STATUS_JSON = 'build-status.json'
#DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
    DB_JSON_ALL_PROJECT_TIMESTAMP,
    DB_JSON_ALL_FUNCTIONS,
    DB_JSON_ALL_CONSTRUCTORS,
-    DB_JSON_ALL_CURRENT_FUNCS,
+    DB_JSON_ALL_CURRENT,
]

INTROSPECTOR_WEBAPP_ZIP = (
FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
                                              '0')))

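+# Number of days of history to include when analysing recent per-fuzzer
+# coverage results.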
+NUM_RECENT_DAYS = 30
+FUZZER_COVERAGE_IS_DEGRADED = 5  # 5% or more is a degradation
+
MUST_INCLUDES = set()
MUST_INCLUDE_WITH_LANG: List[Any] = []

@@ -896,11 +901,106 @@ def extend_db_timestamps(db_timestamp, output_directory):
        json.dump(existing_timestamps, f)


-def extend_db_json_files(project_timestamps, output_directory):
+def per_fuzzer_coverage_analysis(project_name: str,
+                                 coverages: Dict[str, List[tuple[int, str]]],
+                                 lost_fuzzers):
907+ """Go through the recent coverage results and combine them into a short summary.
908+ Including an assessment if the fuzzer got worse over time.
909+ """
910+
+    # TODO: This might not be a good metric when coverage is not meaningful,
+    # for example for very small projects or projects that already have low
+    # coverage. That said, it may not matter much here because we look at
+    # per-fuzzer coverage, which should already be normalized to what the
+    # fuzzer can reach.
+    # TODO: What is a good percentage to mark as a coverage degradation?
+    # Taking 5% for now, but this should be observed and maybe made
+    # configurable per project as well.
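+    # `coverages` maps a fuzzer name to a list of (coverage percentage, date)
+    # tuples, ordered from oldest to newest.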
+    results = {}
+    for ff, data in coverages.items():
+        if len(data) > 0:
+            values = [dd[0] for dd in data]
+            dates = [dd[1] for dd in data]
+            latest_date_with_value = next(
+                (dd[1] for dd in reversed(data) if dd[0] is not None), None)
+            if latest_date_with_value is not None:
+                report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
+                    project_name, latest_date_with_value.replace('-', ''), ff)
+                report_url = report_url.removesuffix(
+                    'summary.json') + 'index.html'
+            else:
+                report_url = None
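+            # Compare the newest value against the best value from earlier
+            # days; a drop of more than FUZZER_COVERAGE_IS_DEGRADED
+            # percentage points is flagged as a degradation.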
+            max_cov = max(values[:-1], default=0)
+            avg_cov = round(statistics.fmean(values), 2)
+            current = values[-1]
+            results[ff] = {
+                'report_url': report_url,
+                'report_date': latest_date_with_value,
+                'coverages_values': values,
+                'coverages_dates': dates,
+                'max': max_cov,
+                'avg': avg_cov,
+                'current': current,
+                'has_degraded':
+                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
+                'got_lost': ff in lost_fuzzers,
+            }
+    return results
+
+
+def calculate_recent_results(projects_with_new_results, timestamps,
+                             num_days: int):
+    """Analyse recent project data to detect possible degradations of
+    fuzzer efficiency."""
+    from collections import defaultdict
+
+    data = defaultdict(dict)
+    for pt in timestamps:
+        project_name = pt['project_name']
+        if project_name in projects_with_new_results:
+            data[project_name][pt['date']] = pt
+
+    results = {}
+    for project_name, project_data in data.items():
+        fuzzers_past = set()
+        fuzzers_current = set()
+        per_fuzzer_coverages = defaultdict(list)
+
+        for do in (get_date_at_offset_as_str(ii)
+                   for ii in range(-num_days, 0, 1)):
+            try:
+                date_data = project_data[do]
+                per_fuzzer_coverage_data = date_data[
+                    'per-fuzzer-coverage-data']
+
+                fuzzers_past |= fuzzers_current
+                fuzzers_current = set(per_fuzzer_coverage_data.keys())
+
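+                # Each per-fuzzer entry is expected to carry 'covered' and
+                # 'count' totals, which are turned into a percentage.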
+                for ff, cov_data in per_fuzzer_coverage_data.items():
+                    try:
+                        perc = round(
+                            100 * cov_data['covered'] / cov_data['count'], 2)
+                    except Exception:
+                        perc = 0
+
+                    per_fuzzer_coverages[ff].append((perc, do))
+            except Exception:
+                continue
+
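+        # Fuzzers that showed up earlier in the window but are missing from
+        # the most recent data are reported as lost.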
+        fuzzer_diff = fuzzers_past - fuzzers_current
+        per_fuzzer_coverages = per_fuzzer_coverage_analysis(
+            project_name, per_fuzzer_coverages, fuzzer_diff)
+
+        results[project_name] = per_fuzzer_coverages
+
+    return results
+
+
+def extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details):
    """Extends a set of DB .json files."""

    existing_timestamps = []
-    logging.info('Loading existing timestamps 1')
+    logging.info('Loading existing timestamps')
    if os.path.isfile(
            os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
        with open(
@@ -919,10 +1019,11 @@ def extend_db_json_files(project_timestamps, output_directory):
    existing_timestamp_mapping = dict()

    for es in existing_timestamps:
-        if not es['project_name'] in existing_timestamp_mapping:
+        if es['project_name'] not in existing_timestamp_mapping:
            existing_timestamp_mapping[es['project_name']] = set()
        existing_timestamp_mapping[es['project_name']].add(es['date'])

+    projects_with_new_results = set()
    for new_ts in project_timestamps:
        to_add = True

@@ -932,24 +1033,44 @@ def extend_db_json_files(project_timestamps, output_directory):
                to_add = False
        if to_add:
            existing_timestamps.append(new_ts)
+            projects_with_new_results.add(new_ts['project_name'])
            have_added = True

    if FI_EXCLUDE_ALL_NON_MUSTS:
-        new_timestamps = []
+        # Filter existing timestamps to only those in MUST_INCLUDES.
+        kept_timestamps = []
        for ts in existing_timestamps:
            if ts['project_name'] in MUST_INCLUDES:
-                new_timestamps.append(ts)
-        existing_timestamps = new_timestamps
+                kept_timestamps.append(ts)
+        existing_timestamps = kept_timestamps

-        new_project_stamps = []
+        # Also filter the current project results.
+        kept_project_stamps = []
        for project_stamp in project_timestamps:
            if project_stamp['project_name'] in MUST_INCLUDES:
-                new_project_stamps.append(project_stamp)
-        project_timestamps = new_project_stamps
+                kept_project_stamps.append(project_stamp)
+        project_timestamps = kept_project_stamps
+
+    if should_include_details:
+        recent_results = calculate_recent_results(projects_with_new_results,
+                                                  existing_timestamps,
+                                                  NUM_RECENT_DAYS)
+        # TODO: These results may detect issues that should be communicated
+        # to project maintainers. The best approach might be to load the
+        # project_timestamps file (all-project-current.json) separately,
+        # load the recent results there, and maybe issue warnings.
+        for pt in project_timestamps:
+            try:
+                pt['recent_results'] = recent_results.get(pt['project_name'])
+            except Exception as exc:
+                logging.warning(
+                    f'Could not get recent results for {pt["project_name"]}: '
+                    f'{exc}')
+    else:
+        recent_results = None

-    logging.info('Dumping all current projects')
-    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
-              'w') as f:
+    logging.info('Dumping current project data')
+    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
        json.dump(project_timestamps, f)

    # Remove any light-introspector files because they should not be saved in the
@@ -1017,7 +1138,8 @@ def update_db_files(db_timestamp,
        f.write(json.dumps(all_header_files))

    logging.info('Extending DB json files')
-    extend_db_json_files(project_timestamps, output_directory)
+    extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details)

    logging.info('Extending DB time stamps')
    extend_db_timestamps(db_timestamp, output_directory)