
Commit bee9b7a

Rework project reports

Signed-off-by: phi-go <[email protected]>

1 parent 2541882 commit bee9b7a

File tree: 5 files changed, +228 −60 lines

tools/web-fuzzing-introspection/app/static/assets/db/web_db_creator_from_summary.py

Lines changed: 137 additions & 16 deletions
@@ -24,8 +24,10 @@
 import subprocess
 import zipfile
 import tarfile
+import statistics
+from pathlib import Path
 from threading import Thread
-from typing import List, Any, Optional, Dict
+from typing import List, Any, Optional, Dict, Tuple, Set
 
 import constants
 import oss_fuzz
@@ -34,7 +36,7 @@
 DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
 DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
 DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
-DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
+DB_JSON_ALL_CURRENT = 'all-project-current.json'
 DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
 DB_BUILD_STATUS_JSON = 'build-status.json'
 #DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
@@ -44,7 +46,7 @@
     DB_JSON_ALL_PROJECT_TIMESTAMP,
     DB_JSON_ALL_FUNCTIONS,
     DB_JSON_ALL_CONSTRUCTORS,
-    DB_JSON_ALL_CURRENT_FUNCS,
+    DB_JSON_ALL_CURRENT,
 ]
 
 INTROSPECTOR_WEBAPP_ZIP = (
@@ -53,6 +55,9 @@
 FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
                                               '0')))
 
+NUM_RECENT_DAYS = 30
+FUZZER_COVERAGE_IS_DEGRADED = 5  # 5% or more is a degradation
+
 MUST_INCLUDES = set()
 MUST_INCLUDE_WITH_LANG: List[Any] = []
 
@@ -896,11 +901,105 @@ def extend_db_timestamps(db_timestamp, output_directory):
         json.dump(existing_timestamps, f)
 
 
-def extend_db_json_files(project_timestamps, output_directory):
+def per_fuzzer_coverage_analysis(project_name: str,
+                                 coverages: Dict[str, List[Tuple[int, str]]],
+                                 lost_fuzzers):
+    """Go through the recent coverage results and combine them into a short
+    summary, including an assessment of whether the fuzzer got worse over time.
+    """
+
+    # TODO This might not be a good metric when coverage is not meaningful,
+    # for example for very small projects or projects that have low coverage
+    # already. Though, this might not be super bad as we are looking at
+    # per-fuzzer coverage, which should already be normalized to what can
+    # be reached.
+    # TODO What would be a good percentage to mark as coverage degradation?
+    # Taking 5% for now, but this should be observed; maybe it should be
+    # configurable per project as well.
+    results = {}
+    for ff, data in coverages.items():
+        if len(data) > 0:
+            values = [dd[0] for dd in data]
+            dates = [dd[1] for dd in data]
+            latest_date_with_value = next(dd[1] for dd in reversed(data)
+                                          if dd[0] is not None)
+            if latest_date_with_value is not None:
+                report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
+                    project_name, latest_date_with_value.replace('-', ''), ff)
+                report_url = report_url[:-len('summary.json')] + 'index.html'
+            else:
+                report_url = None
+            max_cov = max(values[:-1], default=0)
+            avg_cov = round(statistics.fmean(values), 2)
+            current = values[-1]
+            results[ff] = {
+                'report_url': report_url,
+                'report_date': latest_date_with_value,
+                'coverages_values': values,
+                'coverages_dates': dates,
+                'max': max_cov,
+                'avg': avg_cov,
+                'current': current,
+                'has_degraded':
+                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
+                'got_lost': ff in lost_fuzzers,
+            }
+    return results
+
+
+def calculate_recent_results(projects_with_new_results, timestamps,
+                             num_days: int):
+    """Analyse recent project data to detect possible degradations of fuzzer
+    efficiency."""
+    from collections import defaultdict
+
+    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
+    for pt in timestamps:
+        project_name = pt['project_name']
+        if project_name in projects_with_new_results:
+            data[project_name][pt['date']] = pt
+
+    results = {}
+    for project_name, project_data in data.items():
+        fuzzers_past = set()
+        fuzzers_current: Set[str] = set()
+        per_fuzzer_coverages = defaultdict(list)
+
+        for do in (get_date_at_offset_as_str(ii)
+                   for ii in range(-num_days, 0, 1)):
+            try:
+                date_data = project_data[do]
+                per_fuzzer_coverage_data = date_data[
+                    'per-fuzzer-coverage-data']
+
+                fuzzers_past |= fuzzers_current
+                fuzzers_current = set(per_fuzzer_coverage_data.keys())
+
+                for ff, cov_data in per_fuzzer_coverage_data.items():
+                    try:
+                        perc = round(
+                            100 * cov_data['covered'] / cov_data['count'], 2)
+                    except:
+                        perc = 0
+
+                    per_fuzzer_coverages[ff].append((perc, do))
+            except:
+                continue
+
+        fuzzer_diff = fuzzers_past - fuzzers_current
+        per_fuzzer_coverages = per_fuzzer_coverage_analysis(
+            project_name, per_fuzzer_coverages, fuzzer_diff)
+
+        results[project_name] = per_fuzzer_coverages
+
+    return results
+
+
+def extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details):
     """Extends a set of DB .json files."""
 
     existing_timestamps = []
-    logging.info('Loading existing timestamps 1')
+    logging.info('Loading existing timestamps')
     if os.path.isfile(
             os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
         with open(
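For context, here is a minimal, self-contained sketch of the degradation check that per_fuzzer_coverage_analysis applies to each fuzzer's recent history. The sample history and the summarize helper are illustrative only; the comparison against FUZZER_COVERAGE_IS_DEGRADED mirrors the added code, which treats a drop of more than 5 percentage points from the earlier maximum as a degradation.

# Minimal sketch of the degradation check; sample data and the `summarize`
# helper are hypothetical, only the threshold logic mirrors the diff.
import statistics
from typing import Dict, List, Tuple

FUZZER_COVERAGE_IS_DEGRADED = 5  # percent, as added in this commit

def summarize(history: List[Tuple[float, str]]) -> Dict[str, object]:
    values = [v for v, _ in history]
    max_cov = max(values[:-1], default=0)   # best coverage seen before today
    current = values[-1]                    # most recent coverage value
    return {
        'max': max_cov,
        'avg': round(statistics.fmean(values), 2),
        'current': current,
        'has_degraded': (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
    }

# Hypothetical per-fuzzer history: (coverage %, date) over the recent window.
history = [(41.2, '2024-05-01'), (42.8, '2024-05-02'), (35.1, '2024-05-03')]
print(summarize(history))
# {'max': 42.8, 'avg': 39.7, 'current': 35.1, 'has_degraded': True}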
@@ -919,10 +1018,11 @@ def extend_db_json_files(project_timestamps, output_directory):
     existing_timestamp_mapping = dict()
 
     for es in existing_timestamps:
-        if not es['project_name'] in existing_timestamp_mapping:
+        if es['project_name'] not in existing_timestamp_mapping:
             existing_timestamp_mapping[es['project_name']] = set()
         existing_timestamp_mapping[es['project_name']].add(es['date'])
 
+    projects_with_new_results = set()
     for new_ts in project_timestamps:
         to_add = True
 
@@ -932,24 +1032,44 @@ def extend_db_json_files(project_timestamps, output_directory):
                 to_add = False
         if to_add:
             existing_timestamps.append(new_ts)
+            projects_with_new_results.add(new_ts['project_name'])
             have_added = True
 
     if FI_EXCLUDE_ALL_NON_MUSTS:
-        new_timestamps = []
+        # Filter existing timestamps to only those in MUST_INCLUDES.
+        kept_timestamps = []
         for ts in existing_timestamps:
             if ts['project_name'] in MUST_INCLUDES:
-                new_timestamps.append(ts)
-        existing_timestamps = new_timestamps
+                kept_timestamps.append(ts)
+        existing_timestamps = kept_timestamps
 
-        new_project_stamps = []
+        # Also filter the current project results.
+        kept_project_stamps = []
         for project_stamp in project_timestamps:
             if project_stamp['project_name'] in MUST_INCLUDES:
-                new_project_stamps.append(project_stamp)
-        project_timestamps = new_project_stamps
+                kept_project_stamps.append(project_stamp)
+        project_timestamps = kept_project_stamps
+
+    if should_include_details:
+        recent_results = calculate_recent_results(projects_with_new_results,
+                                                  existing_timestamps,
+                                                  NUM_RECENT_DAYS)
+        # TODO These results might detect issues that should be communicated
+        # to project maintainers. The best approach might be to load the
+        # project_timestamps file (all-project-current.json) separately, look
+        # up the recent results there, and maybe issue warnings.
+        for pt in project_timestamps:
+            try:
+                pt['recent_results'] = recent_results.get(pt['project_name'])
+            except Exception as exc:
+                logger.warning(
+                    f'Could not get recent results for {pt["project_name"]}: {exc}'
+                )
+    else:
+        recent_results = None
 
-    logging.info('Dumping all current projects')
-    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
-              'w') as f:
+    logging.info('Dumping current project data')
+    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
         json.dump(project_timestamps, f)
 
     # Remove any light-introspector files because they should not be saved in the
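The TODO above suggests eventually surfacing these findings to project maintainers. A hypothetical consumer of the dumped all-project-current.json could do that along these lines; the file path and warning wording are illustrative, while the 'recent_results', 'got_lost', 'has_degraded', 'max' and 'current' keys match what this commit writes.

# Hypothetical follow-up to the TODO above: scan the dumped JSON and warn
# about fuzzers that degraded or disappeared. Path and message format are
# illustrative; the 'recent_results' shape matches the diff.
import json
import logging

logging.basicConfig(level=logging.WARNING)

with open('all-project-current.json') as f:
    projects = json.load(f)

for project in projects:
    recent = project.get('recent_results') or {}
    for fuzzer, summary in recent.items():
        if summary.get('got_lost'):
            logging.warning('%s: fuzzer %s produced no recent coverage data',
                            project['project_name'], fuzzer)
        elif summary.get('has_degraded'):
            logging.warning('%s: fuzzer %s dropped from %s%% to %s%% coverage',
                            project['project_name'], fuzzer,
                            summary.get('max'), summary.get('current'))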
@@ -1017,7 +1137,8 @@ def update_db_files(db_timestamp,
         f.write(json.dumps(all_header_files))
 
     logging.info('Extending DB json files')
-    extend_db_json_files(project_timestamps, output_directory)
+    extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details)
 
     logging.info('Extending DB time stamps')
     extend_db_timestamps(db_timestamp, output_directory)
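calculate_recent_results keys into the per-project data by date strings produced by the existing get_date_at_offset_as_str helper, which is not part of this diff. Assuming it returns ISO-style YYYY-MM-DD strings relative to today (which the latest_date_with_value.replace('-', '') call suggests), the 30-day window it walks looks roughly like the sketch below; the helper shown is an assumption, not the repository's implementation.

# Sketch of the assumed behaviour of the existing get_date_at_offset_as_str
# helper (not part of this diff): an ISO-formatted date relative to today.
import datetime

NUM_RECENT_DAYS = 30  # as added in this commit

def get_date_at_offset_as_str(day_offset: int = -1) -> str:
    return (datetime.date.today() +
            datetime.timedelta(days=day_offset)).strftime('%Y-%m-%d')

# The window calculate_recent_results iterates: the last NUM_RECENT_DAYS
# days, oldest first, ending with yesterday.
window = [get_date_at_offset_as_str(ii) for ii in range(-NUM_RECENT_DAYS, 0, 1)]
print(window[0], '...', window[-1])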

tools/web-fuzzing-introspection/app/webapp/__init__.py

Lines changed: 3 additions & 4 deletions
@@ -60,8 +60,6 @@ def load_db() -> None:
                 project_name=project_timestamp['project_name'],
                 language=project_timestamp['language'],
                 coverage_data=project_timestamp['coverage-data'],
-                per_fuzzer_coverage_data=project_timestamp.get(
-                    'per-fuzzer-coverage-data', None),
                 introspector_data=project_timestamp['introspector-data'],
                 fuzzer_count=project_timestamp['fuzzer-count'],
                 introspector_url=project_timestamp.get('introspector_url',
@@ -87,8 +85,9 @@ def load_db() -> None:
                 introspector_data=project_timestamp['introspector-data'],
                 fuzzer_count=project_timestamp['fuzzer-count'],
                 project_repository=project_timestamp['project_repository'],
-                light_analysis=project_timestamp.get('light-introspector',
-                                                     {})))
+                light_analysis=project_timestamp.get('light-introspector', {}),
+                recent_results=project_timestamp.get('recent_results'),
+            ))
 
         introspector_data = project_timestamp.get('introspector-data', None)
         if introspector_data is None:

tools/web-fuzzing-introspection/app/webapp/models.py

Lines changed: 7 additions & 4 deletions
@@ -30,7 +30,8 @@ def __init__(self, name: str, language: str, date: str,
                  introspector_data: Optional[Dict[str,
                                                   Any]], fuzzer_count: int,
                  project_repository: Optional[str], light_analysis: Dict[Any,
-                                                                          Any]):
+                                                                          Any],
+                 recent_results: Optional[Dict[str, Any]]):
         self.name = name
         self.language = language
         self.date = date
@@ -39,9 +40,13 @@ def __init__(self, name: str, language: str, date: str,
         self.fuzzer_count = fuzzer_count
         self.project_repository = project_repository
         self.light_analysis = light_analysis
+        self.recent_results = recent_results
 
     def has_introspector(self) -> bool:
-        return self.introspector_data != None
+        return self.introspector_data is not None
+
+    def has_recent_results(self) -> bool:
+        return self.recent_results is not None
 
 
 class DBTimestamp:
@@ -78,7 +83,6 @@ def __init__(self,
                  date: str,
                  language: str,
                  coverage_data: Optional[Dict[str, Any]],
-                 per_fuzzer_coverage_data: Optional[Dict[str, Dict[str, Any]]],
                  introspector_data: Optional[Dict[str, Any]],
                  fuzzer_count: int,
                  introspector_url: Optional[str] = None,
@@ -89,7 +93,6 @@ def __init__(self,
         self.date = date
         self.language = language
         self.coverage_data = coverage_data
-        self.per_fuzzer_coverage_data = per_fuzzer_coverage_data
         self.introspector_data = introspector_data
         self.fuzzer_count = fuzzer_count
         self.introspector_url = introspector_url
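A short usage sketch for the updated Project model and its new has_recent_results() guard. All values are placeholders mirroring the keyword arguments visible in this diff's call sites (__init__.py and routes.py); the import path and the exact set of constructor arguments are assumptions, only the recent_results keyword and the helper method come from this commit.

# Hypothetical construction of the updated Project model; import path and
# argument values are placeholders, the recent_results keyword and
# has_recent_results() are from this commit.
from webapp import models  # assumed import path within the web app

project = models.Project(name='example-project',
                         language='c++',
                         date='2024-05-03',
                         coverage_data=None,
                         introspector_data=None,
                         fuzzer_count=2,
                         project_repository=None,
                         light_analysis={},
                         recent_results={
                             'fuzz_target_1': {
                                 'current': 35.1,
                                 'max': 42.8,
                                 'has_degraded': True,
                                 'got_lost': False,
                             },
                         })

if project.has_recent_results():
    degraded = [
        name for name, summary in project.recent_results.items()
        if summary['has_degraded']
    ]
    print('degraded fuzzers:', degraded)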

tools/web-fuzzing-introspection/app/webapp/routes.py

Lines changed: 2 additions & 1 deletion
@@ -574,7 +574,8 @@ def project_profile():
            coverage_data=None,
            introspector_data=None,
            project_repository=None,
-           light_analysis={})
+           light_analysis={},
+           recent_results=None)
 
        # Get statistics of the project
        project_statistics = data_storage.PROJECT_TIMESTAMPS
