per fuzzer coverage information in project overview #2102

Merged: 4 commits, Feb 20, 2025
Changes from all commits
20 changes: 20 additions & 0 deletions tools/web-fuzzing-introspection/app/static/assets/db/oss_fuzz.py
@@ -154,6 +154,12 @@ def get_code_coverage_summary_url(project_name, datestr):
return project_url


def get_fuzzer_code_coverage_summary_url(project_name, datestr, fuzzer):
base_url = 'https://storage.googleapis.com/oss-fuzz-coverage/{0}/reports-by-target/{1}/{2}/linux/summary.json'
project_url = base_url.format(project_name, datestr, fuzzer)
return project_url


def get_coverage_report_url(project_name, datestr, language):
if language == 'java' or language == 'python' or language == 'go':
file_report = "index.html"
@@ -256,6 +262,20 @@ def get_code_coverage_summary(project_name, datestr):
return None


def get_fuzzer_code_coverage_summary(project_name, datestr, fuzzer):
cov_summary_url = get_fuzzer_code_coverage_summary_url(
project_name, datestr, fuzzer)
try:
coverage_summary_raw = requests.get(cov_summary_url, timeout=20).text
except:
return None
try:
json_dict = json.loads(coverage_summary_raw)
return json_dict
except:
return None


def extract_new_introspector_functions(project_name, date_str):
introspector_functions_url = get_introspector_report_url_all_functions(
project_name, date_str.replace("-", ""))
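A minimal usage sketch for the two new helpers. The project, date, and fuzzer names below are made up, and the summary layout assumed is the llvm-cov JSON export that OSS-Fuzz coverage builds publish:

summary = get_fuzzer_code_coverage_summary('someproject', '20250220',
                                           'some_fuzzer')
if summary is not None:
    # llvm-cov export: data[0].totals.lines carries 'count', 'covered',
    # and 'percent' for the whole fuzz target.
    lines = summary['data'][0]['totals']['lines']
    print(lines['covered'], lines['count'], lines['percent'])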
@@ -24,8 +24,10 @@
import subprocess
import zipfile
import tarfile
import statistics
from pathlib import Path
from threading import Thread
-from typing import List, Any, Optional, Dict
+from typing import List, Any, Optional, Dict, Tuple, Set

import constants
import oss_fuzz
@@ -34,7 +36,7 @@
DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
-DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
+DB_JSON_ALL_CURRENT = 'all-project-current.json'
DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
DB_BUILD_STATUS_JSON = 'build-status.json'
#DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
@@ -44,7 +46,7 @@
DB_JSON_ALL_PROJECT_TIMESTAMP,
DB_JSON_ALL_FUNCTIONS,
DB_JSON_ALL_CONSTRUCTORS,
-    DB_JSON_ALL_CURRENT_FUNCS,
+    DB_JSON_ALL_CURRENT,
]

INTROSPECTOR_WEBAPP_ZIP = (
@@ -53,6 +55,9 @@
FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
'0')))

NUM_RECENT_DAYS = 30
FUZZER_COVERAGE_IS_DEGRADED = 5 # 5% or more is a degradation

MUST_INCLUDES = set()
MUST_INCLUDE_WITH_LANG: List[Any] = []

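The NUM_RECENT_DAYS and FUZZER_COVERAGE_IS_DEGRADED constants drive the degradation check added later in this file; spelled out with made-up numbers:

max_cov, current = 42.0, 35.5  # best past coverage vs. today's, in percent
has_degraded = (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED  # True: 6.5 > 5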
@@ -283,9 +288,8 @@ def extract_and_refine_functions(all_function_list, date_str):
return refined_proj_list


-def extract_code_coverage_data(code_coverage_summary, project_name, date_str,
-                               project_language) -> Optional[Dict[str, Any]]:
-    """Gets coverage URL and line coverage total of a project"""
+def extract_code_coverage_data(code_coverage_summary):
+    """Extract the coverage data from a loaded coverage summary.json"""
# Extract data from the code coverage reports
if code_coverage_summary is None:
return None
@@ -307,8 +311,19 @@
except:
pass

return line_total_summary


def prepare_code_coverage_dict(
code_coverage_summary, project_name: str, date_str: str,
project_language: str) -> Optional[Dict[str, Any]]:
"""Gets coverage URL and line coverage total of a project"""
line_total_summary = extract_code_coverage_data(code_coverage_summary)
if line_total_summary is None:
return None

coverage_url = oss_fuzz.get_coverage_report_url(project_name,
date_str.replace("-", ""),
date_str.replace('-', ''),
project_language)
code_coverage_data_dict = {
'coverage_url': coverage_url,
@@ -442,7 +457,7 @@ def extract_local_project_data(project_name, oss_fuzz_path,
project_name
}

-    code_coverage_data_dict = extract_code_coverage_data(
+    code_coverage_data_dict = prepare_code_coverage_dict(
code_coverage_summary, project_name, '', project_language)

if cov_fuzz_stats is not None:
@@ -465,8 +480,8 @@
dictionary_key = '%s###%s' % (project_name, '')
manager_return_dict[dictionary_key] = {
'project_timestamp': project_timestamp,
"introspector-data-dict": introspector_data_dict,
"coverage-data-dict": code_coverage_data_dict,
'introspector-data-dict': introspector_data_dict,
'coverage-data-dict': code_coverage_data_dict,
'all-header-files': all_header_files,
}

@@ -704,20 +719,30 @@ def extract_project_data(project_name, date_str, should_include_details,
'project_name': project_name
}

-    code_coverage_data_dict = extract_code_coverage_data(
+    code_coverage_data_dict = prepare_code_coverage_dict(
code_coverage_summary, project_name, date_str, project_language)

per_fuzzer_cov = {}
if cov_fuzz_stats is not None:
all_fuzzers = cov_fuzz_stats.split("\n")
if all_fuzzers[-1] == '':
all_fuzzers = all_fuzzers[0:-1]
amount_of_fuzzers = len(all_fuzzers)
for ff in all_fuzzers:
try:
fuzzer_cov = oss_fuzz.get_fuzzer_code_coverage_summary(
project_name, date_str.replace("-", ""), ff)
fuzzer_cov_data = extract_code_coverage_data(fuzzer_cov)
per_fuzzer_cov[ff] = fuzzer_cov_data
except:
pass

project_timestamp = {
"project_name": project_name,
"date": date_str,
'language': project_language,
'coverage-data': code_coverage_data_dict,
'per-fuzzer-coverage-data': per_fuzzer_cov,
'introspector-data': introspector_data_dict,
'fuzzer-count': amount_of_fuzzers,
'project_repository': project_repository,
@@ -878,11 +903,105 @@ def extend_db_timestamps(db_timestamp, output_directory):
json.dump(existing_timestamps, f)


-def extend_db_json_files(project_timestamps, output_directory):
def per_fuzzer_coverage_analysis(project_name: str,
coverages: Dict[str, List[Tuple[int, str]]],
lost_fuzzers):
"""Go through the recent coverage results and combine them into a short summary.
Including an assessment if the fuzzer got worse over time.
"""

    # TODO This might not be a good metric when coverage is not meaningful,
    # for example for very small projects or projects that already have low
    # coverage. Though this might not be too bad, since we are looking at
    # per-fuzzer coverage, which should already be normalized to what can
    # be reached.
    # TODO What would be a good percentage to mark as coverage degradation?
    # Taking 5% for now, but this should be observed; maybe it should be
    # configurable per project as well.
results = {}
for ff, data in coverages.items():
if len(data) > 0:
values = [dd[0] for dd in data]
dates = [dd[1] for dd in data]
            # Default of None avoids StopIteration when no datapoint has a value.
            latest_date_with_value = next(
                (dd[1] for dd in reversed(data) if dd[0] is not None), None)
if latest_date_with_value is not None:
report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
project_name, latest_date_with_value.replace('-', ''), ff)
report_url = report_url[:-len('summary.json')] + 'index.html'
else:
report_url = None
max_cov = max(values[:-1], default=0)
avg_cov = round(statistics.fmean(values), 2)
current = values[-1]
results[ff] = {
'report_url': report_url,
'report_date': latest_date_with_value,
'coverages_values': values,
'coverages_dates': dates,
'max': max_cov,
'avg': avg_cov,
'current': current,
'has_degraded':
(max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
'got_lost': ff in lost_fuzzers,
}
return results


def calculate_recent_results(projects_with_new_results, timestamps,
num_days: int):
"""Analyse recent project data to detect possible degradations of fuzzer efficiency."""
from collections import defaultdict

data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
for pt in timestamps:
project_name = pt['project_name']
if project_name in projects_with_new_results:
data[project_name][pt['date']] = pt

results = {}
for project_name, project_data in data.items():
fuzzers_past = set()
fuzzers_current: Set[str] = set()
per_fuzzer_coverages = defaultdict(list)

for do in (get_date_at_offset_as_str(ii)
for ii in range(-num_days, 0, 1)):
try:
date_data = project_data[do]
per_fuzzer_coverage_data = date_data[
'per-fuzzer-coverage-data']

fuzzers_past |= fuzzers_current
fuzzers_current = set(per_fuzzer_coverage_data.keys())

for ff, cov_data in per_fuzzer_coverage_data.items():
try:
perc = round(
100 * cov_data['covered'] / cov_data['count'], 2)
except:
perc = 0

per_fuzzer_coverages[ff].append((perc, do))
except:
continue

fuzzer_diff = fuzzers_past - fuzzers_current
per_fuzzer_coverages = per_fuzzer_coverage_analysis(
project_name, per_fuzzer_coverages, fuzzer_diff)

results[project_name] = per_fuzzer_coverages

return results


+def extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details):
"""Extends a set of DB .json files."""

existing_timestamps = []
-    logging.info('Loading existing timestamps 1')
+    logging.info('Loading existing timestamps')
if os.path.isfile(
os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
with open(
Expand All @@ -901,10 +1020,11 @@ def extend_db_json_files(project_timestamps, output_directory):
existing_timestamp_mapping = dict()

for es in existing_timestamps:
-        if not es['project_name'] in existing_timestamp_mapping:
+        if es['project_name'] not in existing_timestamp_mapping:
existing_timestamp_mapping[es['project_name']] = set()
existing_timestamp_mapping[es['project_name']].add(es['date'])

projects_with_new_results = set()
for new_ts in project_timestamps:
to_add = True

@@ -914,24 +1034,44 @@
to_add = False
if to_add:
existing_timestamps.append(new_ts)
projects_with_new_results.add(new_ts['project_name'])
have_added = True

if FI_EXCLUDE_ALL_NON_MUSTS:
-        new_timestamps = []
+        # Filter existing timestamps to only those in MUST_INCLUDES.
+        kept_timestamps = []
for ts in existing_timestamps:
if ts['project_name'] in MUST_INCLUDES:
-                new_timestamps.append(ts)
-        existing_timestamps = new_timestamps
+                kept_timestamps.append(ts)
+        existing_timestamps = kept_timestamps

-        new_project_stamps = []
+        # Also filter the current project results.
+        kept_project_stamps = []
for project_stamp in project_timestamps:
if project_stamp['project_name'] in MUST_INCLUDES:
-                new_project_stamps.append(project_stamp)
-        project_timestamps = new_project_stamps
+                kept_project_stamps.append(project_stamp)
+        project_timestamps = kept_project_stamps

if should_include_details:
recent_results = calculate_recent_results(projects_with_new_results,
existing_timestamps,
NUM_RECENT_DAYS)
        # TODO These results might detect issues that should be communicated
        # to project maintainers. The best approach might be to load the
        # project_timestamps file (all-project-current.json) separately, load
        # recent results there, and maybe issue warnings.
for pt in project_timestamps:
try:
pt['recent_results'] = recent_results.get(pt['project_name'])
except Exception as exc:
                logging.warning(
f'Could not get recent results for {pt["project_name"]}: {exc}'
)
else:
recent_results = None

-    logging.info('Dumping all current projects')
-    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
-              'w') as f:
+    logging.info('Dumping current project data')
+    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
json.dump(project_timestamps, f)

# Remove any light-introspector files because they should not be saved in the
Expand Down Expand Up @@ -999,7 +1139,8 @@ def update_db_files(db_timestamp,
f.write(json.dumps(all_header_files))

logging.info('Extending DB json files')
-    extend_db_json_files(project_timestamps, output_directory)
+    extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details)

logging.info('Extending DB time stamps')
extend_db_timestamps(db_timestamp, output_directory)
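A quick illustration of the analysis added above, with fabricated data: coverages maps each fuzzer to (percent, date) tuples gathered over the NUM_RECENT_DAYS window, exactly as calculate_recent_results builds them.

# Fabricated input: one fuzzer whose coverage dropped from ~41% to 33%.
coverages = {
    'fuzz_parser': [(40.1, '2025-01-20'), (41.3, '2025-01-21'),
                    (33.0, '2025-02-18')],
}
res = per_fuzzer_coverage_analysis('someproject', coverages, lost_fuzzers=set())
# res['fuzz_parser']['max'] is 41.3 (best value before the current one),
# 'current' is 33.0, and 'has_degraded' is True because the 8.3-point drop
# exceeds FUZZER_COVERAGE_IS_DEGRADED. 'got_lost' is False since the fuzzer
# is not in lost_fuzzers.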
5 changes: 3 additions & 2 deletions tools/web-fuzzing-introspection/app/webapp/__init__.py
@@ -85,8 +85,9 @@ def load_db() -> None:
introspector_data=project_timestamp['introspector-data'],
fuzzer_count=project_timestamp['fuzzer-count'],
project_repository=project_timestamp['project_repository'],
-                light_analysis=project_timestamp.get('light-introspector',
-                                                     {})))
+                light_analysis=project_timestamp.get('light-introspector', {}),
+                recent_results=project_timestamp.get('recent_results'),
+            ))

introspector_data = project_timestamp.get('introspector-data', None)
if introspector_data is None:
9 changes: 7 additions & 2 deletions tools/web-fuzzing-introspection/app/webapp/models.py
@@ -30,7 +30,8 @@ def __init__(self, name: str, language: str, date: str,
introspector_data: Optional[Dict[str,
Any]], fuzzer_count: int,
project_repository: Optional[str], light_analysis: Dict[Any,
-                                                                         Any]):
+                                                                         Any],
+                 recent_results: Optional[Dict[str, Any]]):
self.name = name
self.language = language
self.date = date
@@ -39,9 +40,13 @@ def __init__(self, name: str, language: str, date: str,
self.fuzzer_count = fuzzer_count
self.project_repository = project_repository
self.light_analysis = light_analysis
self.recent_results = recent_results

def has_introspector(self) -> bool:
-        return self.introspector_data != None
+        return self.introspector_data is not None

def has_recent_results(self) -> bool:
return self.recent_results is not None


class DBTimestamp:
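A hypothetical consumer of the new Project fields; the per-fuzzer summaries carry the keys produced by per_fuzzer_coverage_analysis above:

# project is a Project instance loaded by load_db(); names here are made up.
if project.has_recent_results():
    flagged = [
        fuzzer for fuzzer, summary in project.recent_results.items()
        if summary['has_degraded'] or summary['got_lost']
    ]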
3 changes: 2 additions & 1 deletion tools/web-fuzzing-introspection/app/webapp/routes.py
@@ -574,7 +574,8 @@ def project_profile():
coverage_data=None,
introspector_data=None,
project_repository=None,
-            light_analysis={})
+            light_analysis={},
+            recent_results=None)

# Get statistics of the project
project_statistics = data_storage.PROJECT_TIMESTAMPS