Merge pull request #150 from intel/report-format
Align formatting of all reports
Pennycook authored Jan 22, 2025
2 parents 6c25257 + e9edf80 commit 6450b98
Showing 3 changed files with 82 additions and 32 deletions.
23 changes: 11 additions & 12 deletions codebasin/__main__.py
@@ -192,7 +192,11 @@ def _main():
default=[],
choices=["all", "summary", "clustering", "duplicates", "files"],
help=_help_string(
"Generate a report of the specified type.",
"Generate a report of the specified type:",
"- summary: code divergence information",
"- clustering: distance matrix and dendrogram",
"- duplicates: detected duplicate files",
"- files: information about individual files",
"May be specified multiple times.",
"If not specified, all reports will be generated.",
is_long=True,
@@ -264,10 +268,8 @@ def _main():
stdout_handler.setFormatter(Formatter(colors=sys.stdout.isatty()))
log.addHandler(stdout_handler)

# If no specific report was specified, generate all reports.
# Handled here to prevent "all" always being in the list.
if len(args.reports) == 0:
args.reports = ["all"]
if "all" in args.reports:
log.warning("Passing 'all' to -R is deprecated. Omit -R instead.")

# Determine the root directory based on where codebasin is run.
rootdir = os.path.abspath(os.getcwd())
@@ -322,22 +324,21 @@ def _main():
# Generate meta-warnings and statistics.
# Temporarily override log_level to ensure they are visible.
stdout_handler.setLevel(logging.WARNING)
print("")
aggregator.warn()
stdout_handler.setLevel(log_level)

# Count lines for platforms
setmap = state.get_setmap(codebase)

def report_enabled(name):
if "all" in args.reports:
if "all" in args.reports or len(args.reports) == 0:
return True
return name in args.reports

# Print summary report
if report_enabled("summary"):
summary = report.summary(setmap)
if summary is not None:
print(summary)
report.summary(setmap)

# Print files report
if report_enabled("files"):
@@ -350,9 +351,7 @@ def report_enabled(name):
output_prefix = "-".join([filename] + args.platforms)

clustering_output_name = output_prefix + "-dendrogram.png"
clustering = report.clustering(clustering_output_name, setmap)
if clustering is not None:
print(clustering)
report.clustering(clustering_output_name, setmap)

# Print duplicates report
if report_enabled("duplicates"):
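For orientation, here is a minimal, self-contained sketch of the report-selection behaviour after this change. It mirrors the updated report_enabled logic from the diff above; the reports argument stands in for argparse's args.reports, and the assertions are illustrative rather than part of the commit.

def report_enabled(name: str, reports: list[str]) -> bool:
    # Mirrors the updated helper in _main(): an empty list (no -R flags)
    # or an explicit "all" enables every report; otherwise only the
    # requested report names are enabled.
    if "all" in reports or len(reports) == 0:
        return True
    return name in reports


assert report_enabled("summary", [])             # no -R: all reports generated
assert report_enabled("clustering", ["all"])     # "all" still works, but is deprecated
assert not report_enabled("files", ["summary"])  # only the requested report is enabled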
82 changes: 67 additions & 15 deletions codebasin/report.py
@@ -28,6 +28,28 @@
log = logging.getLogger(__name__)


def _heading(text: str, stream: TextIO):
"""
Parameters
----------
text: str
The text to use as the heading.
stream: TextIO
The stream the heading will eventually be written to.
Returns
-------
str
A heading string appropriately formatted for the output stream.
"""
if stream.isatty():
return f"\033[1m\033[4m{text}\033[0m\n"
else:
underline = "=" * len(text)
return f"{text}\n{underline}"


def extract_platforms(setmap):
"""
Extract a list of unique platforms from a set map
@@ -137,11 +159,20 @@ def normalized_utilization(
return utilization(setmap) / total_platforms


def summary(setmap):
def summary(setmap: defaultdict[str, int], stream: TextIO = sys.stdout):
"""
Produce a summary report for the platform set
Produce a summary report for the platform set, including
a breakdown of SLOC per platform subset, code divergence, etc.
Parameters
----------
setmap: defaultdict[str, int]
The setmap used to compute the summary report.
stream: TextIO, default: sys.stdout
The stream to write the report to.
"""
lines = []
lines = ["", _heading("Summary", stream)]

total = sum(setmap.values())
data = []
@@ -171,13 +202,30 @@ def summary(setmap):
lines += [f"Unused Code (%): {unused:.2f}"]
lines += [f"Total SLOC: {total_count}"]

return "\n".join(lines)
print("\n".join(lines), file=stream)


def clustering(output_name, setmap):
def clustering(
output_name: str,
setmap: defaultdict[str, int],
stream: TextIO = sys.stdout,
):
"""
Produce a clustering report for the platform set
Produce a clustering report for the platform set.
Parameters
----------
output_name: str
The filename for the dendrogram.
setmap: defaultdict[str, int]
The setmap used to compute the clustering statistics.
stream: TextIO, default: sys.stdout
The stream to write the report to.
"""
lines = ["", _heading("Clustering", stream)]

# Sort the platform list to ensure that the ordering of platforms in the
# distance matrix and dendrogram do not change from run to run
platforms = sorted(extract_platforms(setmap))
@@ -207,8 +255,7 @@ def clustering(output_name, setmap):
]

# Print distance matrix as a table
lines = []
lines += ["", "Distance Matrix"]
lines += ["Distance Matrix:"]
labelled_matrix = [
[name] + [f"{column:.2f}" for column in matrix[row]]
for (row, name) in enumerate(platforms)
@@ -243,7 +290,10 @@ def clustering(output_name, setmap):
with util.safe_open_write_binary(output_name) as fp:
fig.savefig(fp)

return "\n".join(lines)
lines += [""]
lines += [f"Dendrogram written to {output_name}"]

print("\n".join(lines), file=stream)


def find_duplicates(codebase: CodeBase) -> list[set[Path]]:
@@ -304,8 +354,8 @@ def duplicates(codebase: CodeBase, stream: TextIO = sys.stdout):
"""
confirmed_matches = find_duplicates(codebase)

print("Duplicates", file=stream)
print("----------", file=stream)
print("", file=stream)
print(_heading("Duplicates", stream), file=stream)

if len(confirmed_matches) == 0:
print("No duplicates found.", file=stream)
@@ -315,6 +365,8 @@ def duplicates(codebase: CodeBase, stream: TextIO = sys.stdout):
print(f"Match {i}:", file=stream)
for path in matches:
print(f"- {path}")
if i != len(confirmed_matches) - 1:
print("")


def _human_readable(x: int) -> str:
@@ -740,17 +792,17 @@ def files(
setmap[frozenset(assoc)] += node.num_lines
tree.insert(f, setmap)

print("Files", file=stream)
print("-----", file=stream)
print("", file=stream)
print(_heading("Files", stream), file=stream)

# Print a legend.
legend = []
legend += ["\033[1mLegend\033[0m:"]
legend += ["Legend:"]
for i, platform in enumerate(sorted(tree.root.platforms)):
label = string.ascii_uppercase[i]
legend += [f"\033[33m{label}\033[0m: {platform}"]
legend += [""]
legend += ["\033[1mColumns\033[0m:"]
legend += ["Columns:"]
header = [
"Platform Set",
"Used SLOC / Total SLOC",
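As a reference for the new heading style, the sketch below is adapted from the _heading helper in the diff above and shows its output for a redirected (non-TTY) stream; the io.StringIO buffer and the sample output in the comments are illustrative assumptions, not part of the commit.

import io
from typing import TextIO


def _heading(text: str, stream: TextIO) -> str:
    # Adapted from the diff above: ANSI bold + underline on interactive
    # terminals, plain "=" underlining when output is redirected.
    if stream.isatty():
        return f"\033[1m\033[4m{text}\033[0m\n"
    else:
        underline = "=" * len(text)
        return f"{text}\n{underline}"


buffer = io.StringIO()  # a StringIO object is never a TTY
print(_heading("Summary", buffer))
# Summary
# =======

The same helper feeds the summary, clustering, duplicates, and files reports, each of which now writes to a stream argument defaulting to sys.stdout, so redirecting a report only requires passing a different stream.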
9 changes: 4 additions & 5 deletions docs/source/cmd.rst
@@ -28,11 +28,10 @@ Command Line Interface
``-R <report>``
Generate a report of the specified type.

- ``summary``: output only code divergence information.
- ``clustering``: output only distance matrix and dendrogram.
- ``duplicates``: output only detected duplicate files.
- ``files``: output only information about individual files.
- ``all``: generate all available reports.
- ``summary``: code divergence information
- ``clustering``: distance matrix and dendrogram
- ``duplicates``: detected duplicate files
- ``files``: information about individual files

``-x <pattern>, --exclude <pattern>``
Exclude files matching this pattern from the code base.
