Merge pull request #150 from intel/report-format
Align formatting of all reports
Pennycook authored Jan 22, 2025
2 parents 6c25257 + e9edf80 commit 6450b98
Showing 3 changed files with 82 additions and 32 deletions.
23 changes: 11 additions & 12 deletions codebasin/__main__.py
@@ -192,7 +192,11 @@ def _main():
default=[],
choices=["all", "summary", "clustering", "duplicates", "files"],
help=_help_string(
"Generate a report of the specified type.",
"Generate a report of the specified type:",
"- summary: code divergence information",
"- clustering: distance matrix and dendrogram",
"- duplicates: detected duplicate files",
"- files: information about individual files",
"May be specified multiple times.",
"If not specified, all reports will be generated.",
is_long=True,
@@ -264,10 +268,8 @@ def _main():
stdout_handler.setFormatter(Formatter(colors=sys.stdout.isatty()))
log.addHandler(stdout_handler)

# If no specific report was specified, generate all reports.
# Handled here to prevent "all" always being in the list.
if len(args.reports) == 0:
args.reports = ["all"]
if "all" in args.reports:
log.warning("Passing 'all' to -R is deprecated. Omit -R instead.")

# Determine the root directory based on where codebasin is run.
rootdir = os.path.abspath(os.getcwd())
@@ -322,22 +324,21 @@ def _main():
# Generate meta-warnings and statistics.
# Temporarily override log_level to ensure they are visible.
stdout_handler.setLevel(logging.WARNING)
print("")
aggregator.warn()
stdout_handler.setLevel(log_level)

# Count lines for platforms
setmap = state.get_setmap(codebase)

def report_enabled(name):
if "all" in args.reports:
if "all" in args.reports or len(args.reports) == 0:
return True
return name in args.reports

# Print summary report
if report_enabled("summary"):
summary = report.summary(setmap)
if summary is not None:
print(summary)
report.summary(setmap)

# Print files report
if report_enabled("files"):
@@ -350,9 +351,7 @@ def report_enabled(name):
output_prefix = "-".join([filename] + args.platforms)

clustering_output_name = output_prefix + "-dendrogram.png"
clustering = report.clustering(clustering_output_name, setmap)
if clustering is not None:
print(clustering)
report.clustering(clustering_output_name, setmap)

# Print duplicates report
if report_enabled("duplicates"):
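For orientation, here is a minimal, self-contained sketch of the report-selection behaviour after this change. It mirrors the updated report_enabled logic from the diff above; the reports argument stands in for argparse's args.reports, and the assertions are illustrative rather than part of the commit.

def report_enabled(name: str, reports: list[str]) -> bool:
    # Mirrors the updated helper in _main(): an empty list (no -R flags)
    # or an explicit "all" enables every report; otherwise only the
    # requested report names are enabled.
    if "all" in reports or len(reports) == 0:
        return True
    return name in reports


assert report_enabled("summary", [])             # no -R: all reports generated
assert report_enabled("clustering", ["all"])     # "all" still works, but is deprecated
assert not report_enabled("files", ["summary"])  # only the requested report is enabled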
82 changes: 67 additions & 15 deletions codebasin/report.py
@@ -28,6 +28,28 @@
log = logging.getLogger(__name__)


def _heading(text: str, stream: TextIO):
"""
Parameters
----------
text: str
The text to use as the heading.
stream: TextIO
The stream the heading will eventually be written to.
Returns
-------
str
A heading string appropriately formatted for the output stream.
"""
if stream.isatty():
return f"\033[1m\033[4m{text}\033[0m\n"
else:
underline = "=" * len(text)
return f"{text}\n{underline}"


def extract_platforms(setmap):
"""
Extract a list of unique platforms from a set map
@@ -137,11 +159,20 @@ def normalized_utilization(
return utilization(setmap) / total_platforms


def summary(setmap):
def summary(setmap: defaultdict[str, int], stream: TextIO = sys.stdout):
"""
Produce a summary report for the platform set
Produce a summary report for the platform set, including
a breakdown of SLOC per platform subset, code divergence, etc.
Parameters
----------
setmap: defaultdict[str, int]
The setmap used to compute the summary report.
stream: TextIO, default: sys.stdout
The stream to write the report to.
"""
lines = []
lines = ["", _heading("Summary", stream)]

total = sum(setmap.values())
data = []
@@ -171,13 +202,30 @@ def summary(setmap):
lines += [f"Unused Code (%): {unused:.2f}"]
lines += [f"Total SLOC: {total_count}"]

return "\n".join(lines)
print("\n".join(lines), file=stream)


def clustering(output_name, setmap):
def clustering(
output_name: str,
setmap: defaultdict[str, int],
stream: TextIO = sys.stdout,
):
"""
Produce a clustering report for the platform set
Produce a clustering report for the platform set.
Parameters
----------
output_name: str
The filename for the dendrogram.
setmap: defaultdict[str, int]
The setmap used to compute the clustering statistics.
stream: TextIO, default: sys.stdout
The stream to write the report to.
"""
lines = ["", _heading("Clustering", stream)]

# Sort the platform list to ensure that the ordering of platforms in the
# distance matrix and dendrogram do not change from run to run
platforms = sorted(extract_platforms(setmap))
@@ -207,8 +255,7 @@ def clustering(output_name, setmap):
]

# Print distance matrix as a table
lines = []
lines += ["", "Distance Matrix"]
lines += ["Distance Matrix:"]
labelled_matrix = [
[name] + [f"{column:.2f}" for column in matrix[row]]
for (row, name) in enumerate(platforms)
@@ -243,7 +290,10 @@ def clustering(output_name, setmap):
with util.safe_open_write_binary(output_name) as fp:
fig.savefig(fp)

return "\n".join(lines)
lines += [""]
lines += [f"Dendrogram written to {output_name}"]

print("\n".join(lines), file=stream)


def find_duplicates(codebase: CodeBase) -> list[set[Path]]:
@@ -304,8 +354,8 @@ def duplicates(codebase: CodeBase, stream: TextIO = sys.stdout):
"""
confirmed_matches = find_duplicates(codebase)

print("Duplicates", file=stream)
print("----------", file=stream)
print("", file=stream)
print(_heading("Duplicates", stream), file=stream)

if len(confirmed_matches) == 0:
print("No duplicates found.", file=stream)
@@ -315,6 +365,8 @@ def duplicates(codebase: CodeBase, stream: TextIO = sys.stdout):
print(f"Match {i}:", file=stream)
for path in matches:
print(f"- {path}")
if i != len(confirmed_matches) - 1:
print("")


def _human_readable(x: int) -> str:
@@ -740,17 +792,17 @@ def files(
setmap[frozenset(assoc)] += node.num_lines
tree.insert(f, setmap)

print("Files", file=stream)
print("-----", file=stream)
print("", file=stream)
print(_heading("Files", stream), file=stream)

# Print a legend.
legend = []
legend += ["\033[1mLegend\033[0m:"]
legend += ["Legend:"]
for i, platform in enumerate(sorted(tree.root.platforms)):
label = string.ascii_uppercase[i]
legend += [f"\033[33m{label}\033[0m: {platform}"]
legend += [""]
legend += ["\033[1mColumns\033[0m:"]
legend += ["Columns:"]
header = [
"Platform Set",
"Used SLOC / Total SLOC",
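As a reference for the new heading style, the sketch below is adapted from the _heading helper in the diff above and shows its output for a redirected (non-TTY) stream; the io.StringIO buffer and the sample output in the comments are illustrative assumptions, not part of the commit.

import io
from typing import TextIO


def _heading(text: str, stream: TextIO) -> str:
    # Adapted from the diff above: ANSI bold + underline on interactive
    # terminals, plain "=" underlining when output is redirected.
    if stream.isatty():
        return f"\033[1m\033[4m{text}\033[0m\n"
    else:
        underline = "=" * len(text)
        return f"{text}\n{underline}"


buffer = io.StringIO()  # a StringIO object is never a TTY
print(_heading("Summary", buffer))
# Summary
# =======

The same helper feeds the summary, clustering, duplicates, and files reports, each of which now writes to a stream argument defaulting to sys.stdout, so redirecting a report only requires passing a different stream.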
9 changes: 4 additions & 5 deletions docs/source/cmd.rst
@@ -28,11 +28,10 @@ Command Line Interface
``-R <report>``
Generate a report of the specified type.

- ``summary``: output only code divergence information.
- ``clustering``: output only distance matrix and dendrogram.
- ``duplicates``: output only detected duplicate files.
- ``files``: output only information about individual files.
- ``all``: generate all available reports.
- ``summary``: code divergence information
- ``clustering``: distance matrix and dendrogram
- ``duplicates``: detected duplicate files
- ``files``: information about individual files

``-x <pattern>, --exclude <pattern>``
Exclude files matching this pattern from the code base.
