Merge pull request #155 from rhpvorderman/pairedreportorder

rhpvorderman · web-flow · commit 0f394e2e055d · 2024-05-21T07:19:08.000+02:00
Paired end report has the same order as single end report.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -9,6 +9,9 @@ Changelog
 
 version 0.9.0-dev
 -----------------
++ Sort modules for paired end reports in the same order as single end reports.
+  For example, the sequence length distributions for read 1 and read 2 now
+  appear directly after each other.
 + Add common human genome repeats and Illumina poly-G dark cycles to the
   overrepresented sequences database.
 + Illumina adapter trimming sequences were added to the contaminants database
diff --git a/src/sequali/report_modules.py b/src/sequali/report_modules.py
@@ -146,7 +146,10 @@ def create_toc(content: str):
             if header_level != current_level:
                 if header_level > current_level:
                     for i in range(header_level - current_level):
-                        toc.write('<li><ul class="toc_list">')
+                        # List style type: none prevents seeing two bullets in
+                        # front of the list item.
+                        toc.write('<li style="list-style-type:none;">'
+                                  '<ul class="toc_list">')
                 else:
                     for i in range(current_level - header_level):
                         toc.write("</ul></li>")
@@ -1893,12 +1896,12 @@ def to_html(self) -> str:
             <th>Adapter Sequence</th><th>Best match</th></tr>
             <tr>
                 <td>Read 1</td>
-                <td>{self.longest_adapter_read1}</td>
+                <td style="font-family:monospace;">{self.longest_adapter_read1}</td>
                 <td>{self.longest_adapter_read1_match}</td>
             </tr>
             <tr>
                 <td>Read 2</td>
-                <td>{self.longest_adapter_read2}</td>
+                <td style="font-family:monospace;">{self.longest_adapter_read2}</td>
                 <td>{self.longest_adapter_read2_match}</td>
             </tr>
             </table>
@@ -1910,7 +1913,7 @@ def to_html(self) -> str:
         for adapter, count in self.adapters_read1:
             report.write(
                 f"""<tr>
-                        <td>{adapter}</td>
+                        <td style="font-family:monospace;">{adapter}</td>
                         <td style="text-align:right;">{count}</td>
                     </tr>
                 """)
@@ -1922,7 +1925,7 @@ def to_html(self) -> str:
         for adapter, count in self.adapters_read2:
             report.write(
                 f"""<tr>
-                        <td>{adapter}</td>
+                        <td style="font-family:monospace;">{adapter}</td>
                         <td style="text-align:right;">{count}</td>
                     </tr>
                 """)
@@ -1952,6 +1955,33 @@ def to_html(self) -> str:
 CLASS_TO_NAME: Dict[Type[ReportModule], str] = {
     value: key for key, value in NAME_TO_CLASS.items()}
 
+CLASS_TO_ORDER = {
+    Meta: 0,
+    Summary: 1,
+    SequenceLengthDistribution: 2,
+    PerBaseQualityScoreDistribution: 3,
+    PerPositionMeanQualityAndSpread: 4,
+    PerSequenceAverageQualityScores: 5,
+    PerPositionBaseContent: 6,
+    PerPositionNContent: 7,
+    PerSequenceGCContent: 8,
+    AdapterContent: 9,
+    AdapterFromOverlapReport: 10,
+    InsertSizeMetricsReport: 11,
+    PerTileQualityReport: 12,
+    DuplicationCounts: 13,
+    OverRepresentedSequences: 14,
+    NanoStatsReport: 15,
+}
+
+
+def module_sort_key(m: ReportModule):
+    prio = CLASS_TO_ORDER[type(m)]
+    read_pair_info = ""
+    if hasattr(m, "read_pair_info"):
+        read_pair_info = getattr(m, "read_pair_info")
+    return prio, read_pair_info
+
 
 def report_modules_to_dict(report_modules: Iterable[ReportModule]):
     def get_name(module: ReportModule) -> str:
@@ -2113,39 +2143,31 @@ def calculate_stats(
         data_ranges = list(equidistant_ranges(max_length, graph_resolution))
     modules = [
         Meta.from_filepath(filename, filename_reverse),
-    ]
-    # Generic modules for both read1 and read2 come first.
-    if insert_size_metrics:
-        modules.append(
-            AdapterFromOverlapReport.from_insert_size_metrics(insert_size_metrics))
-        modules.append(
-            InsertSizeMetricsReport.from_insert_size_metrics(insert_size_metrics))
-    if filename_reverse:
-        modules.append(
-            DuplicationCounts.from_dedup_estimator(dedup_estimator),
-        )
-    modules.extend(qc_metrics_modules(metrics, data_ranges,
-                                      read_pair_info=read_pair_info1))
-    if adapter_counter:
-        modules.append(
-            AdapterContent.from_adapter_counter_adapters_and_ranges(
-                adapter_counter, adapters, data_ranges, read_pair_info=read_pair_info1)
-        )
-    modules.append(PerTileQualityReport.from_per_tile_quality_and_ranges(
-        per_tile_quality, data_ranges, read_pair_info=read_pair_info1),)
-    if not filename_reverse:
-        modules.append(
-            DuplicationCounts.from_dedup_estimator(dedup_estimator)
-        )
-    modules.append(
+        *qc_metrics_modules(metrics, data_ranges, read_pair_info=read_pair_info1),
+        PerTileQualityReport.from_per_tile_quality_and_ranges(
+            per_tile_quality, data_ranges, read_pair_info=read_pair_info1),
         OverRepresentedSequences.from_sequence_duplication(
             sequence_duplication,
             fraction_threshold=fraction_threshold,
             min_threshold=min_threshold,
             max_threshold=max_threshold,
             read_pair_info=read_pair_info1,
+        ),
+        DuplicationCounts.from_dedup_estimator(dedup_estimator),
+        NanoStatsReport.from_nanostats(nanostats)
+    ]
+    if adapter_counter:
+        modules.append(
+            AdapterContent.from_adapter_counter_adapters_and_ranges(
+                adapter_counter, adapters, data_ranges, read_pair_info=read_pair_info1)
         )
-    )
+
+    # Modules covering both read1 and read2; placement is decided by the final sort.
+    if insert_size_metrics:
+        modules.append(
+            AdapterFromOverlapReport.from_insert_size_metrics(insert_size_metrics))
+        modules.append(
+            InsertSizeMetricsReport.from_insert_size_metrics(insert_size_metrics))
 
     if (metrics_reverse and per_tile_quality_reverse and
             sequence_duplication_reverse):
@@ -2168,6 +2190,5 @@ def calculate_stats(
             max_threshold=max_threshold,
             read_pair_info=READ2,
         ))
-
-    modules.append(NanoStatsReport.from_nanostats(nanostats))
+    modules.sort(key=module_sort_key)
     return modules
diff --git a/tox.ini b/tox.ini
@@ -78,5 +78,6 @@ allowlist_externals=bash
 commands=
     mkdir -p reports
     bash -c 'for FILE in tests/data/*.fastq tests/data/*.fastq.gz tests/data/*.bam; do sequali --outdir reports $FILE; done'
+    sequali --outdir reports tests/data/LTB-A-BC001_S1_L003_R1_001.fastq.gz tests/data/LTB-A-BC001_S1_L003_R2_001.fastq.gz
     # || exit 1 needed to force crashes when html5validator fails with 255
     bash -c 'html5validator -- reports/*.html || exit 1'