Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paired end report has the same order as single end report. #155

Merged
merged 5 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ Changelog

version 0.9.0-dev
-----------------
+ Sort the modules in paired-end reports in the same order as in single-end
reports. For example, the sequence length distributions for read 1 and
read 2 now appear directly after each other.
+ Add common human genome repeats and Illumina poly-G dark cycles to the
overrepresented sequences database.
+ Illumina adapter trimming sequences were added to the contaminants database
Expand Down
87 changes: 54 additions & 33 deletions src/sequali/report_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ def create_toc(content: str):
if header_level != current_level:
if header_level > current_level:
for i in range(header_level - current_level):
toc.write('<li><ul class="toc_list">')
# List style type: none prevents seeing two bullets in
# front of the list item.
toc.write('<li style="list-style-type:none;">'
'<ul class="toc_list">')
else:
for i in range(current_level - header_level):
toc.write("</ul></li>")
Expand Down Expand Up @@ -1893,12 +1896,12 @@ def to_html(self) -> str:
<th>Adapter Sequence</th><th>Best match</th></tr>
<tr>
<td>Read 1</td>
<td>{self.longest_adapter_read1}</td>
<td style="font-family:monospace;">{self.longest_adapter_read1}</td>
<td>{self.longest_adapter_read1_match}</td>
</tr>
<tr>
<td>Read 2</td>
<td>{self.longest_adapter_read2}</td>
<td style="font-family:monospace;">{self.longest_adapter_read2}</td>
<td>{self.longest_adapter_read2_match}</td>
</tr>
</table>
Expand All @@ -1910,7 +1913,7 @@ def to_html(self) -> str:
for adapter, count in self.adapters_read1:
report.write(
f"""<tr>
<td>{adapter}</td>
<td style="font-family:monospace;">{adapter}</td>
<td style="text-align:right;">{count}</td>
</tr>
""")
Expand All @@ -1922,7 +1925,7 @@ def to_html(self) -> str:
for adapter, count in self.adapters_read2:
report.write(
f"""<tr>
<td>{adapter}</td>
<td style="font-family:monospace;">{adapter}</td>
<td style="text-align:right;">{count}</td>
</tr>
""")
Expand Down Expand Up @@ -1952,6 +1955,33 @@ def to_html(self) -> str:
# Inverse of NAME_TO_CLASS: maps each report module class back to the name
# it is registered under in NAME_TO_CLASS.
CLASS_TO_NAME: Dict[Type[ReportModule], str] = {
value: key for key, value in NAME_TO_CLASS.items()}

# Sort priority of every report module class. The priority of a class is its
# position in the tuple below, starting at 0; module_sort_key uses it as the
# first element of the sort key.
CLASS_TO_ORDER = {
    module_class: position
    for position, module_class in enumerate((
        Meta,
        Summary,
        SequenceLengthDistribution,
        PerBaseQualityScoreDistribution,
        PerPositionMeanQualityAndSpread,
        PerSequenceAverageQualityScores,
        PerPositionBaseContent,
        PerPositionNContent,
        PerSequenceGCContent,
        AdapterContent,
        AdapterFromOverlapReport,
        InsertSizeMetricsReport,
        PerTileQualityReport,
        DuplicationCounts,
        OverRepresentedSequences,
        NanoStatsReport,
    ))
}


def module_sort_key(m: ReportModule):
    """
    Return the sort key for a report module.

    The key is a ``(priority, read_pair_info)`` tuple. The priority is
    looked up in CLASS_TO_ORDER by the exact class of ``m``; a class that
    is not present in that mapping raises a KeyError. The second element is
    the module's ``read_pair_info`` attribute, or an empty string when the
    module has no such attribute.
    """
    order = CLASS_TO_ORDER[type(m)]
    # Not every module has a read_pair_info attribute; fall back to "".
    pair_label = getattr(m, "read_pair_info", "")
    return order, pair_label


def report_modules_to_dict(report_modules: Iterable[ReportModule]):
def get_name(module: ReportModule) -> str:
Expand Down Expand Up @@ -2113,39 +2143,31 @@ def calculate_stats(
data_ranges = list(equidistant_ranges(max_length, graph_resolution))
modules = [
Meta.from_filepath(filename, filename_reverse),
]
# Generic modules for both read1 and read2 come first.
if insert_size_metrics:
modules.append(
AdapterFromOverlapReport.from_insert_size_metrics(insert_size_metrics))
modules.append(
InsertSizeMetricsReport.from_insert_size_metrics(insert_size_metrics))
if filename_reverse:
modules.append(
DuplicationCounts.from_dedup_estimator(dedup_estimator),
)
modules.extend(qc_metrics_modules(metrics, data_ranges,
read_pair_info=read_pair_info1))
if adapter_counter:
modules.append(
AdapterContent.from_adapter_counter_adapters_and_ranges(
adapter_counter, adapters, data_ranges, read_pair_info=read_pair_info1)
)
modules.append(PerTileQualityReport.from_per_tile_quality_and_ranges(
per_tile_quality, data_ranges, read_pair_info=read_pair_info1),)
if not filename_reverse:
modules.append(
DuplicationCounts.from_dedup_estimator(dedup_estimator)
)
modules.append(
*qc_metrics_modules(metrics, data_ranges, read_pair_info=read_pair_info1),
PerTileQualityReport.from_per_tile_quality_and_ranges(
per_tile_quality, data_ranges, read_pair_info=read_pair_info1),
OverRepresentedSequences.from_sequence_duplication(
sequence_duplication,
fraction_threshold=fraction_threshold,
min_threshold=min_threshold,
max_threshold=max_threshold,
read_pair_info=read_pair_info1,
),
DuplicationCounts.from_dedup_estimator(dedup_estimator),
NanoStatsReport.from_nanostats(nanostats)
]
if adapter_counter:
modules.append(
AdapterContent.from_adapter_counter_adapters_and_ranges(
adapter_counter, adapters, data_ranges, read_pair_info=read_pair_info1)
)
)

# Generic modules for both read1 and read2 come first.
if insert_size_metrics:
modules.append(
AdapterFromOverlapReport.from_insert_size_metrics(insert_size_metrics))
modules.append(
InsertSizeMetricsReport.from_insert_size_metrics(insert_size_metrics))

if (metrics_reverse and per_tile_quality_reverse and
sequence_duplication_reverse):
Expand All @@ -2168,6 +2190,5 @@ def calculate_stats(
max_threshold=max_threshold,
read_pair_info=READ2,
))

modules.append(NanoStatsReport.from_nanostats(nanostats))
modules.sort(key=module_sort_key)
return modules
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -78,5 +78,6 @@ allowlist_externals=bash
commands=
mkdir -p reports
bash -c 'for FILE in tests/data/*.fastq tests/data/*.fastq.gz tests/data/*.bam; do sequali --outdir reports $FILE; done'
sequali --outdir reports tests/data/LTB-A-BC001_S1_L003_R1_001.fastq.gz tests/data/LTB-A-BC001_S1_L003_R2_001.fastq.gz
# || exit 1 needed to force crashes when html5validator fails with 255
bash -c 'html5validator -- reports/*.html || exit 1'
Loading