@@ -6,109 +6,133 @@ load_metrics <- function(se = se_object, multiqc = multiqc_data_dir,
66 gtf = gtf_fn ,
77 counts = counts ,
88 single_end = FALSE ) {
9- # bcbio input
10- if (! is.na(se_object )) {
11- se <- readRDS(se_object )
12- metrics <- metadata(se )$ metrics %> % as.data.frame()
13- # left_join(coldata %>% rownames_to_column('sample')) %>% column_to_rownames('sample')
14- } else { # nf-core input
9+ # Get metrics from nf-core into bcbio like table
10+ # many metrics are already in the General Table of MultiQC, this reads the file
11+ metrics <- read_tsv(file.path(multiqc_data_dir , " multiqc_general_stats.txt" ))
1512
16- # Get metrics from nf-core into bcbio like table
17- # many metrics are already in the Genereal Table of MultiQC, this reads the file
18- metrics <- read_tsv(file.path(multiqc_data_dir , " multiqc_general_stats.txt" ))
13+ # we use the names in the multiqc general stats file to determine which version of the pipeline was used.
14+ # this affects other metrics processing throughout this function.
15+ if (any(grepl(" mqc-generalstats" , names(metrics )))) {
16+ version <- " 3.14"
17+ } else {
18+ version <- " 3.18"
19+ }
1920
20- # we get some more metrics from Qualimap and rename columns
21+ # we get some more metrics from Qualimap and rename columns
22+ if (version == 3.14 ) {
2123 metrics_qualimap <- read_tsv(file.path(multiqc_data_dir , " mqc_qualimap_genomic_origin_1.txt" ))
22- metrics <- metrics %> % full_join(metrics_qualimap )
24+ } else {
25+ metrics_qualimap <- read_tsv(file.path(multiqc_data_dir , " qualimap_genomic_origin.txt" ))
26+ }
27+
28+ metrics <- metrics %> % full_join(metrics_qualimap )
29+ metrics <- metrics %> %
30+ clean_names()
31+
32+ if (version == " 3.14" ) {
33+ metrics <- metrics %> % dplyr :: rename_with(~ gsub(" .*mqc_generalstats_" , " " , . ))
34+ }
35+
36+ # This uses the fastqc metrics to get total reads
37+ total_reads <- metrics %> %
38+ dplyr :: filter(! is.na(fastqc_raw_total_sequences )) %> %
39+ remove_empty(which = " cols" ) %> %
40+ dplyr :: rename(single_sample = sample ) %> %
41+ mutate(sample = gsub(" _[12]+$" , " " , single_sample )) %> %
42+ group_by(sample ) %> %
43+ summarize(total_reads = sum(fastqc_raw_total_sequences ))
44+
45+ # This renames to user-friendly names the metrics columns
46+ if (single_end ) {
2347 metrics <- metrics %> %
24- clean_names() %> %
25- dplyr :: rename_with(~ gsub(" .*mqc_generalstats_" , " " , . ))
26-
27- # This uses the fastqc metrics to get total reads
28- total_reads <- metrics %> %
29- dplyr :: filter(! is.na(fastqc_raw_total_sequences )) %> %
30- remove_empty(which = " cols" ) %> %
31- dplyr :: rename(single_sample = sample ) %> %
32- mutate(sample = gsub(" _[12]+$" , " " , single_sample )) %> %
33- group_by(sample ) %> %
34- summarize(total_reads = sum(fastqc_raw_total_sequences ))
35-
36- # This renames to user-friendly names the metrics columns
37- if (single_end ) {
38- metrics <- metrics %> %
39- dplyr :: filter(! is.na(fastqc_raw_total_sequences ))
40- } else {
41- metrics <- metrics %> %
42- dplyr :: filter(is.na(fastqc_raw_total_sequences ))
43- }
48+ dplyr :: filter(! is.na(fastqc_raw_total_sequences ))
49+ } else {
4450 metrics <- metrics %> %
45- remove_empty(which = " cols" ) %> %
46- full_join(total_reads ) %> %
47- mutate(mapped_reads = samtools_reads_mapped ) %> %
48- rowwise() %> %
49- mutate(exonic_rate = exonic / (exonic + intronic + intergenic )) %> %
50- mutate(intronic_rate = intronic / (exonic + intronic + intergenic )) %> %
51- mutate(intergenic_rate = intergenic / (exonic + intronic + intergenic )) %> %
52- mutate(x5_3_bias = qualimap_5_3_bias )
53-
54- # Sometimes we don't have rRNA due to mismatch annotation, We skip this if is the case
55- gtf <- NULL
56- biotype <- NULL
57-
58- if (genome == " other" ) {
59- gtf <- gtf_fn
60- } else {
61- if (genome == " hg38" ) {
62- gtf <- " hg38.rna.gtf.gz"
63- } else if (genome == " mm10" ) {
64- gtf <- " mm10.rna.gtf.gz"
65- } else if (genome == " mm39" ) {
66- gtf <- " mm39.rna.gtf.gz"
67- }
68- gtf <- file.path(" https://github.com/bcbio/bcbioR/raw/refs/heads/main/inst/extdata/annotation" , gtf )
69- }
70- if (is.null(gtf )) {
71- warning(" No genome provided! Please add it at the top of this Rmd" )
72- } else {
73- gtf <- rtracklayer :: import(gtf )
74-
75- one <- grep(" gene_type" , colnames(as.data.frame(gtf )), value = TRUE )
76- another <- grep(" gene_biotype" , colnames(as.data.frame(gtf )), value = TRUE )
77- if (length(one ) == 1 ) {
78- biotype <- one
79- } else if (length(another ) == 1 ) {
80- biotype <- another
81- } else {
82- warning(" No gene biotype founded" )
83- }
51+ dplyr :: filter(is.na(fastqc_raw_total_sequences ))
52+ }
53+
54+ metrics <- metrics %> %
55+ remove_empty(which = " cols" ) %> %
56+ full_join(total_reads )
57+
58+ if (version == " 3.14" ) {
59+ metrics <- metrics %> % mutate(mapped_reads = samtools_reads_mapped )
60+ } else {
61+ metrics <- metrics %> % mutate(mapped_reads = samtools_stats_reads_mapped )
62+ }
63+
64+ metrics <- metrics %> %
65+ rowwise() %> %
66+ mutate(exonic_rate = exonic / (exonic + intronic + intergenic )) %> %
67+ mutate(intronic_rate = intronic / (exonic + intronic + intergenic )) %> %
68+ mutate(intergenic_rate = intergenic / (exonic + intronic + intergenic ))
69+
70+ if (version == " 3.14" ) {
71+ metrics <- metrics %> % mutate(x5_3_bias = qualimap_5_3_bias )
72+ } else {
73+ metrics <- metrics %> % mutate(x5_3_bias = qualimap_rnaseq_5_3_bias )
74+ }
75+
76+ # Sometimes we don't have rRNA due to mismatch annotation, We skip this if is the case
77+ gtf <- NULL
78+ biotype <- NULL
79+
80+ if (genome == " other" ) {
81+ gtf <- gtf_fn
82+ } else {
83+ if (genome == " hg38" ) {
84+ gtf <- " hg38.rna.gtf.gz"
85+ } else if (genome == " mm10" ) {
86+ gtf <- " mm10.rna.gtf.gz"
87+ } else if (genome == " mm39" ) {
88+ gtf <- " mm39.rna.gtf.gz"
8489 }
90+ gtf <- file.path(" https://github.com/bcbio/bcbioR/raw/refs/heads/main/inst/extdata/annotation" , gtf )
91+ }
92+ if (is.null(gtf )) {
93+ warning(" No genome provided! Please add it at the top of this Rmd" )
94+ } else {
95+ gtf <- rtracklayer :: import(gtf )
8596
86- metrics $ sample <- make.names(metrics $ sample )
87- if (! is.null(biotype )) {
88- annotation <- as.data.frame(gtf ) %> % . [, c(" gene_id" , biotype )]
89- annotation $ gene_id <- stringr :: str_remove(annotation $ gene_id , " \\ ..*$" ) # remove .1 from end of gene
90- rRNA <- grepl(" rRNA|tRNA" , annotation [[biotype ]])
91- genes <- intersect(annotation [rRNA , " gene_id" ], row.names(counts ))
92- ratio <- data.frame (
93- sample = colnames(counts ),
94- r_and_t_rna_rate = colSums(counts [genes , ]) / colSums(counts )
95- )
97+ one <- grep(" gene_type" , colnames(as.data.frame(gtf )), value = TRUE )
98+ another <- grep(" gene_biotype" , colnames(as.data.frame(gtf )), value = TRUE )
99+ if (length(one ) == 1 ) {
100+ biotype <- one
101+ } else if (length(another ) == 1 ) {
102+ biotype <- another
103+ metrics $ sample <- make.names(metrics $ sample )
96104 metrics <- left_join(metrics , ratio , by = " sample" )
97105 } else {
98- metrics [[ " r_and_t_rna_rate " ]] <- NA
106+ warning( " No gene biotype founded " )
99107 }
108+ }
100109
101- # if ("custom_content_biotype_counts_percent_r_rna" %in% colnames(metrics)){
102- # metrics <- mutate(metrics, r_rna_rate = custom_content_biotype_counts_percent_r_rna)
103- # }else{
104- # metrics[["r_rna_rate"]] <- NA
105- # }
106- metrics <- metrics [, c(
107- " sample" , " mapped_reads" , " exonic_rate" , " intronic_rate" ,
108- " total_reads" ,
109- " x5_3_bias" , " r_and_t_rna_rate" , " intergenic_rate"
110- )]
110+ metrics $ sample <- make.names(metrics $ sample )
111+ if (! is.null(biotype )) {
112+ annotation <- as.data.frame(gtf ) %> % . [, c(" gene_id" , biotype )]
113+ annotation $ gene_id <- stringr :: str_remove(annotation $ gene_id , " \\ ..*$" ) # remove .1 from end of gene
114+ rRNA <- grepl(" rRNA|tRNA" , annotation [[biotype ]])
115+ genes <- intersect(annotation [rRNA , " gene_id" ], row.names(counts ))
116+ ratio <- data.frame (
117+ sample = colnames(counts ),
118+ r_and_t_rna_rate = colSums(counts [genes , ]) / colSums(counts )
119+ )
120+ metrics <- left_join(metrics , ratio , by = " sample" )
121+ } else {
122+ metrics [[" r_and_t_rna_rate" ]] <- NA
111123 }
124+
125+ # if ("custom_content_biotype_counts_percent_r_rna" %in% colnames(metrics)){
126+ # metrics <- mutate(metrics, r_rna_rate = custom_content_biotype_counts_percent_r_rna)
127+ # }else{
128+ # metrics[["r_rna_rate"]] <- NA
129+ # }
130+ metrics <- metrics [, c(
131+ " sample" , " mapped_reads" , " exonic_rate" , " intronic_rate" ,
132+ " total_reads" ,
133+ " x5_3_bias" , " r_and_t_rna_rate" , " intergenic_rate"
134+ )]
135+
112136 rownames(metrics ) <- metrics $ sample
113137 return (metrics )
114138}
0 commit comments