-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathatac-fastq-quality-control.Rmd
76 lines (59 loc) · 1.96 KB
/
atac-fastq-quality-control.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
---
title: "FASTQ Quality Control"
author: "Sam Buckberry"
date: "23/10/2021"
output: html_document
---
Load R libraries
```{r, message=FALSE}
library(stringr)
library(magrittr)
library(Rfastp)
library(DT)
library(ggplot2)
#library(cowplot)
```
### FASTQ quality control and filtering
Write a function to process all the samples.
```{r, cache=TRUE, message=FALSE}
# Locate the paired-end chr22 FASTQ files.
# `pattern` is a regular expression, so the dots are escaped and the match is
# anchored to the end of the file name; this keeps unrelated files (e.g.
# "*.fastq.gz.md5") out of the sample list.
r1_list <- list.files(path = "fastq/",
                      pattern = "chr22_R1\\.fastq\\.gz$",
                      full.names = TRUE)
r2_list <- list.files(path = "fastq/",
                      pattern = "chr22_R2\\.fastq\\.gz$",
                      full.names = TRUE)

# R1 and R2 are combined by position below, so confirm that every R1 file
# sits next to its own R2 mate before building the sample sheet.
expected_r2 <- str_replace(r1_list,
                           pattern = "_R1\\.fastq\\.gz$",
                           replacement = "_R2.fastq.gz")
if (!identical(expected_r2, r2_list)) {
  stop("R1 and R2 FASTQ files in 'fastq/' do not pair up one-to-one.",
       call. = FALSE)
}

# Sample sheet: one row per read pair, plus the output prefix handed to fastp.
fq_df <- data.frame(r1 = r1_list,
                    r2 = r2_list,
                    prefix = str_replace(r1_list,
                                         pattern = "_chr22_R1\\.fastq\\.gz$",
                                         replacement = "_filt_fastp"),
                    stringsAsFactors = FALSE)
# Run fastp (via Rfastp) on one paired-end sample.
#
# x: row index into the `fq_df` sample sheet defined above.
# Returns the report object produced by rfastp() for that sample.
multi_fastp <- function(x) {
  sample_report <- rfastp(
    read1 = fq_df[["r1"]][x],
    read2 = fq_df[["r2"]][x],
    outputFastq = fq_df[["prefix"]][x],
    adapterTrimming = TRUE,
    thread = 2
  )
  sample_report
}
# Run fastp on every row of the sample sheet.
# seq_len() gives an empty sequence when no FASTQ pairs were found, whereas
# 1:nrow() would iterate over c(1, 0) and call fastp with missing inputs.
report_list <- lapply(seq_len(nrow(fq_df)), multi_fastp)
```
Generate a table of statistics
```{r, cache=TRUE}
# Pull the `summary` section out of one fastp report and flatten it.
#
# x: index into `report_list`.
# Returns the summary flattened by unlist().
get_report <- function(x) {
  sample_report <- report_list[[x]]
  unlist(sample_report$summary)
}
# Make a table of the report data: one column per sample, one row per
# summary statistic.
# seq_along() is used instead of 1:length() so an empty `report_list` is not
# indexed at positions 1 and 0.
# format() turns the numbers into plain (non-scientific) strings for display.
report_table <- lapply(seq_along(report_list), get_report) %>%
  do.call(cbind, .) %>%
  format(scientific = FALSE,
         digits = 2,
         drop0trailing = TRUE)

report_table <- data.frame(report_table)

# Label each column with the sample's output prefix.
colnames(report_table) <- fq_df$prefix

datatable(data = report_table)
```
Make some plots of the statistics
```{r, cache=TRUE}
# Draw one curvePlot() per fastp report, titled with the sample's output
# prefix. seq_along() avoids the c(1, 0) sequence that 1:length() produces
# when `report_list` is empty.
cp_list <- lapply(
  X = seq_along(report_list),
  FUN = function(x) {
    curvePlot(report_list[[x]]) + ggtitle(fq_df$prefix[x])
  }
)

# Print the list so every plot is rendered in the knitted document.
cp_list
```