|
| 1 | +## ----setup, include=FALSE----------------------------------------------------- |
| 2 | +knitr::opts_chunk$set(echo = TRUE) |
| 3 | + |
| 4 | +knitr::opts_knit$set(root.dir = '.') |
| 5 | +library(CNVScope) |
| 6 | +options(scipen=999) |
| 7 | +library(magrittr) |
| 8 | + |
| 9 | +## ----aml_files,eval=F,echo=T-------------------------------------------------- |
| 10 | +# if(!dir.exists("extracted_aml_data")){dir.create("extracted_aml_data")} |
| 11 | +# untar("gdc_download_aml.tar.gz",exdir = "./extracted_aml_data") |
| 12 | +# target_files_aml<-list.files(path = "extracted_aml_data",pattern=glob2rx("*NormalVsPrimary.tsv"),recursive=T,full.names = T) |
| 13 | +# print(target_files_aml) |
| 14 | + |
| 15 | +## ----eval=F,echo=T------------------------------------------------------------ |
| 16 | +# sample_aggregated_segvals_aml<-formSampleMatrixFromRawGDCData(tcga_files = target_files_aml,format = "TARGET") |
| 17 | +# saveRDS(sample_aggregated_segvals_aml,"aml_sample_matched_input_matrix.rds") |
| 18 | + |
| 19 | +## ----aml_plots, eval=T,echo=T------------------------------------------------- |
| 20 | +sample_aggregated_segvals_aml<-readRDS("aml_sample_matched_input_matrix.rds") |
| 21 | +invariant_bins<-which((sample_aggregated_segvals_aml[stringr::str_detect(rownames(sample_aggregated_segvals_aml),"chr7"),] %>% t() %>% as.data.frame() %>% sapply(sd))==0) |
| 22 | +chr_7_mat<-sample_aggregated_segvals_aml[(stringr::str_detect(rownames(sample_aggregated_segvals_aml),"chr7") & rownames(sample_aggregated_segvals_aml) %in% setdiff(rownames(sample_aggregated_segvals_aml),names(invariant_bins))),] %>% t() |
| 23 | + |
| 24 | +## ----chr7_cor----------------------------------------------------------------- |
| 25 | +chr_7_mat %>% cor(use="pairwise.complete.obs",method="pearson") %>% |
| 26 | + CNVScope::signedRescale(max_cap=1) %>% |
| 27 | + reshape2::melt() %>% |
| 28 | + ggplot(aes(x=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start, |
| 29 | + y=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start, |
| 30 | + fill=value)) + geom_raster() + |
| 31 | + theme(axis.text.x = element_blank(),axis.text.y=element_blank(),axis.title = element_blank()) + |
| 32 | + ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) |
| 33 | + |
| 34 | +## ----breakpoints-------------------------------------------------------------- |
| 35 | +if(Sys.info()['sysname'] == "Linux" | |
| 36 | +Sys.info()['sysname'] == "Windows"){ |
| 37 | +colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"))] |
| 38 | +breakpoints<-colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"))] %>% stringr::str_split_fixed(string = .,pattern="_",n=3) %>% as.matrix() %>% .[,2] %>% as.numeric() |
| 39 | +breakpoint_labels <- colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"))] |
| 40 | +breakpoint_labels} else { |
| 41 | +colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"),algorithm = "jointSeg",nb_change_max = round(min(dim(chr_7_mat))/5))$breakpoints_col] |
| 42 | +breakpoints<-colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"),algorithm = "jointSeg",nb_change_max = round(min(dim(chr_7_mat))/5))$breakpoints_col] %>% stringr::str_split_fixed(string = .,pattern="_",n=3) %>% as.matrix() %>% .[,2] %>% as.numeric() |
| 43 | +breakpoint_labels <- colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"),algorithm = "jointSeg",nb_change_max = round(min(dim(chr_7_mat))/5))$breakpoints_col] |
| 44 | +breakpoint_labels |
| 45 | +} |
| 46 | + |
| 47 | +## ----breakpoint_plot---------------------------------------------------------- |
| 48 | +chr_7_mat %>% cor(use="pairwise.complete.obs",method="pearson") %>% |
| 49 | + CNVScope::signedRescale(max_cap=1) %>% |
| 50 | + reshape2::melt() %>% |
| 51 | + ggplot(aes(x=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start, |
| 52 | + y=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start, |
| 53 | + fill=value)) + geom_raster() + |
| 54 | + theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank(),axis.title = element_blank()) + |
| 55 | + scale_x_continuous(breaks=breakpoints,labels=breakpoint_labels) + |
| 56 | + ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) |
| 57 | + |
| 58 | + |
| 59 | +## ----probdist----------------------------------------------------------------- |
| 60 | +if(requireNamespace("smoothie",quietly=T)){ |
| 61 | +chr_7_probdist <- CNVScope::calcCNVKernelProbDist(cor(chr_7_mat,use="pairwise.complete.obs"),parallel=F)$percentile_matrix |
| 62 | +js_breakpoints<-jointseg::jointSeg(chr_7_probdist,K=20)$bestBkp |
| 63 | +js_breakpoint_labels<-colnames(chr_7_mat)[js_breakpoints] |
| 64 | +} else{ |
| 65 | + print("Please install smoothie in order to run this example.") |
| 66 | +} |
| 67 | + |
| 68 | +## ----plot_probdist------------------------------------------------------------ |
| 69 | +if(requireNamespace("smoothie",quietly=T)){ |
| 70 | +chr_7_probdist %>% |
| 71 | +# CNVScope::signedRescale(max_cap=1) %>% |
| 72 | + reshape2::melt() %>% |
| 73 | + ggplot(aes(x=Var1, |
| 74 | + y=Var2, |
| 75 | + fill=value)) + geom_tile() + |
| 76 | +# theme(axis.title = element_blank()) + #axis.text.x = element_blank(),axis.text.y=element_blank(), |
| 77 | + theme(axis.text.x = element_text(angle=90, hjust=1), |
| 78 | + axis.text.y = element_text(angle=0, hjust=1) |
| 79 | + ,axis.title = element_blank()) + |
| 80 | +# scale_x_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) + |
| 81 | +# scale_y_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) + |
| 82 | + scale_x_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) + |
| 83 | + scale_y_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) + |
| 84 | + |
| 85 | + ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) |
| 86 | +} else{ |
| 87 | + print("Please install smoothie in order to run this example.") |
| 88 | +} |
| 89 | + |
| 90 | + |
| 91 | +## ----census_data,eval=F------------------------------------------------------- |
| 92 | +# census_data <- readRDS(system.file("censushg19.rds",package = "CNVScope")) |
| 93 | +# census_data[census_data@seqnames %in% "chr7"] %>% sort() %>% tibble::as_tibble() %>% janitor::clean_names() %>% dplyr::select(seqnames,start,end,gene_symbol,tumour_types_somatic,tumour_types_germline) %>% dplyr::filter(start>60e6,stringr::str_detect(string = tumour_types_somatic,pattern="AML") | stringr::str_detect(string = tumour_types_germline,pattern="AML")) |
| 94 | + |
| 95 | +## ----blca_files,eval=F,echo=T------------------------------------------------- |
| 96 | +# if(!dir.exists("extracted_blca_data")){dir.create("extracted_blca_data") |
| 97 | +# untar("gdc_download_blca.tar.gz",exdir = "./extracted_blca_data")} |
| 98 | +# tcga_files_blca<-list.files(path = "extracted_blca_data",pattern=glob2rx("*.tsv"),recursive=T,full.names = T) |
| 99 | +# print(tcga_files_blca) |
| 100 | + |
| 101 | +## ----eval=F,echo=T------------------------------------------------------------ |
| 102 | +# sample_aggregated_segvals_blca<-formSampleMatrixFromRawGDCData(tcga_files = tcga_files_blca,format = "TCGA",parallel=T) |
| 103 | +# saveRDS(sample_aggregated_segvals_blca,"blca_sample_matched_input_matrix.rds") |
| 104 | + |
| 105 | +## ----blca_plots, eval=T,echo=T------------------------------------------------ |
| 106 | +sample_aggregated_segvals_blca<-readRDS("blca_sample_matched_input_matrix.rds") |
| 107 | +invariant_bins<-which((sample_aggregated_segvals_blca[stringr::str_detect(rownames(sample_aggregated_segvals_blca),"chr17"),] %>% t() %>% as.data.frame() %>% sapply(sd))==0) |
| 108 | +chr_17_mat<-sample_aggregated_segvals_blca[(stringr::str_detect(rownames(sample_aggregated_segvals_blca),"chr17") & rownames(sample_aggregated_segvals_blca) %in% setdiff(rownames(sample_aggregated_segvals_blca),names(invariant_bins))),] %>% t() |
| 109 | + |
| 110 | +## ----chr17_cor---------------------------------------------------------------- |
| 111 | +chr_17_mat %>% cor(use="pairwise.complete.obs",method="pearson") %>% |
| 112 | + CNVScope::signedRescale(max_cap=1) %>% |
| 113 | + reshape2::melt() %>% |
| 114 | + ggplot(aes(x=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start, |
| 115 | + y=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start, |
| 116 | + fill=value)) + geom_raster() + |
| 117 | + theme(axis.text.x = element_blank(),axis.text.y=element_blank(),axis.title = element_blank()) + |
| 118 | + ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) |
| 119 | + |
| 120 | +## ----probdist_chr17----------------------------------------------------------- |
| 121 | +if(requireNamespace("smoothie",quietly=T)){ |
| 122 | +chr_17_probdist <- CNVScope::calcCNVKernelProbDist(cor(chr_17_mat,use="pairwise.complete.obs"),parallel=F)$percentile_matrix |
| 123 | +colnames(chr_17_probdist)<-colnames(chr_17_mat) |
| 124 | +rownames(chr_17_probdist)<-colnames(chr_17_mat) |
| 125 | +chr_17_js_breakpoints<-jointseg::jointSeg(chr_17_probdist,K=40)$bestBkp |
| 126 | +chr_17_js_breakpoint_labels<-colnames(cor(chr_17_mat))[chr_17_js_breakpoints] |
| 127 | +chr_17_js_breakpoint_labels |
| 128 | +} else{ |
| 129 | + print("Please install smoothie in order to run this example.") |
| 130 | +} |
| 131 | + |
| 132 | + |
| 133 | +## ----breakpoint_plot_chr17,eval=F--------------------------------------------- |
| 134 | +# |
| 135 | +# breakpoint_plot_probdist <- chr_17_probdist %>% # cor(use="pairwise.complete.obs",method="pearson") %>% |
| 136 | +# CNVScope::signedRescale(max_cap=1) %>% |
| 137 | +# reshape2::melt() %>% |
| 138 | +# dplyr::mutate(col_pos=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start, |
| 139 | +# row_pos=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start, |
| 140 | +# rel_prob=value) %>% |
| 141 | +# ggplot(aes(x=col_pos, |
| 142 | +# y=row_pos, |
| 143 | +# fill=rel_prob)) + geom_raster() + |
| 144 | +# theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank()) + |
| 145 | +# scale_x_continuous(breaks=reshape2::colsplit(chr_17_js_breakpoint_labels,"_",c("chr","start","end"))$start,labels=chr_17_js_breakpoint_labels) + |
| 146 | +# ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) + |
| 147 | +# labs(x="col_pos",y="row_pos",value="Pearson Correlation:") + ggtitle("Chromosome 17 relationship probability") + |
| 148 | +# geom_contour(binwidth = .395, aes(z = value)) |
| 149 | +# breakpoint_plot <- chr_17_mat %>% cor(use="pairwise.complete.obs",method="pearson") %>% |
| 150 | +# CNVScope::signedRescale(max_cap=1) %>% |
| 151 | +# reshape2::melt() %>% |
| 152 | +# dplyr::mutate(col_pos=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start, |
| 153 | +# row_pos=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start, |
| 154 | +# correlation=value) %>% |
| 155 | +# ggplot(aes(x=col_pos, |
| 156 | +# y=row_pos, |
| 157 | +# fill=correlation)) + geom_raster() + |
| 158 | +# theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank()) + |
| 159 | +# scale_x_continuous(breaks=reshape2::colsplit(chr_17_js_breakpoint_labels,"_",c("chr","start","end"))$start,labels=chr_17_js_breakpoint_labels) + |
| 160 | +# ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) + |
| 161 | +# labs(x="col_pos",y="row_pos",value="Pearson Correlation:") + ggtitle("Chromosome 17 linear relationship domains") + |
| 162 | +# geom_contour(binwidth = .395, aes(z = value)) |
| 163 | +# breakpoint_plot_corr_diff <- ((chr_17_mat %>% cor(use="pairwise.complete.obs",method="spearman"))-(chr_17_mat %>% cor(use="pairwise.complete.obs",method="pearson"))) %>% |
| 164 | +# CNVScope::signedRescale(max_cap=1) %>% |
| 165 | +# reshape2::melt() %>% |
| 166 | +# dplyr::mutate(col_pos=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start, |
| 167 | +# row_pos=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start, |
| 168 | +# corr_diff=value) %>% |
| 169 | +# ggplot(aes(x=col_pos, |
| 170 | +# y=row_pos, |
| 171 | +# fill=corr_diff)) + geom_raster() + |
| 172 | +# theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank()) + |
| 173 | +# scale_x_continuous(breaks=reshape2::colsplit(chr_17_js_breakpoint_labels,"_",c("chr","start","end"))$start,labels=chr_17_js_breakpoint_labels) + |
| 174 | +# ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) + |
| 175 | +# labs(x="col_pos",y="row_pos",value="Pearson Correlation:") + ggtitle("Chromosome 17 nonlinear (red) relationship regions, inferred by nonlinear-linear correlation difference") + |
| 176 | +# geom_contour(binwidth = .395, aes(z = value)) |
| 177 | +# |
| 178 | +# breakpoint_plot |
| 179 | +# breakpoint_plot_probdist |
| 180 | +# breakpoint_plot_corr_diff |
| 181 | +# |
| 182 | + |
| 183 | +## ----plotly_blca,eval=F------------------------------------------------------- |
| 184 | +# library(plotly) |
| 185 | +# breakpoint_plot %>% plotly::ggplotly() |
| 186 | + |
| 187 | +## ----3D_blca,eval=F----------------------------------------------------------- |
| 188 | +# chr_17_long <- chr_17_mat %>% cor(use="pairwise.complete.obs",method="pearson") %>% |
| 189 | +# CNVScope::signedRescale(max_cap=1) %>% |
| 190 | +# reshape2::melt() %>% |
| 191 | +# dplyr::mutate(col_pos=as.numeric(reshape2::colsplit(Var1,"_",c("chr","start","end"))$start), |
| 192 | +# row_pos=as.numeric(reshape2::colsplit(Var2,"_",c("chr","start","end"))$start), |
| 193 | +# correlation=value) %>% dplyr::select(col_pos,row_pos,correlation) |
| 194 | +# plot_ly(data = chr_17_long, x=chr_17_long$col_pos,y=chr_17_long$row_pos,z=chr_17_long$correlation,color=c(0,0.5,1),colors=circlize::colorRamp(c("blue","white","red")),intensity=chr_17_long$correlation,type = "mesh3d") |
| 195 | + |
| 196 | +## ----skcm_files,eval=F,echo=T------------------------------------------------- |
| 197 | +# if(!dir.exists("extracted_skcm_data")){dir.create("extracted_skcm_data")} |
| 198 | +# untar("gdc_download_skcm.tar.gz",exdir = "./extracted_skcm_data") |
| 199 | +# tcga_files_skcm<-list.files(path = "extracted_skcm_data",pattern=glob2rx("*.tsv"),recursive=T,full.names = T) |
| 200 | +# print(tcga_files_skcm) |
| 201 | + |
| 202 | +## ----eval=F,echo=T------------------------------------------------------------ |
| 203 | +# #ptm <- proc.time() |
| 204 | +# #doMC::registerDoMC() |
| 205 | +# #doParallel::registerDoParallel() |
| 206 | +# sample_aggregated_segvals_skcm<-formSampleMatrixFromRawGDCData(tcga_files = tcga_files_skcm,format = "TCGA",parallel = T) |
| 207 | +# #proc.time() - ptm |
| 208 | +# saveRDS(sample_aggregated_segvals_skcm,"skcm_sample_matched_input_matrix.rds") |
| 209 | + |
| 210 | +## ----eval=F,echo=T------------------------------------------------------------ |
| 211 | +# if(!dir.exists("extracted_prad_data")){dir.create("extracted_prad_data") |
| 212 | +# untar("gdc_download_prad.tar.gz",exdir = "extracted_prad_data")} |
| 213 | +# tcga_files_prad<-list.files(path = "extracted_prad_data",pattern=glob2rx("*.tsv"),recursive=T,full.names = T) |
| 214 | +# print(tcga_files_prad) |
| 215 | +# |
| 216 | + |
| 217 | +## ----eval=F,echo=T------------------------------------------------------------ |
| 218 | +# sample_aggregated_segvals_output_full_prad<-formSampleMatrixFromRawGDCData(tcga_files = tcga_files_prad,format = "TCGA",binsize=1e6) |
| 219 | +# saveRDS(sample_aggregated_segvals_output_full_prad,"PRAD_sample_matched_input_matrix.rds") |
| 220 | +# |
| 221 | + |
0 commit comments