Skip to content

Commit

Permalink
infercnv update
Browse files Browse the repository at this point in the history
  • Loading branch information
maud-p authored Jan 30, 2025
1 parent cae85f7 commit b8b2bb3
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 15 deletions.
4 changes: 2 additions & 2 deletions analyses/cell-type-wilms-tumor-06/scripts/06_infercnv.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ if (opts$HMM == "no") {
dir.create(output_dir, recursive = TRUE)

# retrieve the gene order file created in `06a_build-geneposition.R`
gene_order_file <- file.path(module_base, "results", "references", "gencode_v19_gene_pos.txt")
gene_arm_order_file <- file.path(module_base, "results","references", 'gencode_v29_gene_pos_arm.txt')

# Run infercnv ------------------------------------------------------------------
# create inferCNV object and run method
Expand All @@ -191,7 +191,7 @@ infercnv_obj <- infercnv::CreateInfercnvObject(
raw_counts_matrix = as.matrix(counts),
annotations_file = annot_df,
ref_group_names = normal_cells,
gene_order_file = gene_order_file,
gene_order_file = gene_arm_order_file,
# ensure all cells are included
min_max_counts_per_cell = c(-Inf, +Inf)
)
Expand Down
133 changes: 120 additions & 13 deletions analyses/cell-type-wilms-tumor-06/scripts/06a_build-geneposition.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,126 @@ module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06"

# load library
library(tidyverse)
library(dplyr)

# Define the gene order file
gene_order_file <- file.path(module_base, "results","references", 'gencode_v19_gene_pos.txt')
gene_order_file <- file.path(module_base, "results","references", 'gencode_v19_gene_pos.txt')

# Define the arm order file
arm_order_file <- file.path(module_base, "results","references", 'hg38_cytoBand.txt')

# Define the gene/arm combined information file
gene_arm_order_file <- file.path(module_base, "results","references", 'gencode_v29_gene_pos_arm.txt')

# Download the file if ot existing
if (!file.exists(gene_order_file)) {
download.file(url = 'https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt',
destfile = gene_order_file)
gene_order <- read_tsv(gene_order_file, col_names = FALSE)
tmp <- gsub("\\..*","",gene_order$X1)
tmp <- gsub(".*\\|","",tmp)
gene_order$X1 <- tmp
gene_order <- gene_order[grepl("ENSG", x = gene_order$X1),]

write_tsv(col_names = FALSE, gene_order, gene_order_file, append = FALSE)
}
# Add to the gene order file information on the chromosome arms
if (!file.exists(gene_arm_order_file)) {

# Download the the gene order file
if (!file.exists(gene_order_file)) {

download.file(url = 'https://data.broadinstitute.org/Trinity/CTAT/cnv/gencode_v19_gen_pos.complete.txt',
destfile = gene_order_file)
}
gene_order <- read_tsv(gene_order_file, col_names = FALSE)
tmp <- gsub("\\..*","",gene_order$X1)
tmp <- gsub(".*\\|","",tmp)
gene_order$X1 <- tmp
gene_order <- gene_order[grepl("ENSG", x = gene_order$X1),]

# Download the the arm order file
if (!file.exists(arm_order_file)) {
download.file(url = 'http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz',
destfile = arm_order_file)
}
# Load cytoBand file into R
cytoBand <- read.table(arm_order_file, header = FALSE, sep = "\t", stringsAsFactors = FALSE)

# Assign column names
colnames(cytoBand) <- c("chrom", "start", "end", "band", "stain")

# Add a column for the chromosome arm (p or q)
cytoBand <- cytoBand %>%
mutate(arm = substr(band, 1, 1)) # Extract 'p' or 'q' from the band column

# Group by chromosome and arm to calculate arm start and end positions
chromosome_arms <- cytoBand %>%
group_by(chrom, arm) %>%
summarize(
start = min(start),
end = max(end),
.groups = "drop"
)

# Filter out unwanted chromosomes (e.g., scaffolds, mitochondrial DNA)
chromosome_arms <- chromosome_arms %>%
filter(grepl("^chr[0-9XY]+$", chrom)) # Keep only standard chromosomes

gene_order$X5 <- "p"
for(i in 1:dim(gene_order)[1]){
start_q <- chromosome_arms[chromosome_arms$chrom %in% gene_order[i, 2], ]
if(dim(start_q)[1]>0){
start_q <- start_q[start_q$arm == "p", 4]
if(gene_order[i,3] > start_q){
gene_order[i,5] <- "q"
}
}else{
gene_order[i,5] <- "NA"
}
gene_order[i,2] <- paste0(gene_order[i,2], gene_order[i,5])

}
gene_order$X2 <- factor(gene_order$X2, levels = c("chr1p",
"chr1q",
"chr2p",
"chr2q",
"chr3p",
"chr3q",
"chr4p",
"chr4q",
"chr5p",
"chr5q",
"chr6p",
"chr6q",
"chr7p",
"chr7q",
"chr8p",
"chr8q",
"chr9p",
"chr9q",
"chr10p",
"chr10q",
"chr11p",
"chr11q",
"chr12p",
"chr12q",
"chr13p",
"chr13q",
"chr14p",
"chr14q",
"chr15p",
"chr15q",
"chr16p",
"chr16q",
"chr17p",
"chr17q",
"chr18p",
"chr18q",
"chr19p",
"chr19q",
"chr20p",
"chr20q",
"chr21p",
"chr21q",
"chr22p",
"chr22q",
"chrXp",
"chrXq",
"chrYp",
"chrYq",
"chrMNA"))
gene_order <- gene_order[order(gene_order$X2, gene_order$X3),]

write_tsv(col_names = FALSE, gene_order[,-5], gene_arm_order_file, append = FALSE)

}

0 comments on commit b8b2bb3

Please sign in to comment.