-
Notifications
You must be signed in to change notification settings - Fork 4
Examples
rtahmasbi edited this page Mar 8, 2016
·
4 revisions
In this example, you will create genotypes and the required parameters in R.
##############################################################################
# create genotypes in hap, legend, indv format
# creating CVs
##############################################################################
NCHR <- 3
NSNP <- rep(1000,NCHR) #number SNPs per chromosome
NCV <- rep(100,NCHR) #number CVs per chromosome
NIND <- 1000
VAR.A <- 1
VAR.D <- .1
INCLUDE.CHRS <- 1:NCHR
# We create this map in order to use the real genomic map distance
map.pos <- c(249238555, 242900000, 197900000, 199150000, 180750000, 170955878, 159167276, 146364984, 141088894, 135526070, 135002856, 133841619, 115148564, 105826742, 102484095, 90201263, 81100000, 78062535, 59160912, 6.3e+07, 48157860, 51246300)
NCHR <- length(NSNP)
cv.info <- c()
for (ichr in 1:NCHR)
{
print("---------------------------------------")
nsnp_chr <- NSNP[ichr]
p <- runif(nsnp_chr,min=.05,max=.95)
hap <- matrix(0, nrow=NIND, ncol=2*nsnp_chr)
for (isnp in 1:nsnp_chr)
{
hap[,2*isnp-1] <- rbinom(NIND,1,p[isnp])
hap[,2*isnp] <- rbinom(NIND,1,p[isnp])
}
# creating ref.hap file
write.table(hap, file=paste("ref.chr",ichr,".hap",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
# creating ref.legend file
legend.id <- paste("rs",1:nsnp_chr,sep='')
legend.pos <- sort(sample(map.pos[ichr], nsnp_chr, replace = FALSE))
legend.al0 <- rep("C", nsnp_chr)
legend.al1 <- rep("T", nsnp_chr)
write.table(cbind(legend.id,legend.pos,legend.al0,legend.al1), file=paste("ref.chr",ichr,".legend",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
# creating ref.indv file
write.table(1:NIND, file=paste("ref.chr",ichr,".indv",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
####
# creating cv.hap file
ncv_chr <- NCV[ichr]
cv_chr <- sort(sample(1:nsnp_chr, ncv_chr, replace = FALSE))
cv_hap_index <- sort(c(2*cv_chr-1,2*cv_chr))
cvs <- hap[,cv_hap_index]
write.table(cvs, file=paste("cv.chr",ichr,".hap",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
cv.pos <- legend.pos[cv_chr]
cv.maf <- apply(matrix(c(cvs),nrow=2*NIND),2,mean)
cv.maf[which(cv.maf>.5)] <- 1-cv.maf[which(cv.maf>.5)]
cv.a <- rnorm(ncv_chr,mean=0,sd=sqrt(VAR.A))
cv.d <- rnorm(ncv_chr,mean=0,sd=sqrt(VAR.D))
cv.info.chr <- cbind(ichr,cv.pos,cv.maf,cv.a,cv.d)
cv.info <- rbind(cv.info,cv.info.chr)
}
colnames(cv.info) <- c("chr","pos","maf","a","d")
write.table(cv.info, file=paste("cv.info",sep=''), quote=FALSE,row.names=FALSE, col.names=TRUE)
########### --file_cvs
b <- paste("cv.chr",INCLUDE.CHRS,".hap",sep='')
b <- cbind(INCLUDE.CHRS,b)
write.table(b,file=paste("par.pop1.cv_hap_files.txt",sep=''),quote=FALSE,row.names=FALSE,col.names=FALSE)
########### --file_hap_name
a1 <- paste("ref.chr",INCLUDE.CHRS,".hap",sep="")
a2 <- paste("ref.chr",INCLUDE.CHRS,".legend",sep="")
a3 <- paste("ref.chr",INCLUDE.CHRS,".indv",sep="")
a <- cbind(INCLUDE.CHRS,a1,a2,a3)
write.table(a,file=paste("par.pop1.hap_sample_address.txt",sep=''),quote=FALSE,row.names=FALSE,col.names=c("chr","hap","legend","sample"))
# creating population info
##############################################################################
#DEFINE WILDCARDS
#Must run this section. These need to be changed as you see fit
##############################################################################
NUM.GENERATIONS <- 20 #number of generations
POP.SIZE <- rep(3000,NUM.GENERATIONS) #population size over time
PHENO.MATE.COR <- rep(.5,NUM.GENERATIONS) #phenotypic correlation between mates at each generation
OFFSPRING_DIST <- rep("p",NUM.GENERATIONS) # p or P=Poisson distribution, f or F=fixed distribution
OFFSPRING_DIST[NUM.GENERATIONS] <- "f" # last generation is fixed
SELECTION.FUNCTION <- rep("logit",NUM.GENERATIONS) #Selection function for each generation
SELECTION.FUNCTION.PAR1 <- rep(20,NUM.GENERATIONS) #Selection function for each generation
SELECTION.FUNCTION.PAR2 <- rep(0,NUM.GENERATIONS) #Selection function for each generation
#How fine grained should the recombination map be? This has an important effect on speed & memory. The lower this number (in kb) the more RAM and longer the program takes. Numbers < 10 are too fine-grained to make any difference. Typical choices are between 10-50.
##############################################################################
#END WILDCARDS
##############################################################################
###########popinfo.txt
write.table(cbind(POP.SIZE,PHENO.MATE.COR,OFFSPRING_DIST,SELECTION.FUNCTION,SELECTION.FUNCTION.PAR1,SELECTION.FUNCTION.PAR2), file=paste("par.pop1.info.txt",sep=''),quote=FALSE,row.names=FALSE, col.names=c("pop_size","mat_cor","offspring_dist","selection_func","selection_func_par1","selection_func_par2"))
Then run the following command
#Run this command (change "path" to appropriate directory)
# you also need to download and unzip Recom_Map.zip
/path/GeneEvolve --file_gen_info par.pop1.info.txt --file_hap_name par.pop1.hap_sample_address.txt --file_recom_map Recom.Map.b37.50KbDiff --file_cv_info cv.info --file_cvs par.pop1.cv_hap_files.txt --va 1 --ve 1 --no_output --prefix out1
In the following example, you will simulate a population of size 3000 and evolve it for 10 generations. The correlation between mates is 0.
/path/GeneEvolve \
--file_gen_info par.pop1.info.txt \
--file_hap_name par.pop1.hap_sample_address.txt \
--file_recom_map Recom.Map.b37.50KbDiff \
--file_cv_info par.pop1.cv_info.txt \
--file_cvs par.pop1.cv_hap_files.txt \
--no_output \
--prefix out2
The file par.pop1.info.txt
is:
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
This example is as same as the above example, but here we rescale the --va
, --vd
and --ve
.
/path/GeneEvolve \
--file_gen_info par.pop1.info.txt \
--file_hap_name par.pop1.hap_sample_address.txt \
--file_recom_map Recom.Map.b37.50KbDiff \
--file_cv_info par.pop1.cv_info.txt \
--file_cvs par.pop1.cv_hap_files.txt \
--va 1 \
--vd .1 \
--ve 1 \
--no_output \
--prefix out3