Skip to content

Examples

rtahmasbi edited this page Mar 8, 2016 · 4 revisions

Example 1

In this example, you will create genotypes and the required parameters in R.

##############################################################################
# create genotypes in hap, legend, indv format
# creating CVs
##############################################################################
NCHR <- 3
NSNP <- rep(1000,NCHR) #number SNPs per chromosome
NCV <- rep(100,NCHR) #number CVs per chromosome
NIND <- 1000
VAR.A <- 1
VAR.D <- .1
INCLUDE.CHRS <- 1:NCHR

# We create this map in order to use the real genomic map distance
map.pos <- c(249238555, 242900000, 197900000, 199150000, 180750000, 170955878, 159167276, 146364984, 141088894, 135526070, 135002856, 133841619, 115148564, 105826742,  102484095, 90201263, 81100000, 78062535, 59160912, 6.3e+07, 48157860, 51246300)



NCHR <- length(NSNP)
cv.info <- c()

for (ichr in 1:NCHR)
{
        print("---------------------------------------")
        nsnp_chr <- NSNP[ichr]
        p <- runif(nsnp_chr,min=.05,max=.95)
        hap <- matrix(0, nrow=NIND, ncol=2*nsnp_chr)
        for (isnp in 1:nsnp_chr)
        {
            hap[,2*isnp-1] <- rbinom(NIND,1,p[isnp])
            hap[,2*isnp] <- rbinom(NIND,1,p[isnp])
        }
    # creating ref.hap file
    write.table(hap, file=paste("ref.chr",ichr,".hap",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
    # creating ref.legend file
    legend.id <- paste("rs",1:nsnp_chr,sep='')
    legend.pos <- sort(sample(map.pos[ichr], nsnp_chr, replace = FALSE))
    legend.al0 <- rep("C", nsnp_chr)
    legend.al1 <- rep("T", nsnp_chr)
    write.table(cbind(legend.id,legend.pos,legend.al0,legend.al1), file=paste("ref.chr",ichr,".legend",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
    # creating ref.indv file
    write.table(1:NIND, file=paste("ref.chr",ichr,".indv",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
    ####
    # creating cv.hap file
    ncv_chr <- NCV[ichr]
    cv_chr <- sort(sample(1:nsnp_chr, ncv_chr, replace = FALSE))
    cv_hap_index <- sort(c(2*cv_chr-1,2*cv_chr))
    cvs <- hap[,cv_hap_index]
    write.table(cvs, file=paste("cv.chr",ichr,".hap",sep=''), quote=FALSE,row.names=FALSE, col.names=FALSE)
    cv.pos <- legend.pos[cv_chr]
    cv.maf <- apply(matrix(c(cvs),nrow=2*NIND),2,mean)
    cv.maf[which(cv.maf>.5)] <- 1-cv.maf[which(cv.maf>.5)]
    cv.a <- rnorm(ncv_chr,mean=0,sd=sqrt(VAR.A))
    cv.d <- rnorm(ncv_chr,mean=0,sd=sqrt(VAR.D))
    cv.info.chr <- cbind(ichr,cv.pos,cv.maf,cv.a,cv.d)
    cv.info <- rbind(cv.info,cv.info.chr)
}

colnames(cv.info) <- c("chr","pos","maf","a","d")
write.table(cv.info, file=paste("cv.info",sep=''), quote=FALSE,row.names=FALSE, col.names=TRUE)


########### --file_cvs
b <- paste("cv.chr",INCLUDE.CHRS,".hap",sep='')
b <- cbind(INCLUDE.CHRS,b)
write.table(b,file=paste("par.pop1.cv_hap_files.txt",sep=''),quote=FALSE,row.names=FALSE,col.names=FALSE)


########### --file_hap_name
a1 <- paste("ref.chr",INCLUDE.CHRS,".hap",sep="")
a2 <- paste("ref.chr",INCLUDE.CHRS,".legend",sep="")
a3 <- paste("ref.chr",INCLUDE.CHRS,".indv",sep="")
a <- cbind(INCLUDE.CHRS,a1,a2,a3)
write.table(a,file=paste("par.pop1.hap_sample_address.txt",sep=''),quote=FALSE,row.names=FALSE,col.names=c("chr","hap","legend","sample"))



# creating population info

##############################################################################
#DEFINE WILDCARDS
#Must run this section. These need to be changed as you see fit
##############################################################################

NUM.GENERATIONS <- 20    #number of generations
POP.SIZE <- rep(3000,NUM.GENERATIONS)  #population size over time
PHENO.MATE.COR <- rep(.5,NUM.GENERATIONS)   #phenotypic correlation between mates at each generation
OFFSPRING_DIST <- rep("p",NUM.GENERATIONS) # p or P=Poisson distribution, f or F=fixed distribution
OFFSPRING_DIST[NUM.GENERATIONS] <- "f" # last generation is fixed
SELECTION.FUNCTION <- rep("logit",NUM.GENERATIONS) #Selection function for each generation
SELECTION.FUNCTION.PAR1 <- rep(20,NUM.GENERATIONS) #Selection function for each generation
SELECTION.FUNCTION.PAR2 <- rep(0,NUM.GENERATIONS) #Selection function for each generation

#How fine grained should the recombination map be? This has an important effect on speed & memory. The lower this number (in kb) the more RAM and longer the program takes. Numbers < 10 are too fine-grained to make any difference. Typical choices are between 10-50.



##############################################################################
#END WILDCARDS
##############################################################################




###########popinfo.txt
write.table(cbind(POP.SIZE,PHENO.MATE.COR,OFFSPRING_DIST,SELECTION.FUNCTION,SELECTION.FUNCTION.PAR1,SELECTION.FUNCTION.PAR2), file=paste("par.pop1.info.txt",sep=''),quote=FALSE,row.names=FALSE, col.names=c("pop_size","mat_cor","offspring_dist","selection_func","selection_func_par1","selection_func_par2"))

Then run the following command

#Run this command (change "path" to appropriate directory)
# you also need to download and unzip Recom_Map.zip

/path/GeneEvolve --file_gen_info par.pop1.info.txt --file_hap_name par.pop1.hap_sample_address.txt --file_recom_map Recom.Map.b37.50KbDiff --file_cv_info cv.info --file_cvs par.pop1.cv_hap_files.txt --va 1 --ve 1 --no_output --prefix out1

Example 2

In the following example, you will simulate a population of size 3000 and evolve it for 10 generations. The correlation between mates is 0.

/path/GeneEvolve \
--file_gen_info par.pop1.info.txt \
--file_hap_name par.pop1.hap_sample_address.txt \
--file_recom_map Recom.Map.b37.50KbDiff \
--file_cv_info par.pop1.cv_info.txt \
--file_cvs par.pop1.cv_hap_files.txt \
--no_output \
--prefix out2

The file par.pop1.info.txt is:

3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0
3000 0 p logit 20 0

Example 3

This example is as same as the above example, but here we rescale the --va, --vd and --ve.

/path/GeneEvolve \
--file_gen_info par.pop1.info.txt \
--file_hap_name par.pop1.hap_sample_address.txt \
--file_recom_map Recom.Map.b37.50KbDiff \
--file_cv_info par.pop1.cv_info.txt \
--file_cvs par.pop1.cv_hap_files.txt \
--va 1 \
--vd .1 \
--ve 1 \
--no_output \
--prefix out3
Clone this wiki locally