#' Simulate summary statistics using msABC
#' @description This function simulates summary statistics using msABC.
#' It is optimized for nextgen data, but it should also work for sanger.
#' This will simulate the entire loci, not a single SNP.
#' You should include all loci in the model object, including invariable ones. Use the get.data.structure function to set up the parameters of the loci - bp and number of individuals - to be simulated.
#' @param model A model object bult by the main.menu function. Both genomic and sanger-type models are allowed.
#' @param use.alpha Logical. If TRUE the most recent population size change will be exponential. If FALSE sudden demographic changes. Default is FALSE.
#' This argument changes ONLY the MOST RECENT demographich change.
#' @param nsim.blocks Number of blocks to simulate. The total number of simulations is: nsim.blocks x sim.block.size.
#' @param sim.block.size Simulations are performed in blocks. This argument defines the size of the block in number of simulations, i.e. how many simulations to run per block.
#' A block of 1000 will work for most cases. Increse the total number of simulations with nsim.block argument.
#' @param path Path to write the output. By default outputs will be saved in the working directory.
#' @param output.name String. The prefix of the output names. Defalt is "model"
#' @param append.sims Logical. If TRUE simulations will be appended in the last output. Default is FALSE.
#' @param msABC.call String. Path to the msABC executable. msABC binaries for Mac and Linux are included in the package and should work cases for these operating systems.
#' There is no need to change this unless you want to compile the program yourself and point the function to it.
#' @param mu.rates List. Distribution to sample the mutation rates. The first element of the list should be the name of the distribution as a character string. All distributions available in r-base and r-package e1071 are allowed.
#' The second element of the list must be the number of loci. The following elements are the parameters of the distribution to be passed on to the r-distribution function.
#' Ex.: mu.rates = list("rtnorm", 1000, 1e-9, 1e-9, 0). i.e sample 1000 values using the rtnorm function with mean 1e-9 and SD 1e-9, with lower tail limit at zero.
#' @param rec.rates List. Distribution to sample the recombination rates. The first element of the list should be the name of the distribution as a character string. All distributions available in r-base and r-package e1071 are allowed.
#' The second element of the list must be the number of loci. The following elements are the parameters of the distribution to be passed on to the r-distribution function.
#' Ex.: rec.rates = list("rtnorm", 1000, 1e-9, 1e-9, 0). i.e sample 1000 values using the rtnorm function with mean 1e-9 and SD 1e-9, with lower tail limit at zero.
#' @return Writes simulations and parameters to the path directory.
#' @references Hudson R.R. (2002) Generating samples under a Wright-Fisher neutral model of genetic variation. Bioinformatics, 18, 337–338.
#' @references Pavlidis P., Laurent S., & Stephan W. (2010) msABC: A modification of Hudson’s ms to facilitate multi-locus ABC analysis. Molecular Ecology Resources, 10, 723–727.
#' @author Marcelo Gehara
#' @export
sim.msABC.sumstat<-function(model, nsim.blocks, path=getwd(), use.alpha=F, mu.rates=NULL, rec.rates = NULL,
append.sims=F,block.size=10, msABC.call = get.msABC(),output.name,ncores){
WD<-getwd()
if(class(model)!="Model"){
stop("First arguments should be an object of class Model generated by the main.menu() function.")
}
if(model$I[1,1]=="genomic"){
stop("You need to get the data structure first. See function get.data.structure")
}
# set working directory
setwd(path)
locfile <- PipeMaster:::get.locfile(model)
msABC.path <- find.package("PipeMaster")
if(append.sims==F){
com <- PipeMaster:::msABC.commander(model,use.alpha=use.alpha,arg=1)
write.table(locfile,paste(".",1,"locfile.txt",sep=""),row.names = F,col.names = T,quote = F,sep=" ")
options(warn=-1)
x <- strsplit(system2(msABC.call, args=com[[1]], stdout = T,stderr=T,wait=T),"\t")
options(warn=0)
nam<-x[1][[1]]
#TD_denom <- paste(nam[grep("pi",nam)],nam[grep("_w",nam)],sep="_")
#nam<-nam[-grep("ZnS",nam)]
#nam<-nam[-grep("thomson",nam)]
#cols <- grep("fwh",nam)
cols <- grep("thomson",nam)
cols <- c(cols, grep("ZnS",nam))
#cols <- c(cols,grep("_FayWuH",nam))
if(length(cols)!=0) nam <- nam[-cols]
#nam <- c(nam, TD_denom)
nam <- c(com[[2]][1,],"mean.rate","sd.rate",nam)
#t(paste(nam,"_skew",sep="")),t(paste(nam,"_var",sep="")))
write.table(t(nam),file=paste("SIMS_",output.name,".txt",sep=""),quote=F,row.names = F,col.names = F, append=F,sep="\t")
}
write(paste('arg <- commandArgs(TRUE)',
'cat(paste("Core",arg),sep="\\n")',
"suppressMessages(library(PipeMaster))",
#"suppressMessages(library(devtools))",
#"load_all('~/Github/PipeMaster')",
'load(file=".PM_objects.RData")',
"res<-sim.func(arg)",
'save(res,file=paste(".",arg,"_SIMS.rda",sep=""))',
'write(1,".log",append=T,sep="\\n")',
'invisible(file.remove(paste(".",arg,"locfile.txt",sep="")))',
"quit(save='no')",sep="\n"),".script_parallel.R")
sim.func<-function(arg){
simulations<-NULL
for(i in 1:block.size){
if(!(is.null(mu.rates))){
rates<-do.call(mu.rates[[1]],args=mu.rates[2:length(mu.rates)])
rates<-rep(rates, each=as.numeric(model$I[1,3]))
rates<-list(rates,c(0,0))
} else {
rates <- sample.mu.rates(model)
}
if(!(is.null(rec.rates))){
r.rates <- do.call(rec.rates[[1]],args=rec.rates[2:length(rec.rates)])
r.rates <- rep(r.rates, each = as.numeric(model$I[1,3]))
} else {
r.rates <- 0
}
locfile[,5] <- rates[[1]]
locfile[,6] <- r.rates
write.table(locfile,paste(".",arg,"locfile.txt",sep=""),row.names = F,col.names = T,quote = F,sep=" ")
com <- msABC.commander(model, use.alpha=use.alpha,arg=arg)
options(warn=-1)
sumstat <- read.table(text=system2(msABC.call, args=com[[1]], stdout = T,stderr=T,wait=T),header=T,sep="\t")
options(warn=0)
sumstat <- subset(sumstat, select=-c(X))
#TD_denom <- data.frame(sumstat[,grep("pi",colnames(sumstat))]-sumstat[,grep("_w",colnames(sumstat))])
#colnames(TD_denom) <- paste(colnames(sumstat)[grep("pi",colnames(sumstat))],sumstat[,grep("_w",colnames(sumstat))])
#sumstat<-sumstat[,-grep("ZnS",colnames(sumstat))]
#sumstat<-sumstat[,-grep("thomson",colnames(sumstat))]
#sumstat <- cbind(sumstat, TD_denom)
#Mean<-apply(sumstat,2,mean, na.rm = T)
#var<-apply(sumstat,2,var, na.rm=T)
#kur<-apply(sumstat,2,kurtosis, na.rm=T)
#skew<-apply(sumstat,2,skewness, na.rm=T)
#cols <- grep("fwh",colnames(sumstat))
cols <- grep("thomson",colnames(sumstat))
cols <- c(cols,grep("ZnS",colnames(sumstat)))
#cols <- c(cols,grep("_FayWuH",colnames(sumstat)))
if(length(cols)!=0) sumstat <- sumstat[,-cols]
param <- c(com[[2]][2,],rates[[2]])
names(param) <- c(com[[2]][1,],"mean.rate","sd.rate")
simulations <- rbind(simulations,c(param,sumstat))
}
return(simulations)
}
parentls <- function()ls(envir=parent.frame())
save(list=parentls(),file='.PM_objects.RData')
total.sims<-0
for(j in 1:nsim.blocks) {
start.time <- Sys.time()
write(0,".log")
for(c in 1:ncores){
system(paste("Rscript .script_parallel.R",c), wait = F)
}
l<-"0"
while(sum(as.numeric(unlist(strsplit(l, "")))) < ncores){
Sys.sleep(5)
l<-readLines(".log")
}
file.remove(".log")
simulations<-NULL
cat("Reading simulations from worker nodes", sep="\n")
for(c in 1:ncores){
load(file = paste(".",c,"_SIMS.rda",sep=""))
simulations <- rbind(simulations, res)
}
cat("Writing simulations to file", sep="\n")
write.table(simulations,file=paste("SIMS_",output.name,".txt",sep=""),quote=F,row.names = F,col.names = F, append=T,sep="\t")
cat("Removing old simulations", sep="\n")
for(t in 1:ncores){
file.remove(paste(".",t,"_SIMS.rda",sep=""))
}
end.time <- Sys.time()
total.sims <- total.sims+(block.size*ncores)
cycle.time <- (as.numeric(end.time)-as.numeric(start.time))/60/60
total.time <- cycle.time*nsim.blocks
passed.time <- cycle.time*j
remaining.time <- round(total.time-passed.time,3)
cat(paste("PipeMaster:: ",total.sims," (~",round((block.size*ncores)/cycle.time)," sims/h) | ~",remaining.time," hours remaining",sep=""),"\n")
}
setwd(WD)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.