#' Tracer Accumlation Function
#'
#' This slightly brute force function searches for all directories under a parent directory, assuming they
#' were created with Generate_New_Sample_Data script, (see script for possible differences in directories
#' and naming conventions for tracer exports) and collates the necessary summary statistics into one table.
#'
#' Summary Statistics are clock rate, growthRate, ePop and TMRCA.
#'
#'
#' @param reps replicates total. Default = 10
#' @param dirs number of subdirectories to be scanned
#' @param vars variables total. Default = 5
#' @param var.R0 boolean for whether Generation_Sim or Broken was used in data creation. Default=TRUE
#' @param N.v column vector of N. Default: N.v=c(rep(10000,reps)),
#' @param mu.v column vector of mutation rates in directory. Default: mu.v=c(1e-04, rep(0.001,3), rep(1e-04,3), rep(1e-05,3)),
#' @param Tg.v column vector of generation times in directory. Default: Tg.v=c(1,rep(3,9)),
#' @param R0.v column vector of R0 in directory. Default: R0.v=c(2,rep(c(2,5,20),3)),
#' @param root.dir - root directory, omitting a final /
#' @return Returns a data frame of collated statstics
#'
#'
#' @export
#'
#'
Tracer_Accumulation <- function(reps=10,
dirs=15,
vars=5,
var.R0=TRUE,
N.v=c(rep(10000,dirs)),
mu.v=c(rep(0.001,5), rep(1e-04,5), rep(1e-05,5)),
Tg.v=c(rep(3,15)),
R0.v=c(rep(c(2,3.5,5,10,20),3)),
root.dir="C:/Users/Oliver/Google Drive/Academic Work/Imperial/O15-12/BEAST/Data Collection/Variable"){
## WARNINGS ##
## AUXILLARY FUNCTIONS ##
nex_read <- function(nex.file, case.tree){
# pull nexus file and isolate the names for meta data
nex <- read.nexus.data(nex.file)
nex.names <- names(nex)
split.names <- strsplit(nex.names,split = "_")
# set up results to store from each split
Seq.Res <- vector(mode="character",length=length(split.names))
Time.Res <- vector(mode="character",length=length(split.names))
for (i in 1:length(split.names)){
Seq.Res[i] <- split.names[[i]][2]
Time.Res[i] <- split.names[[i]][3]
}
# calculate needed statistics before returning
Sample.Size <- length(Seq.Res)
Sequence.Count <- length(unique(Seq.Res))
Time.Span <- max(as.numeric(Time.Res))-min(as.numeric(Time.Res))
Youngest.Sample <- max(case.tree$Time)-max(as.numeric(Time.Res))
Oldest.Sample <- min(as.numeric(Time.Res))
Percent.Cover <- Time.Span/max(case.tree$Time)
res <- c(Sample.Size,Sequence.Count,Time.Span, Youngest.Sample,Oldest.Sample, Percent.Cover)
return(res)
}
RMSE_calculation <- function(Full.Table){
l <- rep(0,dim(Full.Table)[1])
Errors <- list("RMedSE_mu"=l,"RMedSE_Gr"=l,"RMedSE_ePop"=l,"RMedSE_TMRCA"=l,"RMeanSE_mu"=l,"RMeanSE_Gr"=l,"RMeanSE_ePop"=l,"RMeanSE_TMRCA"=l)
Errors$RMedSE_mu <- sqrt((Full.Table$O_Mu_Med - Full.Table$E_Mu)^2)
Errors$RMedSE_Gr <- sqrt((Full.Table$O_Gr_Med - Full.Table$E_Gr)^2)
Errors$RMedSE_ePop <- sqrt((Full.Table$O_ePop_Med - Full.Table$E_ePop)^2)
Errors$RMedSE_TMRCA <- sqrt((Full.Table$O_TMRCA_Med - Full.Table$E_TMRCA)^2)
Errors$RMeanSE_mu <- sqrt((Full.Table$O_Mu_Mean - Full.Table$E_Mu)^2)
Errors$RMeanSE_Gr <- sqrt((Full.Table$O_Gr_Mean - Full.Table$E_Gr)^2)
Errors$RMeanSE_ePop <- sqrt((Full.Table$O_ePop_Mean - Full.Table$E_ePop)^2)
Errors$RMeanSE_TMRCA <- sqrt((Full.Table$O_TMRCA_Mean - Full.Table$E_TMRCA)^2)
res <- cbind(Full.Table,data.frame(Errors))
return(res)
}
for (i in 1:dirs){
## create file and directory locations from the given root.dir
case.file <- paste(root.dir,"/N_",N.v[i],"_mu_",mu.v[i],"_Tg_",Tg.v[i],"_R0_",R0.v[i],"/casetree.RData",sep="")
summary.file <- paste(root.dir,"/N_",N.v[i],"_mu_",mu.v[i],"_Tg_",Tg.v[i],"_R0_",R0.v[i],"/summary.txt",sep="")
nex.dir <- paste(root.dir,"/N_",N.v[i],"_mu_",mu.v[i],"_Tg_",Tg.v[i],"_R0_",R0.v[i],"/NT_1_SS_Heterochronous/",sep="")
load(case.file)
## pull data sequentially from nexus files
for(j in 1:reps){
if(j==1){
nex.output <- nex_read(paste(nex.dir,"replicate",j,".nex",sep=""),case.tree)
} else {
nex.output <- rbind(nex.output,nex_read(paste(nex.dir,"replicate",j,".nex",sep=""),case.tree))
}
}
## load the summary statistic file exported from TRACER
Tracer_Addon(file.dir=paste(root.dir,"/N_",N.v[i],"_mu_",mu.v[i],"_Tg_",Tg.v[i],"_R0_",R0.v[i],"/",sep=""),log.dir=nex.dir,reps=reps)
all <- read.delim(summary.file, row.names=1,stringsAsFactors = FALSE)
## create results
Result <- list("Var"=rep(var.R0,reps),"Rep"=1:reps,"Tg"=rep(Tg.v[i],reps),"E_Mu"=rep(mu.v[i],reps), "O_Mu_Med"=as.numeric(all["median",seq(3,vars*(reps-1)+3,vars)]),"O_Mu_Mean"=as.numeric(all["mean",seq(3,vars*(reps-1)+3,vars)]),"O_Mu_StdErr"=as.numeric(all["stdErr",seq(3,vars*(reps-1)+3,vars)]), "ESS_mu"=as.numeric(all["effective sample size (ESS)",seq(3,vars*(reps-1)+3,vars)]),
"E_Gr"=rep((log(R0.v[i])/Tg.v[i]),reps), "O_Gr_Med"=as.numeric(all["median",seq(5,vars*(reps-1)+5,vars)]),"O_Gr_Mean"=as.numeric(all["mean",seq(5,vars*(reps-1)+5,vars)]),"O_Gr_StdErr"=as.numeric(all["stdErr",seq(5,vars*(reps-1)+5,vars)]), "ESS_Gr"=as.numeric(all["effective sample size (ESS)",seq(5,vars*(reps-1)+5,vars)]),
"E_ePop"=rep(N.v[i],reps), "O_ePop_Med"=as.numeric(all["median",seq(4,vars*(reps-1)+4,vars)]),"O_ePop_Mean"=as.numeric(all["mean",seq(4,vars*(reps-1)+4,vars)]),"O_ePop_StdErr"=as.numeric(all["stdErr",seq(4,vars*(reps-1)+4,vars)]), "ESS_ePop"=as.numeric(all["effective sample size (ESS)",seq(4,vars*(reps-1)+4,vars)]),
"E_TMRCA"=rep(max(case.tree$Time),reps), "O_TMRCA_Med"=as.numeric(all["median",seq(2,vars*(reps-1)+2,vars)]),"O_TMRCA_Mean"=as.numeric(all["mean",seq(2,vars*(reps-1)+2,vars)]),"O_TMRCA_StdErr"=as.numeric(all["stdErr",seq(2,vars*(reps-1)+2,vars)]), "ESS_TMRCA"=as.numeric(all["effective sample size (ESS)",seq(2,vars*(reps-1)+2,vars)]),
"Sample_Size"=as.numeric(nex.output[,1]), "Sequence_Count"=as.numeric(nex.output[,2]),"Sequence_Cover"=as.numeric(nex.output[,2]/max(case.tree$Seq.ID)),"Time_Span"=as.numeric(nex.output[,3]),"Youngest"=as.logical(nex.output[,4]),"Oldest"=as.numeric(nex.output[,5]),"Time_Cover"=as.numeric(nex.output[,6]),"R0"=rep((R0.v[i]),reps))
## collate results before returning
if(i==1){
Full.Table <- data.frame(Result)
} else {
Full.Table <- rbind(Full.Table, data.frame(Result))
}
}
res <- RMSE_calculation(Full.Table = Full.Table)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.