#' Network_Pull_Tracer
#'
#' Summary Statistics are clock rate, growthRate, ePop and TMRCA.
#'
#' This slightly brute force function searches for all directories under a parent directory,
#' assuming they were created with BEAST_R0_Check or similar script, (see script for possible
#' differences in directories and naming conventions for tracer exports) and collates the necessary summary statistics into one table.
#'
#'
#'
#' @param reps replicates total. Default = 10
#' @param dirs number of subdirectories to be scanned
#' @param vars variables total. Default = 5
#' @param var.R0 boolean for whether Generation_Sim or Broken was used in data creation. Default=TRUE
#' @param N.v column vector of N. Default: N.v=c(rep(10000,reps)),
#' @param mu.v column vector of mutation rates in directory. Default: mu.v=c(1e-04, rep(0.001,3), rep(1e-04,3), rep(1e-05,3)),
#' @param Tg.v column vector of generation times in directory. Default: Tg.v=c(1,rep(3,9)),
#' @param R0.v column vector of R0 in directory. Default: R0.v=c(2,rep(c(2,5,20),3)),
#' @param root.dir - root directory, omitting a final /
#' @return Returns a data frame of collated statstics
#'
#'
#' @export
#'
#'
Network_Pull_Tracer <- function(reps=1,
dirs=seq(from=1,to=19.5,by=0.5),
vars=5,
var.R0=TRUE,
N.v=c(rep(10000,length(dirs))),
mu.v=c(rep(1e-04,length(dirs))),
Tg.v=c(rep(50,length(dirs))),
R0.v=c(rep(2.4,length(dirs))),
network.root.dir="LSD4\\mu_1e-04_R0_2.4_Tg_50\\",
local.root.dir="LSD4/mu_1e-04_R0_2.4_Tg_50/"){
## WARNINGS ##
## AUXILLARY FUNCTIONS ##
nex_read <- function(nex.file, case.tree){
# pull nexus file and isolate the names for meta data
nex <- read.nexus.data(nex.file)
nex.names <- names(nex)
split.names <- strsplit(nex.names,split = "_")
# set up results to store from each split
Seq.Res <- vector(mode="character",length=length(split.names))
Time.Res <- vector(mode="character",length=length(split.names))
for (i in 1:length(split.names)){
Seq.Res[i] <- split.names[[i]][2]
Time.Res[i] <- split.names[[i]][3]
}
# calculate needed statistics before returning
Sample.Size <- length(Seq.Res)
Sequence.Count <- length(unique(Seq.Res))
Time.Span <- max(as.numeric(Time.Res))-min(as.numeric(Time.Res))
Youngest.Sample <- max(case.tree$Time)-max(as.numeric(Time.Res))
Oldest.Sample <- min(as.numeric(Time.Res))
Percent.Cover <- Time.Span/max(case.tree$Time)
res <- c(Sample.Size,Sequence.Count,Time.Span, Youngest.Sample,Oldest.Sample, Percent.Cover)
return(res)
}
RMSE_calculation <- function(Full.Table){
l <- rep(0,dim(Full.Table)[1])
Errors <- list("RMedSE_mu"=l,"RMedSE_Gr"=l,"RMedSE_ePop"=l,"RMedSE_TMRCA"=l,"RMeanSE_mu"=l,"RMeanSE_Gr"=l,"RMeanSE_ePop"=l,"RMeanSE_TMRCA"=l)
Errors$RMedSE_mu <- sqrt((Full.Table$O_Mu_Med - Full.Table$E_Mu)^2)
Errors$RMedSE_Gr <- sqrt((Full.Table$O_Gr_Med - Full.Table$E_Gr)^2)
Errors$RMedSE_ePop <- sqrt((Full.Table$O_ePop_Med - Full.Table$E_ePop)^2)
Errors$RMedSE_TMRCA <- sqrt((Full.Table$O_TMRCA_Med - Full.Table$E_TMRCA)^2)
Errors$RMeanSE_mu <- sqrt((Full.Table$O_Mu_Mean - Full.Table$E_Mu)^2)
Errors$RMeanSE_Gr <- sqrt((Full.Table$O_Gr_Mean - Full.Table$E_Gr)^2)
Errors$RMeanSE_ePop <- sqrt((Full.Table$O_ePop_Mean - Full.Table$E_ePop)^2)
Errors$RMeanSE_TMRCA <- sqrt((Full.Table$O_TMRCA_Mean - Full.Table$E_TMRCA)^2)
res <- cbind(Full.Table,data.frame(Errors))
return(res)
}
## Handle network path
network.dir <- paste("\\\\fi--san02.dide.ic.ac.uk\\homes\\olw13\\Data\\",network.root.dir,sep="")
root.dir <- paste("C:/Users/Oliver/Google Drive/Academic Work/Imperial/O15-12/BEAST/Data Collection/",local.root.dir,sep="")
for (i in 1:length(dirs)){
## create file and directory locations from the given root.dir
case.file <- paste(root.dir,"/casetree.RData",sep="")
summary.file <- paste(root.dir,"sigma",dirs[i],"/summary.txt",sep="")
nex.dir <- paste(root.dir,"sigma",dirs[i],"/",sep="")
load(case.file)
## load the summary statistic file exported from TRACER
if(file.exists(paste(nex.dir,"summary.txt",sep=""))==FALSE){
Tracer_Addon(file.dir=nex.dir,log.dir=paste(network.dir,"sigma",dirs[i],"\\",sep=""),reps=reps,replicate.name = paste("sigma",dirs[i],sep=""))
}
all <- read.delim(summary.file, row.names=1,stringsAsFactors = FALSE)
## create results
Result <- list("Var"=rep(var.R0,reps),"Rep"=1:reps,"Tg"=rep(Tg.v[i],reps),"E_Mu"=rep(mu.v[i],reps), "O_Mu_Med"=as.numeric(all["median",seq(3,vars*(reps-1)+3,vars)]),
"O_Mu_Mean"=as.numeric(all["mean",seq(3,vars*(reps-1)+3,vars)]),"O_Mu_StdErr"=as.numeric(all["stdErr",seq(3,vars*(reps-1)+3,vars)]),
"ESS_mu"=as.numeric(all["effective sample size (ESS)",seq(3,vars*(reps-1)+3,vars)]),
"O_Mu_95L"=as.numeric(all["95% HPD Interval lower",seq(3,vars*(reps-1)+3,vars)]),
"O_Mu_95H"=as.numeric(all["95% HPD Interval higher",seq(3,vars*(reps-1)+3,vars)]),
"E_Gr"=rep((log(R0.v[i])/Tg.v[i]),reps), "O_Gr_Med"=as.numeric(all["median",seq(5,vars*(reps-1)+5,vars)]),
"O_Gr_Mean"=as.numeric(all["mean",seq(5,vars*(reps-1)+5,vars)]),"O_Gr_StdErr"=as.numeric(all["stdErr",seq(5,vars*(reps-1)+5,vars)]),
"ESS_Gr"=as.numeric(all["effective sample size (ESS)",seq(5,vars*(reps-1)+5,vars)]),
"O_Gr_95L"=as.numeric(all["95% HPD Interval lower",seq(5,vars*(reps-1)+5,vars)]),
"O_Gr_95H"=as.numeric(all["95% HPD Interval higher",seq(5,vars*(reps-1)+5,vars)]),
"E_ePop"=rep(N.v[i],reps), "O_ePop_Med"=as.numeric(all["median",seq(4,vars*(reps-1)+4,vars)]),
"O_ePop_Mean"=as.numeric(all["mean",seq(4,vars*(reps-1)+4,vars)]),"O_ePop_StdErr"=as.numeric(all["stdErr",seq(4,vars*(reps-1)+4,vars)]),
"ESS_ePop"=as.numeric(all["effective sample size (ESS)",seq(4,vars*(reps-1)+4,vars)]),
"O_ePop_95L"=as.numeric(all["95% HPD Interval lower",seq(4,vars*(reps-1)+4,vars)]),
"O_ePop_95H"=as.numeric(all["95% HPD Interval higher",seq(4,vars*(reps-1)+4,vars)]),
"E_TMRCA"=rep(max(case.tree$Time),reps), "O_TMRCA_Med"=as.numeric(all["median",seq(2,vars*(reps-1)+2,vars)]),
"O_TMRCA_Mean"=as.numeric(all["mean",seq(2,vars*(reps-1)+2,vars)]),"O_TMRCA_StdErr"=as.numeric(all["stdErr",seq(2,vars*(reps-1)+2,vars)]),
"ESS_TMRCA"=as.numeric(all["effective sample size (ESS)",seq(2,vars*(reps-1)+2,vars)]),
"O_TMRCA_95L"=as.numeric(all["95% HPD Interval lower",seq(2,vars*(reps-1)+2,vars)]),
"O_TMRCA_95H"=as.numeric(all["95% HPD Interval higher",seq(2,vars*(reps-1)+2,vars)]))
## collate results before returning
if(i==1){
Full.Table <- data.frame(Result)
} else {
Full.Table <- rbind(Full.Table, data.frame(Result))
}
}
res <- RMSE_calculation(Full.Table = Full.Table)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.