Nothing
#' Analyze batch data generated by \code{\link{sim.fit.stick.data.batch}}
#'
#' @param mut.vals Vector of number of mutations to simulate
#' @param coe.vals Vector of stickbreaking coefficients to analyze
#' @param sig.vals Vector of sigma values to analyze
#' @param fit.methods Vector of all methods of estimating d to then fit model and output results.
#' Accepts "MLE", "RDB", "max", "seq", "RDB.all" and "All". "All" does all methods. Default is "seq". Case sensitive.
#' Note you should use same fit.methods (or a subset of) as used when generating data in
#' @param d.range Search range for d.
#' @param d.true The distance to the boundary (d) used in simulations
#' \code{\link{sim.fit.stick.data.batch}}.
#' @param inpath The path to read input files from. Generally same as \code{outpath} from
#' \code{\link{sim.fit.stick.data.batch}}.
#' @param relative \code{TRUE/FALSE}. Should bias and rMSE be divided by parameter value to make them relative values?
#' @param coes Vector of stickbreaking coefficients for each mutation
#' @return List: \cr
#' [[1]] \code{error.table} A data frame with the parametric conditions and the failure rate, bias and rMSE
#' for the parameters d, the stickbreaking coefficients, and sigma. \cr
#' [[2]] \code{seq.method.props} A data frame with the proportion of sequential estimates coming from the MLE
#' RDB and max methods for each parametric condition. If "seq" is not among the fit.methods, the dataframe is
#' returned but empty.
#' @details This function assesses bias and relative root mean squared error (rMSE) from the data generated by
#' \code{\link{sim.fit.stick.data.batch}}. It also determines the failure rate for each method.
#' An estimate is considered invalid (a failure) when it does not fall in the range \code{d.range}. The
#' function also returns the data frame \code{seq.method.props} that indicates for the sequential method,
#' what proportion of the estimates come from the MLE, RDB and max methods. If "seq" is not part of
#' fit.methods (and if fit.methods <- "All" then it is), this dataframe is empty but still returned.
#' @importFrom stats as.formula dnorm lm median optimize pf predict rnorm runif sigma
#' @importFrom utils read.table write.table
#' @export
analyze.stick.data.batch <- function(mut.vals, coe.vals, sig.vals, inpath, fit.methods, d.range, d.true, relative, coes){
if (fit.methods[1] == "All"){
fit.methods2 <- c("MLE", "RDB", "max", "seq", "RDB.all")
} else{
fit.methods2 <- fit.methods
}
error.columns <- c("n.muts", "coes", "sigma", "d.true", "method", "n.reps", "fail.rate", "d.bias", "d.rMSE", "coe.bias", "coe.rMSE", "sig.bias", "sig.rMSE")
parm.sets <- expand.grid(n.muts=mut.vals, coe=coe.vals, sig=sig.vals, d.true=d.true, method=fit.methods2)
parm.sets.string <- apply(parm.sets, 1, function(x) paste(x, collapse="_"))
error.table <- as.data.frame(matrix(nrow=length(parm.sets[,1]), ncol=length(error.columns)))
colnames(error.table) <- error.columns
error.table[,1:length(parm.sets[1,])] <- parm.sets
er.cols2 <- c("n.muts", "coes", "sigma", "d.true", "p.MLE", "p.RDB", "p.max", "p.seq", "p.RDB.all")
parm.sets2 <- expand.grid(n.muts=mut.vals, coe=coe.vals, sig=sig.vals, d.true=d.true)
parm.sets.string2 <- apply(parm.sets2, 1, function(x) paste(x, collapse="_"))
seq.method.props <- as.data.frame(matrix(nrow=length(parm.sets2[,1]), ncol=length(er.cols2)))
colnames(seq.method.props) <- er.cols2
seq.method.props[,1:length(parm.sets2[1,])] <- parm.sets2
for (method.i in 1:length(fit.methods2)){ # loop over each method
for (mut.i in 1:length(mut.vals)){ # loop over each number of mutations
infile <- paste(inpath, "_", fit.methods2[method.i], "_", mut.vals[mut.i], "muts.txt", sep="")
data <- read.table(file=infile, header=TRUE)
for (coe.i in 1:length(coe.vals)){
for (sig.i in 1:length(sig.vals)){
sub.data <- subset(data, (coes==coe.vals[coe.i] & sigma==sig.vals[sig.i]))
sub.data.raw <- sub.data
parm.set.string <- paste(c(sub.data[1, 2:5], fit.methods2[method.i]), collapse="_")
error.row <- which(parm.sets.string == parm.set.string)
error.table$n.reps[error.row] <- length(sub.data$d.hat)
error.table$fail.rate[error.row] <- (length(which(sub.data$d.hat >= 0.99*d.range[2] | sub.data$d.hat <= 1.1*d.range[1])))/length(sub.data$d.hat)
nf.rows <- which(!(sub.data$d.hat > 0.99*d.range[2] | sub.data$d.hat < 1.1*d.range[1]))
sub.data <- sub.data[nf.rows,]
if (length(nf.rows)>0){
denom <- 1
if (relative==TRUE){
denom <- d.true
}
error.table$d.bias[error.row] <- mean((sub.data$d.hat - sub.data$d.true))/denom
error.table$d.rMSE[error.row] <-mean((abs(sub.data$d.hat - sub.data$d.true)))/denom
coe.col1 <- which(colnames(sub.data)=="u.hats.1")
coe.cols <- seq(coe.col1,(coe.col1+mut.vals[mut.i]-1))
coe.errors <- apply(sub.data[,coe.cols], 2, function(x) (x-sub.data$coe))
denom <- 1
if (relative==TRUE){denom <- coe.vals[coe.i]}
error.table$coe.bias[error.row] <- mean(coe.errors)/denom
error.table$coe.rMSE[error.row] <- mean(abs(coe.errors))/denom
denom <- 1
if (relative==TRUE){denom <- sig.vals[sig.i]}
error.table$sig.bias[error.row] <- mean((sub.data$sig.hat - sub.data$sigma))/denom
error.table$sig.rMSE[error.row] <- mean(abs(sub.data$sig.hat - sub.data$sigma))/denom
}
if (fit.methods2[method.i]=="seq"){
parm.string2 <- paste(sub.data.raw[1,2:5], collapse="_")
seq.methods.row <- which(parm.sets.string2==parm.string2)
nreps <- length(sub.data.raw$method)
seq.method.props$p.MLE[seq.methods.row] <- length(which(sub.data.raw$method=="MLE"))/nreps
seq.method.props$p.RDB[seq.methods.row] <- length(which(sub.data.raw$method=="RDB"))/nreps
seq.method.props$p.max[seq.methods.row] <- length(which(sub.data.raw$method=="max"))/nreps
seq.method.props$p.seq[seq.methods.row] <- length(which(sub.data.raw$method=="seq"))/nreps
seq.method.props$p.RDB.all[seq.methods.row] <- length(which(sub.data.raw$method=="RDB.all"))/nreps
}
}
}
}
}
return(list(error.table=error.table, seq.method.props=seq.method.props))
}
#' Analyze batch data generated by \code{\link{sim.fit.mult.add.data.batch}}
#'
#' @param mut.vals Vector of number of mutations to simulate
#' @param coe.vals Vector of stickbreaking coefficients to analyze
#' @param sig.vals Vector of sigma values to analyze
#' @param inpath The path to read input files from. Generally same as \code{outpath} from
#' \code{\link{sim.fit.stick.data.batch}}.
#' @param relative \code{TRUE/FALSE}. Should bias and rMSE be divided by parameter value to make them relative values?
#' @param coes Vector of stickbreaking coefficients for each mutation
#' @return \code{error.table} A data frame with the parametric conditions and the failure rate, bias and rMSE
#' for the parameters d, the stickbreaking coefficients, and sigma. \cr
#' @details This function assesses bias and relative root mean squared error (rMSE) from the data generated by
#' \code{\link{sim.fit.mult.add.data.batch}}.
#' @export
analyze.mult.add.data.batch <- function(mut.vals, coe.vals, sig.vals, inpath, relative, coes){
error.columns <- c("n.muts", "coes", "sigma", "n.reps", "coe.bias", "coe.rMSE", "sig.bias", "sig.rMSE")
parm.sets <- expand.grid(n.muts=mut.vals, coe=coe.vals, sig=sig.vals)
parm.sets.string <- apply(parm.sets, 1, function(x) paste(x, collapse="_"))
error.table <- as.data.frame(matrix(nrow=length(parm.sets[,1]), ncol=length(error.columns)))
colnames(error.table) <- error.columns
error.table[,1:length(parm.sets[1,])] <- parm.sets
for (mut.i in 1:length(mut.vals)){ # loop over each number of mutations
infile <- paste(inpath, "_", mut.vals[mut.i], "muts.txt", sep="")
data <- read.table(file=infile, header=TRUE)
for (coe.i in 1:length(coe.vals)){
for (sig.i in 1:length(sig.vals)){
sub.data <- subset(data, (coes==coe.vals[coe.i] & sigma==sig.vals[sig.i]))
parm.set.string <- paste(sub.data[1, 3:5], collapse="_")
error.row <- which(parm.sets.string == parm.set.string)
error.table$n.reps[error.row] <- length(sub.data$n.muts)
coe.col1 <- which(colnames(sub.data) %in% c("s.hats.1", "w.hats.1"))
coe.cols <- seq(coe.col1,(coe.col1+mut.vals[mut.i]-1))
coe.errors <- apply(sub.data[,coe.cols], 2, function(x) (x-sub.data$coe))
denom <- 1
if (relative==TRUE){denom <- coe.vals[coe.i]}
error.table$coe.bias[error.row] <- mean(coe.errors)/denom
error.table$coe.rMSE[error.row] <- mean(abs(coe.errors))/denom
denom <- 1
if (relative==TRUE){denom <- sig.vals[sig.i]}
error.table$sig.bias[error.row] <- mean((sub.data$sig.hat - sub.data$sigma))/denom
error.table$sig.rMSE[error.row] <- mean(abs(sub.data$sig.hat - sub.data$sigma))/denom
} # next sig.i
} #next coe.i
} #next mut.i
return(error.table)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.