###########
#Called by toxboot(). This function will will take
#data table datchemval with columns logc, resp, bmad
#
#Function returns datchemsample which is a data table with number of columns equal to the number of sampled replicates
#and number of rows equal to the number of measured points. For example, a chemical with 6 doses each measured
#3 times with 1000 boot strap replicates will return at data table with 18 rows and 1000 columns.
if(getRversion() >= "2.15.1") utils::globalVariables(c("index", "sd", "median"))
#' Generates bootstrap samples of dose response data
#'
#' \code{toxbootReplicates} takes input dose response data and returns
#' a matrix of sampled dose response values.
#'
#' @param datchemval data.table, columns logc, resp, bmad
#' @param boot_method string, specifies method. Default is 'smooth'.
#' @param replicates number of bootstrap samples. Default 100
#'
#' @details
#' Accepted methods are "case", "smooth", "residuals_hill", "wild_hill",
#' "residuals_gnls", "wild_gnls", "watt_normal", "watt_student_df5"
#'
#' @seealso \code{\link{toxboot}}
#'
#' @import data.table
#' @importFrom tcpl tcplFit
#' @importFrom stats mad sd median rnorm rt
#'
#' @export
toxbootReplicates <- function(datchemval,
boot_method = 'smooth',
replicates = 100
){
#make a copy of datchemval to avoid modification in parent function
datchemval_copy <- copy(datchemval)
#check to make sure boot_method is supported, return error if not
methods <- c("case",
"smooth",
"residuals_hill",
"wild_hill",
"residuals_gnls",
"wild_gnls",
"watt_normal",
"watt_student_df5")
if(!(boot_method %in% methods)){
stop("boot_method not supported")
}
#datchemstat is used in multiple cases,
#best to have it before the different methods to make sure it's done the same way
#get mean, sd, median of values at given conc
datchemstat = data.table(logc = sort(unique(datchemval_copy$logc)))
for (conc in unique(datchemstat$logc)){
datchemstat[logc == conc, sd := sd( datchemval_copy[logc == conc, resp])]
datchemstat[logc == conc, mean := mean( datchemval_copy[logc == conc, resp])]
datchemstat[logc == conc, median := median(datchemval_copy[logc == conc, resp])]
#Adds column that gives the number of repeats for that chemical at that concentration
datchemstat[logc == conc,
number_measurements := length(datchemval_copy[logc == conc, resp])]
}
bmad = datchemval_copy$bmad[1]
#now generate the samples depending on method
if(boot_method == 'watt_normal'){
#Make data frame with number of columns of sampled values equal to replicates
tempmat <- c()
for (conc in datchemstat[, logc]){ #REMOVE SORT AND UNIQUE?
number_measurements <- datchemstat[logc == conc, number_measurements]
resp_vect <- datchemval_copy[logc == conc, resp] #gets the measured responses at this concentration
if(number_measurements > 1){
for (numpoint in 1:number_measurements){
mean_resp <- datchemstat[logc == conc, mean]
sd_resp <- datchemstat[logc == conc, sd]
vect <- rnorm(n = replicates,
mean = mean_resp,
sd = sd_resp)
tempmat <- rbind(tempmat, vect)
}
}
else{
vect <- rep(resp_vect, times = replicates) #This is necessary when only one dose, as sample will do 1:X if length of X == 1
tempmat <- rbind(tempmat, vect)
}
}
} #end method 'watt_normal'
if(boot_method == 'watt_student_df5'){
#Make data frame with number of columns of sampled values equal to replicates
tempmat <- c()
for (conc in datchemstat[, logc]){ #REMOVE SORT AND UNIQUE?
number_measurements <- datchemstat[logc == conc, number_measurements]
resp_vect <- datchemval_copy[logc == conc, resp] #gets the measured responses at this concentration
if(number_measurements > 1){
for (numpoint in 1:number_measurements){
mean_resp <- datchemstat[logc == conc, mean]
sd_resp <- datchemstat[logc == conc, sd]
vect <- rt(n = replicates, df = 5) * sd_resp * sqrt((5 - 2) / 5) + mean_resp
tempmat <- rbind(tempmat, vect)
}
}
else{
vect <- rep(resp_vect, times = replicates) #This is necessary when only one dose, as sample will do 1:X if length of X == 1
tempmat <- rbind(tempmat, vect)
}
}
} #end method 'watt_student_df5'
if(boot_method == 'case'){
#This method samples each only actual measured values
#At each concentration, there are N samples where N is equal to
#the number of measurements
#Make data frame with number of columns of sampled values equal to replicates
tempmat <- c()
for (conc in datchemstat[, logc]){ #REMOVE SORT AND UNIQUE?
number_measurements <- datchemstat[logc == conc, number_measurements]
resp_vect <- datchemval_copy[logc == conc, resp] #gets the measured responses at this concentration
if(number_measurements > 1){
for (numpoint in 1:number_measurements){
vect <- sample(resp_vect, replicates, replace = TRUE)
tempmat <- rbind(tempmat, vect)
}
}
else{
vect <- rep(resp_vect, times = replicates) #This is necessary when only one dose, as sample will do 1:X if length of X == 1
tempmat <- rbind(tempmat, vect)
}
}
} #end method 'case'
if(boot_method == 'smooth'){
#A few options for this one. Does that small amount of zero centered random noise
#Get added to each measured point, or is this on top of case sampling?
#For now, I'll mix with case sampling, performing the same case sampling above
#But adding a noise value
#
#Second, what function to use for noise? Going to use normal distribution centered
#on measured point and stdev = bmad (bmad already multiplied by 1.4826 within mad() function)
#also pdf www.anawida.de/teach/SS12/compStat/Boots/smoothboot/smoothboot.pdf
tempmat <- c()
for (conc in datchemstat[, logc]){ #REMOVE SORT AND UNIQUE?
number_measurements <- datchemstat[logc == conc, number_measurements]
resp_vect <- datchemval_copy[logc == conc, resp] #gets the measured responses at this concentration
if(number_measurements > 1){
for (numpoint in 1:number_measurements){
vect <- sample(resp_vect, replicates, replace = TRUE) +
rnorm(n = replicates, mean = 0, sd = bmad)
tempmat <- rbind(tempmat, vect)
}
}
else{
vect <- resp_vect + #This is necessary when only one dose, as sample will do 1:X if length of X == 1
rnorm(n = replicates, mean = 0, sd = bmad)
tempmat <- rbind(tempmat, vect)
}
}
} #end method 'smooth'
if(boot_method == 'residuals_hill'){
#Get parameters from pipeline fit with actual points
normalfit <- tcplFit(datchemval_copy$logc,
datchemval_copy$resp,
datchemval_copy$bmad)#Fit the input data as usual, to compare
normalfit_hill_ga <- normalfit$hill_ga
normalfit_hill_tp <- normalfit$hill_tp
normalfit_hill_gw <- normalfit$hill_gw
#Get vector of residuals for each measured point
datchemval_copy[, fitval := hill_curve(hill_tp = normalfit_hill_tp,
hill_ga = normalfit_hill_ga,
hill_gw = normalfit_hill_gw,
lconc = logc)]
datchemval_copy[, residual := resp - fitval]
#now, column residual lists the residuals and
#column fitval are the values from the pipeline fit at measured concentrations
#Currently issue for those that aren't hits, presumably NAs being put in for fitval or residual
#for now, set these to 0, but will need to have better way of handling this.
datchemval_copy[is.na(residual), residual := 0]
#
tempmat <- c()
for (conc in datchemval_copy[, logc]){
fitval <- hill_curve(hill_tp = normalfit_hill_tp,
hill_ga = normalfit_hill_ga,
hill_gw = normalfit_hill_gw,
lconc = conc)
#like above note, if fitval is NA, set to 0 to avoid error, but figure out better way of handling this
if (is.na(fitval)){
fitval <- 0
}
vect <- fitval + sample(datchemval_copy[, residual], replicates, replace = TRUE)
tempmat <- rbind(tempmat, vect)
}
} #end method 'resampling_hill'
if(boot_method == 'wild_hill'){
#Multiple options listed for how to sample the residual. Compared to resampling residuals,
#wild keeps the residual and calculated response paired, and then scales the residual based
#on some distribution, centered on the fit response. Going to use normal distribution with sd of 1
#Borrows a lot of code from 'resampling'
#Get parameters from pipeline fit with actual points
normalfit <- tcplFit(datchemval_copy$logc,
datchemval_copy$resp,
datchemval_copy$bmad)#Fit the input data as usual, to compare
normalfit_hill_ga <- normalfit$hill_ga
normalfit_hill_tp <- normalfit$hill_tp
normalfit_hill_gw <- normalfit$hill_gw
#Get vector of residuals for each measured point
datchemval_copy[, fitval := hill_curve(hill_tp = normalfit_hill_tp,
hill_ga = normalfit_hill_ga,
hill_gw = normalfit_hill_gw,
lconc = logc)]
datchemval_copy[, residual := fitval - resp]
#now, column residual lists the residuals and
#column fitval are the values from the pipeline fit at measured concentrations
#Currently issue for those that aren't hits, presumably NAs being put in for fitval or residual
#for now, set these to 0, but will need to have better way of handling this.
datchemval_copy[is.na(residual), residual := 0]
###
#I want to be able to apply the for loop before over each row, and have access to both fitval and the residual
#since concs are repeated, I can't reference the row by them, such as datchemval_copy[logc==conc,fitval], as this
#would return more than 1
#best option for now seems to be to make a column of 1:nrow
datchemval_copy[, index := c(1:nrow(datchemval_copy))]
#
tempmat <- c()
for (rowindex in datchemval_copy[, index]){
fitval <- datchemval_copy[index == rowindex, fitval]
residual <- datchemval_copy[index == rowindex, residual]
vect <- fitval + residual * rnorm(n = replicates, mean = 0, sd = 1)
tempmat <- rbind(tempmat, vect)
}
} #end method 'wild_hill'
if(boot_method == 'residuals_gnls'){
#Get parameters from pipeline fit with actual points
normalfit <- tcplFit(datchemval_copy$logc,
datchemval_copy$resp,
datchemval_copy$bmad)
normalfit_gnls_ga <- normalfit$gnls_ga
normalfit_gnls_tp <- normalfit$gnls_tp
normalfit_gnls_gw <- normalfit$gnls_gw
normalfit_gnls_la <- normalfit$gnls_la
normalfit_gnls_lw <- normalfit$gnls_lw
#Get vector of residuals for each measured point
datchemval_copy[, fitval := gnls_curve(top = normalfit_gnls_tp,
ga = normalfit_gnls_ga,
gw = normalfit_gnls_gw,
la = normalfit_gnls_la,
lw = normalfit_gnls_lw,
lconc = logc)]
datchemval_copy[, residual := resp - fitval]
#now, column residual lists the residuals and
#column fitval are the values from the pipeline fit at measured concentrations
#Currently issue for those that aren't hits, presumably NAs being put in for fitval or residual
#for now, set these to 0, but will need to have better way of handling this.
datchemval_copy[is.na(residual), residual := 0]
#
tempmat <- c()
for (conc in datchemval_copy[, logc]){ #removed sort, using setkey before calling tcplBootReplicates now so already sorted
fitval <- gnls_curve(top = normalfit_gnls_tp,
ga = normalfit_gnls_ga,
gw = normalfit_gnls_gw,
la = normalfit_gnls_la,
lw = normalfit_gnls_lw,
lconc = conc)
#like above note, if fitval is NA, set to 0 to avoid error, but figure out better way of handling this
if (is.na(fitval)){
fitval <- 0
}
vect <- fitval + sample(datchemval_copy[, residual], replicates, replace = TRUE)
tempmat <- rbind(tempmat, vect)
}
} #end method 'resampling_gnls'
if(boot_method == 'wild_gnls'){
#Multiple options listed for how to sample the residual. Compared to resampling residuals,
#wild keeps the residual and calculated response paired, and then scales the residual based
#on some distribution, centered on the fit response. Going to use normal distribution with sd of 1
#Borrows a lot of code from 'resampling'
#Get parameters from pipeline fit with actual points
normalfit <- tcplFit(datchemval_copy$logc,
datchemval_copy$resp,
datchemval_copy$bmad)#Fit the input data as usual, to compare
normalfit_gnls_ga <- normalfit$gnls_ga
normalfit_gnls_tp <- normalfit$gnls_tp
normalfit_gnls_gw <- normalfit$gnls_gw
normalfit_gnls_la <- normalfit$gnls_la
normalfit_gnls_lw <- normalfit$gnls_lw
#Get vector of residuals for each measured point
datchemval_copy[,fitval := gnls_curve(top = normalfit_gnls_tp,
ga = normalfit_gnls_ga,
gw = normalfit_gnls_gw,
la = normalfit_gnls_la,
lw = normalfit_gnls_lw,
lconc = logc)]
datchemval_copy[,residual := fitval - resp]
#now, column residual lists the residuals and
#column fitval are the values from the pipeline fit at measured concentrations
#Currently issue for those that aren't hits, presumably NAs being put in for fitval or residual
#for now, set these to 0, but will need to have better way of handling this.
datchemval_copy[is.na(residual), residual := 0]
###
#I want to be able to apply the for loop before over each row, and have access to both fitval and the residual
#since concs are repeated, I can't reference the row by them, such as datchemval_copy[logc==conc,fitval], as this
#would return more than 1
#best option for now seems to be to make a column of 1:nrow
datchemval_copy[, index := c(1:nrow(datchemval_copy))]
#
tempmat <- c()
for (rowindex in datchemval_copy[, index]){
fitval <- datchemval_copy[index == rowindex, fitval]
residual <- datchemval_copy[index == rowindex, residual]
vect <- fitval + residual * rnorm(n = replicates, mean = 0, sd = 1)
tempmat <- rbind(tempmat, vect)
}
} #end method 'wild_gnls'
datchemsample <- data.table(tempmat)
boot_replicates <- list('tempmat' = tempmat, 'datchemsample' = datchemsample)
return(boot_replicates)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.