Nothing
#' Optimization of Floating Percentile Model Parameters
#'
#' Calculate parameter inputs that optimize benchmark performance
#'
#' @param data data.frame containing, at a minimum, chemical concentrations as columns and a logical \code{Hit} column classifying toxicity
#' @param paramList character vector of column names of chemical concentration variables in \code{data}
#' @param FN_crit numeric vector over which to optimize false negative thresholds (default = \code{seq(0.1, 0.9, by = 0.05))}
#' @param alpha.test numeric vector of type-I error rate values over which to optimize (default = \code{seq(0.05, 0.5, by = 0.05))}
#' @param which numeric or character indicating which type of plot to generate (see Details; default = \code{c(1, 2)})
#' @param simplify logical; whether to generate simplified output (default = \code{TRUE})
#' @param plot logical; whether to generate a plot to visualize the opimization results
#' @param colors values recognizible as colors to be passed to \code{colorRampPalette} (via \code{colorGradient}) to generate a palette for plotting (default = \code{heat.colors(10)})
#' @param colsteps integer; number of discrete steps to interpolate colors in \code{colorGradient} (default = \code{100})
#' @param ... additional argument passed to \code{FPM}, \code{chemSig}, \code{chemSigSelect}, and \code{colorGradient}
#' @details
#' \code{optimFPM} was designed to help optimize the predictive capacity of the benchmarks generated by \code{FPM}. The default input parameters to
#' \code{FPM} (i.e., \code{FN_crit = 0.2} and \code{alpha.test = 0.05}) are arbitrary, and optimization can help to objectively establish more accurate benchmarks.
#' Graphical output from \code{optimFPM} can also help users to understand the relationship(s) between benchmark accuracy/error, \code{FN_crit}, and \code{alpha.test}.
#'
#' Default inputs for \code{FN_crit} and \code{alpha.test} were selected to represent a reasonable range of values to test. Testing over both ranges
#' will result in a two-way optimization, which can be computationally intensive. Alternatively, \code{optimFPM} can be run for one parameter at a time
#' by specifying a single value for \code{FN_crit} or \code{alpha.test}. Note that inputting single values for both \code{FN_crit} and \code{alpha.test} will generate unhelpful results.
#'
#' Several metrics are used for optimization:
#' 1. Ratio of sensitivity/specificity ("sensSpecRatio"), calculated as the minimum of the two metrics divided by the maximum of the two. Therefore, this value will always be between 0 and 1, representing the balance between correct \code{Hit==TRUE} and \code{Hit==FALSE} predictions.
#' 2. Overall reliability ("OR") (i.e., probability of correctly predicting \code{Hit} values)
#' 3. Fowlkes-Mallows Index ("FM") - an average of metrics focusing on predicting \code{Hit==TRUE}
#' 4. Matthew's Correlation Coefficient ("MCC") - a measure of the correspondence between the data and predictions analogous to a Pearson's correlation coefficient (but for binary data)
#'
#' Graphical output will differ depending on whether or not a single value is input for \code{FN_crit} or \code{alpha.test}. Providing a single value for one
#' of the two arguments will generate a line graph, whereas providing longer vectors (i.e., length > 1) of inputs for both arguments will generate dot matrix plots using \code{colors} to generate
#' a color palette and \code{colsteps} to define the granularity of the color gradient with the palette. The order of \code{colors} will be plotted
#' from more optimal to less optimal; for example, the default of \code{heat.colors(10)} will show optimal colors as red and less optimal colors as yellower.
#' By default, multiple plots will be generated, however the \code{which} argument can control which plots are generated. Inputs
#' to \code{which} are, by default, \code{c(1, 2, 3, 4)} for the metrics noted above, and flexible character inputs also can be used to a degree.
#' Black squares indicate the optimal argument inputs; these values are also printed to the console and can be assigned to an object.
#'
#' @return data.frame of optimized \code{FN_crit} and/or \code{alpha.test} values
#' @seealso FPM, colorGradient, colorRampPalette
#' @importFrom dplyr bind_rows
#' @importFrom graphics points
#' @importFrom graphics lines
#' @importFrom graphics abline
#' @importFrom graphics legend
#' @importFrom graphics polygon
#' @examples
#' paramList = c("Cd", "Cu", "Fe", "Mn", "Ni", "Pb", "Zn")
#' FN_seq <- seq(0.1, 0.3, 0.05)
#' alpha_seq <- seq(0.05, 0.2, 0.05)
#' optimFPM(h.tristate, paramList, FN_seq, 0.05)
#' optimFPM(h.tristate, paramList, 0.2, alpha_seq)
#' optimFPM(h.tristate, paramList, FN_seq, alpha_seq, which=2)
#' @export
optimFPM <- function(data,
paramList,
FN_crit = seq(0.1, 0.9, by = 0.05),
alpha.test = seq(0.05, 0.5, by = 0.05),
which = c(1, 2, 3, 4),
simplify = TRUE,
plot = TRUE,
colors = heat.colors(10),
colsteps = 100,
...){
if(length(FN_crit) > 1 & length(alpha.test) > 1){
grid <- expand.grid(FN_crit, alpha.test)
names(grid) <- c("FN_crit", "alpha.test")
x <- apply(as.matrix(grid), 1, function(x) {
suppressWarnings(expr = {
tmp <- FPM(data, paramList, FN_crit = x[[1]], alpha.test = x[[2]], ...)[["FPM"]]
})
return(tmp)
})
z <- as.data.frame(matrix(ncol = length(paramList) + 12,
nrow = 1, data = rep(NA, length(paramList) + 12)))
names(z) <- c(paramList, "FN_crit", "TP", "FN", "TN", "FP", "pFN", "pFP",
"sens", "spec", "OR", "FM", "MCC")
z <- do.call(rbind, lapply(x, function(i) dplyr::bind_rows(z, i)[-1,])) # remove dummy row
row.names(z) <- NULL
y <- do.call(rbind, lapply(x, function(tmp) {
c(sensSpec = min(tmp$sens, tmp$spec)/max(tmp$sens, tmp$spec),
OR = tmp$OR, FM = tmp$FM, MCC = tmp$MCC)
}))
out <- data.frame(grid, y)
min1 <- out[min(which.min(1 - out$sensSpec)),]
min2 <- out[min(which.min(1 - out$OR)),]
min3 <- out[min(which.min(1 - out$FM)),]
min4 <- out[min(which.min(1 - out$MCC)),]
if(plot){
if(any(c(1, "sens", "spec", "sensitivity", "Sensitivity", "senSpec", "SenSpec",
"specificity", "Specificity", "sensSpec", "SensSpec", "sensspec", "ss", "SS") %in% which)){
plot(out[1:2],
col = colorGradient(1 - out[, "sensSpec"], colors = colors, colsteps = colsteps, na.rm = TRUE),
pch = 15, xlab = "FN Limit", ylab = "Alpha", cex = 3,
main = paste0("Sensitivity-Specificity Ratio\nRange:",
paste(signif(range(out[, "sensSpec"], na.rm = T), 2), collapse = "-")))
points(x = min1$FN_crit, y = min1$alpha.test, pch = 0, cex = 4, col = "black")
}
if(any(c(2, "OR", "or", "reliability", "Reliability", "rel", "Rel") %in% which)){
plot(out[1:2],
col = colorGradient(1 - out[, "OR"], colors = colors, colsteps = colsteps, na.rm = TRUE),
pch = 15, xlab = "FN Limit", ylab = "Alpha", cex = 3,
main = paste0("Overall Reliability\nRange:",
paste(signif(range(out[, "OR"], na.rm = T), 2), collapse = "-")))
points(x = min2$FN_crit, y = min2$alpha.test, pch = 0, cex = 4, col = "black")
}
if(any(c(3, "fm", "FM", "Fowlkes", "Fowlks", "fowlkes", "mallows", "mallow", "Mallows", "Mallow",
"f-m", "F-M") %in% which)){
plot(out[1:2],
col = colorGradient(1 - out[, "FM"], colors = colors, colsteps = colsteps, na.rm = TRUE),
pch = 15, xlab = "FN Limit", ylab = "Alpha", cex = 3,
main = paste0("Fowlkes-Mallows Index\nRange:",
paste(signif(range(out[, "FM"], na.rm = T), 2), collapse = "-")))
points(x = min3$FN_crit, y = min3$alpha.test, pch = 0, cex = 4, col = "black")
}
if(any(c(4, "Matthews", "matthews", "MCC", "mcc", "Mcc") %in% which)){
plot(out[1:2],
col = colorGradient(1 - out[, "MCC"], colors = colors, colsteps = colsteps, na.rm = TRUE),
pch = 15, xlab = "FN Limit", ylab = "Alpha", cex = 3,
main = paste0("Matthew's Correlation Coefficient\nRange:",
paste(signif(range(out[, "MCC"], na.rm = T), 2), collapse = "-")))
points(x = min4$FN_crit, y = min4$alpha.test, pch = 0, cex = 4, col = "black")
}
}
tmp <- data.frame(t(min1[1:2]), t(min2[1:2]), t(min3[1:2]), t(min4[1:2]))
names(tmp) <- c("sensSpec", "OR", "FM", "MCC")
if(simplify){
return(tmp)
} else {
return(list(z, tmp))
}
} else if(length(FN_crit) > 1 & length(alpha.test) == 1) {
if(any(FN_crit > 1) | any(FN_crit <= 0)){
stop("FN_crit values must be between 0 and 1")
}
tmp <- suppressWarnings(expr = {
sapply(FN_crit, function(x) {
FPM(data = data, paramList = paramList, FN_crit = x, alpha.test = alpha.test, ...)[["FPM"]]
})
})
pFN <- unlist(tmp["pFN",])
pFP <- unlist(tmp["pFP",])
sens <- unlist(tmp["sens",])
spec <- unlist(tmp["spec",])
OR <- unlist(tmp["OR",])
FM <- unlist(tmp["FM",])
MCC <- unlist(tmp["MCC",])
sensSpec <- numeric()
for(i in 1:length(sens)){
sensSpec[i] <- min(c(sens[i], spec[i]))/max(c(sens[i], spec[i]))
}
pick1 <- FN_crit[which.min(1 - sensSpec)]
pick2 <- FN_crit[which.min(1 - OR)]
pick3 <- FN_crit[which.min(1 - FM)]
pick4 <- FN_crit[which.min(1 - MCC)]
z <- as.data.frame(matrix(ncol = length(paramList) + 12,
nrow = 1, data = rep(NA, length(paramList) + 12)))
names(z) <- c(paramList, "FN_crit", "TP", "FN", "TN", "FP", "pFN", "pFP",
"sens", "spec", "OR", "FM", "MCC")
out <- suppressWarnings(expr = {
do.call(dplyr::bind_rows,
list(z,
data.frame(FPM(data = data, paramList = paramList, FN_crit = pick1, ...)[["FPM"]]),
data.frame(FPM(data = data, paramList = paramList, FN_crit = pick2, ...)[["FPM"]]),
data.frame(FPM(data = data, paramList = paramList, FN_crit = pick3, ...)[["FPM"]]),
data.frame(FPM(data = data, paramList = paramList, FN_crit = pick4, ...)[["FPM"]])
)
)[-1,]
})
row.names(out) <- c("sensSpec", "OR", "FM", "MCC")
if(plot){
plot(x = FN_crit, y = OR, ylim = c(0,1),
xlab = "FN Limit", ylab = "Error rate / Metric Value", type = "n")
lines(x = FN_crit, y = pFN, col = "darkred", type = "l", lwd = 2)
lines(x = FN_crit, y = pFP, col = "darkblue", type = "l", lwd = 2)
lines(x = FN_crit, y = FN_crit, col = "darkred", lty = 2)
polygon(x = c(FN_crit, rev(FN_crit)),
y = c(FN_crit, rev(pFN)),
col = rgb(1,0,0,0.1), border = F)
lines(x = FN_crit, y = sensSpec, col = "darkorchid")
lines(x = FN_crit, y = OR, col = "darkorange")
lines(x = FN_crit, y = FM, col = "gold3")
lines(x = FN_crit, y = MCC, col = "forestgreen")
legend("top", inset = -0.1, xpd = TRUE, cex = 0.8, ncol = 2, bg = "white",
lty = c(1, 1, 2, 1, 1, 1, 1), lwd = c(2, 2, 1, 1, 1, 1, 1),
col = c("darkred", "darkblue", "darkred",
"darkorchid", "darkorange", "gold3", "forestgreen"),
legend = c("FN rate", "FP rate", "FN threshold", "sensSpec", "OR", "FM", "MCC"))
}
if(simplify){
return(
c(sensSpec = out["sensSpec", "FN_crit"],
OR = out["OR", "FN_crit"],
FM = out["FM", "FN_crit"],
MCC = out["MCC", "FN_crit"])
)
} else{
return(list(optim = out,
optim_FN = c(sensSpec = out["sensSpec", "FN_crit"],
OR = out["OR", "FN_crit"],
FM = out["FM", "FN_crit"],
MCC = out["MCC", "FN_crit"])
)
)
}
} else if(length(FN_crit) == 1 & length(alpha.test) > 1){
if(any(alpha.test > 1) | any(alpha.test <= 0)){
stop("alpha.test values must be between 0 and 1")
}
z <- as.data.frame(matrix(ncol = length(paramList) + 13,
nrow = 1, data = rep(NA, length(paramList) + 13)))
names(z) <- c(paramList, "FN_crit", "TP", "FN", "TN", "FP", "pFN", "pFP",
"sens", "spec", "OR", "FM", "MCC", "alpha.test")
tmp <- suppressWarnings(expr = {
dplyr::bind_rows(z,
sapply(alpha.test, function(x) {
data.frame(FPM(data = data, paramList = paramList, FN_crit = FN_crit, alpha.test = x, ...)[["FPM"]], alpha.test = x)
}))[-1,]
})
pFN <- (tmp[,"pFN"])
pFP <- (tmp[,"pFP"])
sens <- (tmp[,"sens"])
spec <- (tmp[,"spec"])
OR <- (tmp[,"OR"])
FM <- (tmp[,"FM"])
MCC <- (tmp[,"MCC"])
sensSpec <- numeric()
for(i in 1:length(sens)){
sensSpec[i] <- min(c(sens[i], spec[i]))/max(c(sens[i], spec[i]))
}
pick1 <- alpha.test[which.min(1 - sensSpec)]
pick2 <- alpha.test[which.min(1 - OR)]
pick3 <- alpha.test[which.min(1 - FM)]
pick4 <- alpha.test[which.min(1 - MCC)]
out <- suppressWarnings(expr = {
do.call(dplyr::bind_rows,
list(z,
data.frame(FPM(data = data, paramList = paramList, alpha.test = pick1, ...)[["FPM"]], alpha.test = pick1),
data.frame(FPM(data = data, paramList = paramList, alpha.test = pick2, ...)[["FPM"]], alpha.test = pick2),
data.frame(FPM(data = data, paramList = paramList, alpha.test = pick3, ...)[["FPM"]], alpha.test = pick3),
data.frame(FPM(data = data, paramList = paramList, alpha.test = pick4, ...)[["FPM"]], alpha.test = pick4)
)
)[-1,]
})## force data into standard template then remove template row
row.names(out) <- c("sensSpec", "OR", "FM", "MCC")
if(plot){
plot(x = alpha.test, y = OR, ylim = c(0,1),
xlab = "Alpha", ylab = "Error rate / Metric Value", type = "n")
lines(x = alpha.test, y = pFN, col = "darkred", type = "l", lwd = 2)
lines(x = alpha.test, y = pFP, col = "darkblue", type = "l", lwd = 2)
lines(x = alpha.test, y = sensSpec, col = "darkorchid")
lines(x = alpha.test, y = OR, col = "darkorange")
lines(x = alpha.test, y = FM, col = "gold3")
lines(x = alpha.test, y = MCC, col = "forestgreen")
legend("top", inset = -0.1, xpd = TRUE, cex = 0.8, ncol = 2, bg = "white",
lty = c(1, 1, 1, 1, 1, 1), lwd = c(2, 2, 1, 1, 1, 1),
col = c("darkred", "darkblue",
"darkorchid", "darkorange", "gold3", "forestgreen"),
legend = c("FN rate", "FP rate", "sensSpec", "OR", "FM", "MCC"))
}
if(simplify){
return(
c(sensSpec = out["sensSpec", "alpha.test"],
OR = out["OR", "alpha.test"],
FM = out["FM", "alpha.test"],
MCC = out["MCC", "alpha.test"])
)
} else{
return(
list(optim = out,
optim_alpha = c(sensSpec = out["sensSpec", "alpha.test"],
OR = out["OR", "alpha.test"],
FM = out["FM", "alpha.test"],
MCC = out["MCC", "alpha.test"]))
)
}
}
}## end code
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.