Nothing
#' Validate Model Predictions
#'
#' @description This function is designed to perform model validation when the corresponding outcome variable is available. It facilitates the comparison of model predictions
#' with the provided outcome variable, which can be continuous, binary, or time-to-event.
#'
#' @details This function is invoked by multiple prediction functions within this package when an outcome variable is available for a new dataset.
#' However, users can also directly call this function if needed.
#'
#' @param predicted A vector of model prediction values, which can be generated by prediction functions in this package such as LASSO2_predict and XGBtraining_predict.
#' For continuous outcomes, this vector represents model scores; for binary outcomes, it represents the probability of the positive class;
#' for time-to-event outcomes, it contains risk scores, which will later be transformed into estimated survival probabilities corresponding to times in the new data.
#'
#' @param outcomeType Outcome variable type. There are three choices: "binary" (default), "continuous", and "time-to-event".
#' @param trueY A vector of the outcome variable when it is continuous or binary.
#' @param time A vector of time for time-to-event outcome.
#' @param trueEvent A vector of the event for time-to-event outcome.
#' @param baseHz A table for accumulating baseline hazard for multiple time points, usually generated based on a training data set.
#' @param u A single numeric follow-up time for survival outcomes.
#' @param outfile A string for the output file, including the path if necessary but without the file type extension.
#' @import rms
#' @import survival
#' @import Hmisc
#' @importFrom utils write.csv
#' @importFrom grDevices dev.off
#' @importFrom grDevices pdf
#'
#' @return A vector of model prediction values from the input
#' @references
#' Harrell Jr F (2023). rms: Regression Modeling Strategies_. R package version 6.7-1, <https://CRAN.R-project.org/package=rms>
#'
#' Harrell Jr F (2023). Hmisc: Harrell Miscellaneous_. R package version 5.1-1, <https://CRAN.R-project.org/package=Hmisc>
#'
#' @examples
#' # Load in data sets:
#' data("datlist", package = "csmpv")
#' tdat = datlist$training
#' vdat = datlist$validation
#'
#' # The function saves files locally. You can define your own temporary directory.
#' # If not, tempdir() can be used to get the system's temporary directory.
#' temp_dir = tempdir()
#' # As an example, let's define Xvars, which will be used later:
#' Xvars = c("highIPI", "B.Symptoms", "MYC.IHC", "BCL2.IHC", "CD10.IHC", "BCL6.IHC")
#'
#' # The function can work with multiple models and multiple outcome types.
#' # Here, we use XGBoost model with binary outcome as an example:
#' bxfit = XGBtraining(data = tdat, biomks = Xvars, Y = "DZsig",
#' outfile = paste0(temp_dir, "/binary_XGBoost"))
#' testdat = vdat[,bxfit$XGBoost_model$feature_names]
#' test = xgboost::xgb.DMatrix(data.matrix(testdat))
#' scores = stats::predict(bxfit$XGBoost_model, test)
#' names(scores) = rownames(vdat)
#' Y = bxfit$Y
#' outs = validation(predicted = scores, outcomeType = "binary", trueY = vdat[,Y],
#' outfile = paste0(temp_dir, "/binary_XGBoost_validate"))
#' # You might save the files to the directory you want.
#'
#' # To delete the "temp_dir", use the following:
#' unlink(temp_dir)
#' @export
validation = function(predicted =NULL, outcomeType = c("binary","continuous","time-to-event"), trueY = NULL, time = NULL, trueEvent = NULL,
baseHz = NULL,u = 2, outfile = "nameWithPath"){
if(outcomeType == "continuous"){
newplot = validate_continuous(yhat = predicted, yobs = trueY)
pdf(paste0(outfile, "_newData_validation.pdf"))
print(newplot)
dev.off()
print(newplot)
}else if(outcomeType == "binary"){
minV = min(c(predicted, trueY))
maxV = max(c(predicted, trueY))
# newplot = CalibrationCurves::val.prob.ci.2(predicted, trueY, CL.smooth = TRUE, logistic.cal = TRUE, lty.log = 9,
# col.log = "red", lwd.log = 1.5, col.ideal = colors()[10], lwd.ideal = 0.5,
# xlim = c(minV, maxV), ylim = c(minV, maxV))
pdf(paste0(outfile, "_newData_validation.pdf"))
rms::val.prob(predicted, trueY)
dev.off()
rms::val.prob(predicted, trueY)
}else if(outcomeType == "time-to-event"){
## for this part, I do need baseHz to get survival probability
## notice that predicted here is risk score, or exp(linear model score)
## in order to get surv: exp(-bestMatchedTimeCumHaz * exp(linear model score))
## the trick is how to get matched CumHaz
# Calculate absolute differences
diffs = abs(baseHz[, "time"] - u)
# Find rows where X - u > 0
filtered_rows = which(baseHz[, "time"] - u > 0)
# Find the index with minimum difference
min_diff_index = filtered_rows[which.min(diffs[filtered_rows])]
cumhaz = baseHz[min_diff_index,"hazard"]
## in order to get surv: exp(-bestMatchedTimeCumHaz * exp(linear model score)), should do more
ss = exp(-cumhaz * predicted) ## predicted is risk score, ie: exp(linear model score)
survs = survival::Surv(time = time, event = trueEvent)
cindex = Hmisc::rcorr.cens(x=ss, S = survs)
write.csv(cindex, paste0(outfile, "_newData_Cindex.csv"))
newsurv = rms::val.surv(S = survs, est.surv = ss)
pdf(paste0(outfile, "_newData_validation.pdf"))
plot(newsurv, xlab = "Predicted Probability",
ylab = "Actual Probability",
main = paste0("C-index: ", format(cindex[1], digits = 4)))
dev.off()
plot(newsurv, xlab = "Predicted Probability",
ylab = "Actual Probability",
main = paste0("C-index: ", format(cindex[1], digits = 4)))
}
return(predicted)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.