#' Find best-fitted model
#'
#' This utility function used by the 'rwToT' package returns the best-fitted
#' parametric distribution model for the given Survival/Treatment data. This
#' function uses `flexsurv` package for parametric modeling for time-to-event
#' data. Parametric model using built-in distribution such as Weibull(scale),
#' Gamma(rate), Exponential(rate), Log-logistic(scale), Log-normal(meanlog),
#' and Gompertz(rate) gets evaluated and best model is selected based on lowest
#' AIC value. The function also returns the restricted mean of the KM curve up
#' to chosen time \code{t}.
#'
#' @param df_cohort A dataframe. Minimum required columns:
#' \itemize{
#' \item{`STATUS` : Censor status (0 = censored, 1 = event)}
#' \item{`DD` : The follow up time }}
#'
#' @param t A numeric value: the time point of interest
#' @return Best-fitted model 'output_model' and restricted mean of the KM curve
#' up to chosen time \code{t}
#' @examples
#' # Find best-fitted model for AML dataset.
#' best_model(rwToT_aml,20)
#' @export
#'
#'
best_model <- function(df_cohort, t) {
#creates survival time object and adds to dataframe
df_cohort <- df_cohort %>%
mutate(SurvObj = Surv(DD, STATUS))
#flexible parametric models fitted with different distributions
fit_model <- function(model_dist){
model <- tryCatch(flexsurvreg(SurvObj ~ 1, data = df_cohort, dist = model_dist),
error = function(e) NA)
invisible(model)
}
model <- c("Exp", "Weibull", "LogNorm", "LogLogist", "Gompertz")
model_dist <- c("exponential", "weibull", "lnorm", "llogis", "gompertz")
list_of_model <- setNames(map( {model_dist}, .f = fit_model), model)
model_aic <- purrr::map_dbl(paste0("list_of_model$", {model},"$AIC"), function(i) tryCatch(eval(parse(text=i)),
error = function(e) NA))
list2env(list_of_model,globalenv())
model_sum <- as.data.frame(cbind(model, model_aic), stringsAsFactors = FALSE)
names(model_sum) <- c("type", "AIC")
#best model with min AIC value
best_model <- model_sum %>%
dplyr::filter(!is.na(as.numeric(AIC))) %>%
dplyr::filter(as.numeric(AIC)==min(as.numeric(AIC), na.rm=TRUE)) %>%
dplyr::select(c(type))
#best model, restricted mean for output table
if (best_model == "Exp") {
output_model <- Exp
mean_predicted <- round(rmst_exp(t, rate = Exp$res[1, 1], start = 0), 1)
mean_u95 <- round(rmst_exp(t, rate = Exp$res[1, 2], start = 0), 1)
mean_l95 <- round(rmst_exp(t, rate = Exp$res[1, 3], start = 0), 1)
mean_sum <- paste(mean_predicted, " (", mean_l95, " - ", mean_u95, ")", sep = "")
best_model_rmst_mean <- list(mean_sum, "Exp")
} else if (best_model == "Weibull") {
output_model <- Weibull
mean_predicted <- round(rmst_weibull(t, shape = Weibull$res[1, 1], scale = Weibull$res[2, 1], start = 0), 1)
mean_l95 <- round(rmst_weibull(t, shape = Weibull$res[1, 2], scale = Weibull$res[2, 2], start = 0), 1)
mean_u95 <- round(rmst_weibull(t, shape = Weibull$res[1, 3], scale = Weibull$res[2, 3], start = 0), 1)
mean_sum <- paste(mean_predicted, " (", mean_l95, " - ", mean_u95, ")", sep = "")
best_model_rmst_mean <- list(mean_sum, "Weibull")
} else if (best_model == "LogNorm") {
output_model <- LogNorm
mean_predicted <- round(rmst_lnorm(t, meanlog = LogNorm$res[1, 1], sdlog = LogNorm$res[2, 1], start = 0), 1)
mean_l95 <- round(rmst_lnorm(t, meanlog = LogNorm$res[1, 2], sdlog = LogNorm$res[2, 2], start = 0), 1)
mean_u95 <- round(rmst_lnorm(t, meanlog = LogNorm$res[1, 3], sdlog = LogNorm$res[2, 3], start = 0), 1)
mean_sum <- paste(mean_predicted, " (", mean_l95, " - ", mean_u95, ")", sep = "")
best_model_rmst_mean <- list(mean_sum, "LogNorm")
} else if (best_model == "LogLogist") {
output_model <- LogLogist
mean_predicted <- round(rmst_llogis(t, shape = LogLogist$res[1, 1], scale = LogLogist$res[2, 1], start = 0), 1)
mean_l95 <- round(rmst_llogis(t, shape = LogLogist$res[1, 2], scale = LogLogist$res[2, 2], start = 0), 1)
mean_u95 <- round(rmst_llogis(t, shape = LogLogist$res[1, 3], scale = LogLogist$res[2, 3], start = 0), 1)
mean_sum <- paste(mean_predicted, " (", mean_l95, " - ", mean_u95, ")", sep = "")
best_model_rmst_mean <- list(mean_sum, "LogLogist")
} else if (best_model == "Gompertz") {
output_model <- Gompertz
mean_predicted <- round(rmst_gompertz(t, shape = Gompertz$res[1, 1], rate = Gompertz$res[2, 1], start = 0), 1)
mean_u95 <- round(rmst_gompertz(t, shape = Gompertz$res[1, 2], rate = Gompertz$res[2, 2], start = 0), 1)
mean_l95 <- round(rmst_gompertz(t, shape = Gompertz$res[1, 3], rate = Gompertz$res[2, 3], start = 0), 1)
mean_sum <- paste(mean_predicted, " (", mean_l95, " - ", mean_u95, ")", sep = "")
best_model_rmst_mean <- list(mean_sum, "Gompertz")
}
return(list("output_model" = output_model,
"best_model_rmst_mean" = best_model_rmst_mean))
}
# Count Proportion
#
# Utility function that returns the count and proportion of Censor data.
count_proportion <- function(x) {
stat_count <- table(x$STATUS)
if(length(stat_count)<2){
if(!is.na(stat_count['1'])){
stat_count['0'] = 0
}else {
stat_count['1'] = 0
}
}
stat_prop <- round((stat_count / sum(stat_count)) * 100, 1)
return(paste(stat_count, " (", stat_prop, "%)", sep = ""))
}
# Dataframe Validation
#
# Validates the input dataframe to check required columns to run the model
validate_df_columns <- function(df_cohort) {
columns <- c("STATUS", "DD", "START_DATE", "LAST_ACTIVITY_DATE")
if (!is.data.frame(df_cohort)) {
stop(paste("Argument", deparse(substitute(df_cohort)), "must be a data.frame."))
}
if (!all(i <- rlang::has_name(df_cohort, columns)))
stop(sprintf(
"%s doesn't contain: %s",
deparse(substitute(df_cohort)),
paste(columns[!i], collapse = ", ")))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.