#' Create ROC for Test and Train Set
#'
#' @param data Dataframe with two columns
#' `obs`: factor, response label
#' `yes`: numeric, predicted probability of reference class (yes)
#'
#' @return A pROC::roc object
#' @export
#'
#' @importFrom pROC roc
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(data = pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' out_sample_prediction <- train_predict_all(folds = folds, X_y = X_y)
#' in_sample_prediction <- test_predict(fit)
#' create_roc(out_sample_prediction)
#' create_roc(in_sample_prediction)
create_roc <- function(data) {
pROC::roc(response = data$obs, predictor = data$yes)
}
#' Create Test/Train AUC Plot
#'
#' @param trainROC A roc obj (on in-sample data)
#' @param testROC A roc obj (on out-sample data)
#'
#' @return A plot
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(data = pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' out_sample_prediction <- train_predict_all(folds = folds, X_y = X_y)
#' in_sample_prediction <- test_predict(fit)
#' out_sample_auc <- create_roc(out_sample_prediction)
#' in_sample_auc <- create_roc(in_sample_prediction)
#' plot_train_test(trainROC = in_sample_auc, testROC = out_sample_auc)
#'
plot_train_test <- function(trainROC, testROC) {
pROC::smooth(trainROC) %>% plot(col = "blue", legacy.axis = FALSE, las = 1, ylim = c(0, 1))
pROC::smooth(testROC) %>% plot(add = TRUE, col = "red", lty = 2)
legend(x = .4, y = .4, legend = c("Training ROC", "Test ROC"), col = c("blue", "red"), lty = 1:2, lwd = 3)
abline(h = 1, lty = 3, col = "grey")
abline(h = 0, lty = 3, col = "grey")
}
#' Get the AUC value from the training fit object
#' LogReg on training data
#' Return numeric AUC result
#'
#' @param folds An integer vectors or a list containing integer vectors
#' @param X_y A list containing the predictor matrix `X` and the outcome vector `y`
#'
#' @export
#'
#' @return The AUC value
#'
train_auc <- function(folds, X_y) {
X <- X_y$X
y <- X_y$y
# Check if list, if it is map over elements
if(is.list(folds)) {
# Check if list contains integer vectors (representing fold idx)
if(!purrr::is_integer(folds[[1]])) {
stop("Fold-List must contain integers")
}
df <- purrr::map(.x = folds, .f = train_auc_helper, X = X, y = y)
df <- tibble::enframe(unlist(df))
df <- df %>% dplyr::rename("fold" = name, "auc" = value)
purrr::map(df, c)
dplyr::as_tibble(df)
tidyr::gather(df)
return(df)
} else if (is.numeric(folds)){
train_auc_helper(idx = folds, X = X, y = y)
} else {
stop("Check Folds, are they integers?")
}
fit <- glm(y[idx] ~ as.matrix(X[idx, ]), family = "binomial")
y_pred <- predict(fit, type = "response")
roc_obj <- pROC::roc(y[idx], y_pred)
auc_result <- pROC::auc(roc_obj)
return(auc_result)
}
#' Extract AUC values from `roc` Object
#' LogReg on training data
#' Return numeric AUC result
#'
#' @param folds An integer vectors or a list containing integer vectors
#' @param X_y A list containing the predictor matrix `X` and the outcome vector `y`
#'
#' @return The AUC value
#'
train_auc_helper <- function(idx, X, y) {
fit <- glm(y[idx] ~ as.matrix(X[idx, ]), family = "binomial")
y_pred <- predict(fit, type = "response")
roc_obj <- pROC::roc(y[idx], y_pred, levels = c("no", "yes"), direction = "<")
auc_result <- pROC::auc(roc_obj)
auc_result <- as.numeric(auc_result)
return(auc_result)
}
#' Extract AUC from multiple (train) folds
#'
#' @param folds A list with integer vectors
#' @param X_y A list with predictor matrix and response vector
#'
#' @return A numeric vector containing AUC values
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' map_train_auc(folds = folds, X_y = X_y)
#'
train_predict <- function(folds, X_y) {
X <- X_y$X
y <- X_y$y
# Check if list, if it is map over elements
if(is.list(folds)) {
# Check if list contains integer vectors (representing fold idx)
if(!purrr::is_integer(folds[[1]])) {
stop("Fold-List must contain integers")
}
df <- purrr::map_df(.x = folds, .f = train_auc_helper, X = X, y = y, .id = "fold")
df <- df %>% dplyr::rename("yes" = predictor, "obs" = response)
return(df)
} else if (is.numeric(folds)){
train_auc_helper(idx = folds, X = X, y = y)
} else {
stop("Check Folds, are they integers?")
}
}
#' Calculate the Test-AUC
#'
#' @param fit A caret logistic regression model
#'
#' @return The AUC value as numeric vector with length 1
#'
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' test_auc(fit)
test_auc <- function(fit) {
purrr::pluck(fit, "resample") %>% pull("ROC")
}
#' Create a ROC
#'
#' @param data A dataframe with columns `yes` and `obs`
#'
#' @return
#' @export
#'
#' @examples
roc <- function(data) {
predictor <- data$yes
response <- data$obs
pROC::roc(response = response, predictor = predictor, direction = "<", levels = c("no", "yes"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.