#' Create Folds for (repeated) Cross Validation
#'
#' Create folds used in the pedis train function
#'
#' @param y The outcome vector (as produced by prepare_X_y)
#' @param k Optional integer, number of folds, default = 5
#' @param times Optional integer, number of repeats, default = 5
#' @param seed Optional
#'
#' @return List containing the folds
#' @export
#'
#' @importFrom caret createMultiFolds
#'
#' @examples
#' predis <- data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", vars = c("p", "e"))
#' folds <- create_folds(X_y$y)
#'
create_folds <- function(y, k = 5, times = 5, seed = 93864) {
# Create Folds to reproduce fitting process
## Repeated cross validation
folds <- caret::createMultiFolds(y, k = k, times = times)
return(folds)
}
#' Train CV Log. Regression Model
#'
#' @param X Matrix containing the predictor variables
#' @param y Vector containing the outcome variables
#'
#' @return A train object of class pedis_train
#' @export
#'
#' @importFrom caret trainControl
#' @importFrom caret train
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", var = c("p", "d"))
#' folds <- create_folds(X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' class(fit)
train_pedis <- function(X, y, folds) {
# Edit training options
## implicitly conducting repeated cross-validation
tr_control <- caret::trainControl(classProbs = TRUE,
savePredictions = "all",
returnData = TRUE,
returnResamp = "all",
index = folds,
summaryFunction = caret::twoClassSummary)
# Fit the binomial model
fit <- caret::train(x = X,
y = y,
method = "glm",
family = "binomial",
metric = "ROC",
preProcess = c("scale", "center"),
trControl = tr_control)
return(fit)
}
#' Predict Test Data
#'
#' Returns the predicted probability value and the actual outcome
#'
#' @param fit A caret model
#'
#' @return A `tibble` with two variables
#' 1. The probability of the event and
#' 2. the actual, observed outcome
#'
#' @import dplyr
#' @import tibble
#'
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", var = c("p", "d"))
#' fit <- train_pedis(X = X_y$X, y = X_y$y)
#' test_predict(fit)
#'
test_predict <- function(fit) {
data <- fit %>%
purrr::pluck("pred") %>%
dplyr::select(yes, obs) %>%
tibble::as_tibble()
return(data)
}
#' Get a dataframe with predictor (pred) and response variable (actual observations)
#'
#'
#' @param idx Folds
#' @param X A matrix with predictors
#' @param y A vecotr of outcomes
#'
#' @return A dataframe
#'
train_predict_helper <- function(idx, X, y) {
fit <- glm(y[idx] ~ as.matrix(X[idx, ]), family = "binomial")
y_pred <- predict(fit, type = "response")
data <- tibble(predictor = y_pred, response = y[idx])
data
}
#' Train predict
#'
#' Function takes a list with integer vectors representing fold indicies
#' or a single integer vector representing these indicies and predict
#' outcomes based on the training results
#'
#' @param folds An integer vectors or a list containing integer vectors
#' @param X_y A list containing the predictor matrix `X` and the outcome vector `y`
#'
#' @import purrr
#' @import dplyr
#'
#' @return A dataframe containing the actual outcome and the predicted value
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(data = pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' fold_idx <- 1:10
#' train_predict(folds = 1:10, X_y = X_y)
#' folds <- list(1:10, 11:20)
#' train_predict(folds = folds, X_y = X_y)
train_predict <- function(folds, X_y) {
X <- X_y$X
y <- X_y$y
if(is.list(folds)) {
if(!purrr::is_integer(folds[[1]])) {
stop("Fold-List must contain integers")
}
df <- purrr::map_df(.x = folds, .f = train_predict_helper, X = X, y = y, .id = "fold")
df <- df %>% dplyr::rename("yes" = predictor, "obs" = response)
return(df)
} else if (is.numeric(folds)){
train_predict_helper(idx = folds, X = X, y = y)
} else {
stop("Check Folds, I assume they do not contain integers")
}
}
# #' Wrapper to predict over all folds (train)
# #'
# #' @param folds A list with k * times integer vectors containing the fold index
# #' @param X_y A list with predictor matrix and output vector
# #'
# #' @return A list
# #' @export
# #'
# #' @examples
# #' data(pedis)
# #' X_y <- prepare_X_y(data = pedis, outcome = "minor_amputation", vars = c("p", "d"))
# #' folds <- create_folds(y = X_y$y)
# #' map_train_predict(folds = folds, X_y = X_y)
# map_train_predict <- function(folds, X_y, fun = train_predict) {
# X <- X_y$X
# y <- X_y$y
#
# }
# testPredict <- test_predict(fit)
# testROC <- roc(response = testPredict$obs, predictor = testPredict$yes)
# trainROC <- roc(response = trainPredict$response, trainPredict$predictor)
#
# # Create Plot
# plot_train_test <- function(trainROC, testROC) {
# smooth(trainROC) %>% plot(col = "blue", legacy.axis = FALSE, las = 1, ylim = c(0, 1))
# smooth(testROC) %>% plot(add = TRUE, col = "red", lty = 2)
# legend(x = .4, y = .4, legend = c("Training ROC", "Test ROC"), col = c("blue", "red"), lty = 1:2, lwd = 3)
# abline(h = 1, lty = 3, col = "grey")
# abline(h = 0, lty = 3, col = "grey")
# }
#
#
# # # LogReg on training data
# # # Return numeric AUC result
# trainAUC <- function(idx, X, y) {
# fit <- glm(y[idx] ~ as.matrix(X[idx, ]), family = "binomial")
# y_pred <- predict(fit, type = "response")
# roc_obj <- pROC::roc(y[idx], y_pred)
# auc_result <- pROC::auc(roc_obj)
# return(auc_result)
# }
#
# # # Training AUC metric
# # ## A vector with AUC for every Training fold
# train_auc <- map_dbl(folds, trainAUC, X, y)
# #
# # # Extracting test metrics from train object
# test_auc <- pluck(fit, "resample", "ROC")
# names(test_auc) <- pluck(fit, "resample", "Resample")
#
# # Summary of train and test metrics
# test_train_auc <- tibble(train_auc, test_auc)
#
# differences <- test_train_auc %>% gather() %>% diff_table()
#
# if (bayes) {
# # Compare Test and Training AUC-Metric using Bayesian Methods
# b_t_test <- BayesianFirstAid::bayes.t.test(test_auc, train_auc, n.iter = 3000)
# } else {
# b_t_test <- NULL
# }
# # Logistic Regression
# LReg_full_fit <- glm(y ~ as.matrix(X), family = "binomial")
#
# # Create Summary Table
# train_test_diff <- test_train_auc %>% create_summary_table()
#
# list(full_fit = LReg_full_fit,
# bayes_ttest = b_t_test,
# test_train_auc = test_train_auc,
# plot_rocs = function() plot_train_test(trainROC, testROC),
# differences = differences,
# train_test_diff = train_test_diff
# )
#
# }
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.