R/pedis-roc.R

#' Create ROC for Test and Train Set
#'
#' @param data Dataframe with two columns
#' `obs`: factor, response label
#' `yes`: numeric, predicted probability of reference class (yes)
#'
#' @return A pROC::roc object
#' @export
#'
#' @importFrom pROC roc
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(data = pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' out_sample_prediction <- train_predict_all(folds = folds, X_y = X_y)
#' in_sample_prediction <- test_predict(fit)
#' create_roc(out_sample_prediction)
#' create_roc(in_sample_prediction)
create_roc <- function(data) {
  pROC::roc(response = data$obs, predictor = data$yes)
}

#' Create Test/Train AUC Plot
#'
#' @param trainROC A roc obj (on in-sample data)
#' @param testROC A roc obj (on out-sample data)
#'
#' @return A plot
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(data = pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' out_sample_prediction <- train_predict_all(folds = folds, X_y = X_y)
#' in_sample_prediction <- test_predict(fit)
#' out_sample_auc <- create_roc(out_sample_prediction)
#' in_sample_auc <- create_roc(in_sample_prediction)
#' plot_train_test(trainROC = in_sample_auc, testROC = out_sample_auc)
#'
plot_train_test <- function(trainROC, testROC) {
  pROC::smooth(trainROC) %>% plot(col = "blue", legacy.axis = FALSE, las = 1, ylim = c(0, 1))
  pROC::smooth(testROC) %>% plot(add = TRUE, col = "red", lty = 2)
  legend(x = .4, y =  .4, legend = c("Training ROC", "Test ROC"), col = c("blue", "red"), lty = 1:2, lwd = 3)
  abline(h = 1, lty = 3, col = "grey")
  abline(h = 0, lty = 3, col = "grey")
}

#' Get the AUC value from the training fit object
#' LogReg on training data
#' Return numeric AUC result
#'
#' @param folds An integer vectors or a list containing integer vectors
#' @param X_y A list containing the predictor matrix `X` and the outcome vector `y`
#'
#' @export
#'
#' @return The AUC value
#'
train_auc <- function(folds, X_y) {

  X <- X_y$X
  y <- X_y$y

  # Check if list, if it is map over elements
  if(is.list(folds)) {
    # Check if list contains integer vectors (representing fold idx)
    if(!purrr::is_integer(folds[[1]])) {
      stop("Fold-List must contain integers")
    }
    df <- purrr::map(.x = folds, .f = train_auc_helper, X = X, y = y)
    df <- tibble::enframe(unlist(df))
    df <- df %>% dplyr::rename("fold" = name, "auc" = value)

    purrr::map(df, c)
    dplyr::as_tibble(df)
    tidyr::gather(df)
    return(df)
  } else if (is.numeric(folds)){
    train_auc_helper(idx = folds, X = X, y = y)
  } else {
    stop("Check Folds, are they integers?")
  }

  fit <- glm(y[idx] ~ as.matrix(X[idx, ]), family = "binomial")
  y_pred <- predict(fit, type = "response")
  roc_obj <- pROC::roc(y[idx], y_pred)
  auc_result <- pROC::auc(roc_obj)
  return(auc_result)
}

#' Extract AUC values from `roc` Object
#' LogReg on training data
#' Return numeric AUC result
#'
#' @param folds An integer vectors or a list containing integer vectors
#' @param X_y A list containing the predictor matrix `X` and the outcome vector `y`
#'
#' @return The AUC value
#'
train_auc_helper <- function(idx, X, y) {
  fit <- glm(y[idx] ~ as.matrix(X[idx, ]), family = "binomial")
  y_pred <- predict(fit, type = "response")
  roc_obj <- pROC::roc(y[idx], y_pred, levels = c("no", "yes"), direction = "<")
  auc_result <- pROC::auc(roc_obj)
  auc_result <- as.numeric(auc_result)
  return(auc_result)
}

#' Extract AUC from multiple (train) folds
#'
#' @param folds A list with integer vectors
#' @param X_y A list with predictor matrix and response vector
#'
#' @return A numeric vector containing AUC values
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' map_train_auc(folds = folds, X_y = X_y)
#'
train_predict <- function(folds, X_y) {

  X <- X_y$X
  y <- X_y$y

  # Check if list, if it is map over elements
  if(is.list(folds)) {
    # Check if list contains integer vectors (representing fold idx)
    if(!purrr::is_integer(folds[[1]])) {
      stop("Fold-List must contain integers")
    }
    df <- purrr::map_df(.x = folds, .f = train_auc_helper, X = X, y = y, .id = "fold")
    df <- df %>% dplyr::rename("yes" = predictor, "obs" = response)
    return(df)
  } else if (is.numeric(folds)){
    train_auc_helper(idx = folds, X = X, y = y)
  } else {
    stop("Check Folds, are they integers?")
  }
}

#' Calculate the Test-AUC
#'
#' @param fit A caret logistic regression model
#'
#' @return The AUC value as numeric vector with length 1
#'
#' @export
#'
#' @examples
#' data(pedis)
#' X_y <- prepare_X_y(pedis, outcome = "minor_amputation", vars = c("p", "d"))
#' folds <- create_folds(y = X_y$y)
#' fit <- train_pedis(X = X_y$X, y = X_y$y, folds = folds)
#' test_auc(fit)
test_auc <- function(fit) {
  purrr::pluck(fit, "resample") %>% pull("ROC")
}

#' Create a ROC
#'
#' @param data A dataframe with columns `yes` and `obs`
#'
#' @return
#' @export
#'
#' @examples
roc <- function(data) {
  predictor <- data$yes
  response <- data$obs
  pROC::roc(response = response, predictor = predictor, direction = "<", levels = c("no", "yes"))
}
jnshsrs/PEDISdata documentation built on June 24, 2019, 12:07 p.m.