R/code.R

#' Add predictive models to a summary list.
#'
#' \code{add_model} trains each model and adds its fit to a summary list.
#' The function ensures that the model and data support classification.
#' If a model supports class probabilities, the best fit is chosen based on
#' AUC; if it does not, the best fit is chosen based on accuracy. Training
#' uses 10-fold cross-validation by default.
#'
#' @param addTo Summary list that will contain all model fit results.
#' @param x A dataframe of input variables.
#' @param y A vector or a dataframe of the output variable.
#' @param model A vector of model names to train.
#' @param tuneLength The maximum number of tuning parameter combinations
#' generated by the random search.
#' @param modelTag A character value of a tag to be appended to the model name.
#' @param tf A single character value selecting a transformation option: "tf1",
#' "tf2", or "tf3". For details, see the suggest_transformation function.
#' @param sampling A single character value passed to caret::trainControl.
#' Values are "none", "down", "up", "smote", or "rose". The latter two require
#' the DMwR and ROSE packages, respectively.
#' @import knitr
#' @import mlbench
#' @import e1071
#' @importFrom magrittr %>%
#' @importFrom purrr is_list
#' @importFrom purrr is_vector
#' @importFrom purrr is_character
#' @importFrom purrr as_vector
#' @importFrom purrr map_df
#' @importFrom purrr map_lgl
#' @importFrom purrr map
#' @importFrom stats predict
#' @importFrom stats reorder
#' @importFrom stats median
#' @importFrom stats loess
#' @importFrom caret train
#' @importFrom caret trainControl
#' @importFrom caret postResample
#' @importFrom caret sensitivity
#' @importFrom caret specificity
#' @importFrom caret getModelInfo
#' @importFrom dplyr bind_cols
#' @importFrom dplyr distinct
#' @importFrom dplyr bind_rows
#' @importFrom dplyr setdiff
#' @importFrom ModelMetrics auc
#' @importFrom utils install.packages
#' @importFrom utils installed.packages
#' @importFrom stringr str_c
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 geom_boxplot
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 geom_density
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 geom_label
#' @importFrom ggplot2 geom_rect
#' @importFrom ggplot2 geom_tile
#' @importFrom ggplot2 geom_line
#' @importFrom ggplot2 geom_smooth
#' @importFrom ggplot2 geom_text
#' @importFrom ggplot2 margin
#' @importFrom ggplot2 facet_wrap
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 ylab
#' @importFrom ggplot2 xlim
#' @importFrom ggplot2 ylim
#' @importFrom ggplot2 ggtitle
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 element_blank
#' @importFrom ggplot2 scale_fill_gradient
#' @importFrom ggplot2 element_text
#' @return If the model and data support classification, the results of
#' each model fit are added to the summary list.
#' @export
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' 
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
add_model <- function(addTo, x, y, model = NULL, tuneLength = 5L, modelTag = NULL, tf = NULL, sampling = NULL) {
  
  `.` <- NULL # for CMD check pass
  
  if (is.null(model)) {
    model <- c("glm", "lda", "bayesglm", "knn", "nnet", "qda", "svmLinear", "svmRadial", "rf", "rpart", "treebag")
  }

  # input argument control
  stopifnot(purrr::is_list(addTo), is.null(model) || (purrr::is_vector(model) && purrr::is_character(model) && NCOL(model) == 1),
            purrr::is_null(tuneLength) || (is.numeric(tuneLength) && tuneLength %% 1 == 0),
            purrr::is_character(modelTag) || purrr::is_null(modelTag),
            is.data.frame(x) && !is.null(n <- nrow(x)),
            !is.matrix(y) && purrr::is_vector(y) && NCOL(y) == 1 && NROW(y) == NROW(x),
            length(levels(factor(purrr::as_vector(y)))) == 2 # two-class factor only
            )

  x <- as.data.frame(x)
  y <- as.data.frame(y)
  colnames(y) <- "Class"
  trainingData <- dplyr::bind_cols(x, y)

  # check required packages and install if required
  model %>%
    purrr::map(~caret::getModelInfo()[[.]]$library) %>%
    purrr::map_lgl(~length(.) > 0) %>% model[.] %>%
    purrr::map(~caret::getModelInfo()[[.]]$library) %>%
    purrr::map(tibble::tibble) %>% purrr::map_df(dplyr::bind_rows) %>%
    dplyr::distinct() %>% purrr::as_vector() %>%
    dplyr::setdiff(installed.packages()[, 1]) -> packageToInstall
  if (length(packageToInstall) > 0) install.packages(packageToInstall, repos = "http://cran.us.r-project.org")

  # classification only
  model %>%
    purrr::map(~caret::getModelInfo()[[.]]$type) %>%
    purrr::map(~ . %in% "Classification") %>%
    purrr::map_lgl(any) -> classificationAvailable
  modelToCompute <- model[classificationAvailable]
  modelNotToCompute <- model[!classificationAvailable]

  # check availability of class probability
  modelToCompute %>%
    purrr::map(~caret::getModelInfo()[[.]]$prob) %>%
    purrr::map_lgl(is.null) -> classProbNotAvailable
  modelWithoutClassProb <- modelToCompute[classProbNotAvailable]
  modelWithClassProb <- modelToCompute[!classProbNotAvailable]

  # data transformation
  if(is.null(tf)) {
    tfArg <- NULL
  } else {
    tfArg <- switch(tf,
                    "tf1" = c("BoxCox"),
                    "tf2" = c("center", "scale", "YeoJohnson"),
                    "tf3" = c("center", "scale", "pca")
                    )
  }

  modelTag <- stringr::str_c(tf, modelTag, sep = "_")

  # combine caret::defaultSummary and caret::twoClassSummary into one
  togetherSummary <- function(data, lev = NULL, model = NULL) {

    # Accuracy - minor modification of caret::defaultSummary
    data1 <- data
    lev1 <- lev
    model1 <- model
    if (is.character(data1$obs))
      data1$obs <- factor(data1$obs, levels = lev1)
    out1 <- caret::postResample(data1[, "pred"], data1[, "obs"])

    # AUC - minor modification of caret::twoClassSummary
    lvls <- levels(data$obs)
    if (!all(levels(data[, "pred"]) == lvls))
      stop("levels of observed and predicted data do not match")
    rocAUC <- ModelMetrics::auc(ifelse(data$obs == lev[2], 0, 1), data[, lvls[1]])
    out2 <- c(rocAUC,
              caret::sensitivity(data[, "pred"], data[, "obs"], lev[1]),
              caret::specificity(data[, "pred"], data[, "obs"], lev[2])
    )
    names(out2) <- c("ROC", "Sens", "Spec")

    out <- c(out1, out2)
    out
  }
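
  # For reference, the combined vector returned by togetherSummary looks like
  # this (values purely illustrative):
  #   Accuracy   Kappa     ROC    Sens    Spec
  #       0.78    0.51    0.84    0.86    0.65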

  # trainControl set-up
  trainControlWithClassProb <- caret::trainControl(verboseIter = TRUE,
                                                   method="cv",
                                                   number=10,
                                                   search = "random",
                                                   returnResamp = "final",
                                                   savePredictions = "final",
                                                   classProbs = TRUE,
                                                   summaryFunction = togetherSummary,
                                                   timingSamps = 1,
                                                   sampling = sampling
                                                   )

  trainControlWithoutClassProb <- caret::trainControl(verboseIter = TRUE,
                                                      method="cv",
                                                      number=10,
                                                      search = "random",
                                                      returnResamp = "final",
                                                      savePredictions = "final",
                                                      summaryFunction = caret::defaultSummary,
                                                      timingSamps = 1,
                                                      sampling = sampling
                                                      )

  modelWithCP <- modelWithoutCP <- NULL

  # train models w/ ROC(primary) and Accuracy & Kappa(reference)
  if (length(modelWithClassProb) > 0) {
    modelWithCP <- modelWithClassProb %>%
      purrr::map(~caret::train(method = .,
                               Class ~ .,
                               data = trainingData,
                               metric = "ROC", # if prob available, choose best model with ROC
                               tuneLength = tuneLength,
                               trControl = trainControlWithClassProb,
                               preProcess = tfArg)
      )
    names(modelWithCP) <- stringr::str_c(modelWithClassProb, modelTag, sep = "_")
  }

  # train models (w/ Accuracy & Kappa metrics)
  if (length(modelWithoutClassProb) > 0) {
    modelWithoutCP <- modelWithoutClassProb %>%
      purrr::map(~caret::train(method = .,
                               Class ~ .,
                               data = trainingData,
                               metric = "Accuracy",
                               tuneLength = tuneLength,
                               trControl = trainControlWithoutClassProb,
                               preProcess = tfArg)
      )
    names(modelWithoutCP) <- stringr::str_c(modelWithoutClassProb, modelTag, sep = "_")
  }

  # overwrite if same name of object is added
  for(i in seq_along(modelWithCP)) {
    addTo[[names(modelWithCP)[i]]] <- modelWithCP[[i]]
  }
  for(i in seq_along(modelWithoutCP)) {
    addTo[[names(modelWithoutCP)[i]]] <- modelWithoutCP[[i]]
  }

  if (length(modelWithoutClassProb) > 0) {
    cat("\n Model: ", stringr::str_c(names(modelWithoutCP), collapse = ", "),
        "\n >> Accuracy and Kappa metrics are available.",
        "\n >> Best model is selected based on accuracy. \n\n",
        collapse = "")
  }
  if (length(modelWithClassProb) > 0) {
    cat("\n Model: ", stringr::str_c(names(modelWithCP), collapse = ", "),
        "\n >> ROC, Sens, Spec, Accuracy and Kappa metrics are available.",
        "\n >> Best model is selected based on AUC. \n\n",
        collapse = "")
  }
  if (length(modelNotToCompute) > 0) {
    cat("\n Model: ", stringr::str_c(modelNotToCompute, collapse = ", "),
        "\n >> Model(s) not support classification problem.\n\n",
        collapse = "")
  }
  return(addTo)
}
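
# Illustrative sketch (not part of the package; run interactively): the
# capability checks add_model performs for a single caret method.
info <- caret::getModelInfo("glm", regex = FALSE)[[1]]
"Classification" %in% info$type  # TRUE: add_model will train this method
!is.null(info$prob)              # TRUE: the best fit is chosen by AUC
info$library                     # NULL: base glm needs no extra packages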

#' Add Class Probability
#'
#' \code{add_prob} adds class probability results for a given dataset (x, y)
#' to the summary list for all model fits it contains. Like
#' \code{\link{add_model}}, it ensures that prediction is conducted only for
#' models and data that support classification.
#'
#' @param addTo Summary list that will contain all model fit results.
#' @param x A dataframe of input variables.
#' @param y A vector or a dataframe of the output variable.
#' @param outChar A character value of output class name.
#' @param predTag A character value of a tag to be appended to the prediction result.
#' @export
#' @return Prediction results for each model fit are added to the summary list.
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' 
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' sSummary <- add_prob(sSummary, x_test, y_test, outChar = "pos")
#' sSummary$glm$pred_test
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
add_prob <- function(addTo, x, y, outChar, predTag = NULL) {
  # input argument control
  stopifnot(purrr::is_list(addTo), is.data.frame(x) && !is.null(n <- nrow(x)),
            !is.matrix(y) && purrr::is_vector(y) && NCOL(y) == 1 && NROW(y) == NROW(x),
            purrr::is_character(predTag) || purrr::is_null(predTag),
            length(levels(factor(purrr::as_vector(y)))) == 2 # two-class factor only
            )

  x <- as.data.frame(x)
  y <- as.data.frame(y)
  colnames(y) <- "Class"
  testData <- dplyr::bind_cols(x, y)

  addTo %>%
    purrr::map_chr(~.$method) -> model

  # the following section repeats logic from add_model so that add_prob works
  # on any trained object created by the add_model function

  # classification only
  model %>%
    purrr::map(~caret::getModelInfo()[[.]]$type) %>%
    purrr::map(~ . %in% "Classification") %>%
    purrr::map_lgl(any) -> classificationAvailable
  modelToCompute <- model[classificationAvailable]

  # check availability of class probability
  modelToCompute %>%
    purrr::map(~caret::getModelInfo()[[.]]$prob) %>%
    purrr::map_lgl(is.null) -> classProbNotAvailable

  modelWithClassProb <- names(modelToCompute[!classProbNotAvailable])
  modelWithoutClassProb <- names(modelToCompute[classProbNotAvailable])

  # add pred result to addTo object
  if (length(modelWithClassProb)) {
    for(i in seq_along(modelWithClassProb)) {
      pred_test <- data.frame(testData["Class"])
      cat(stringr::str_c("Calculating prediction: ", modelWithClassProb[i],"\n", collapse = TRUE))
      pred_test[outChar] <- predict(addTo[[modelWithClassProb[i]]], testData, type = "prob")[, outChar]
      if (is.null(predTag)) {
        addTo[[modelWithClassProb[i]]]$pred_test <- pred_test
      } else {
        addTo[[modelWithClassProb[i]]][[predTag]] <- pred_test
      }
    }
  }
  return(addTo)
}
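
# Minimal sketch (not part of the package): the core of add_prob is caret's
# predict method with type = "prob", which returns one probability column per
# class. 'fit' and 'testData' are hypothetical objects like those built above.
# probs <- predict(fit, testData, type = "prob")  # data.frame, one column per class
# pred_test <- data.frame(Class = testData$Class, pos = probs[, "pos"])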

#' Plot Skewness and Kurtosis and suggest transformations
#'
#' \code{suggest_transformation} evaluates the normality of the data by
#' calculating skew and kurtosis. The user is given guidance on whether data
#' transformation is advised, and on how well each of three transformation
#' options (Box-Cox, Yeo-Johnson, or PCA) reduces non-normality compared to
#' the others and to the original, untransformed data set.
#'
#' @param x A dataframe of input variables.
#' @export
#' @return Four plots depicting skew and kurtosis of each variable. The four
#' plots are (1) the original, untransformed data, (2) transformed data
#' applying Box-Cox (tagged "tf1"), (3) transformed data applying Yeo-Johnson
#' (tagged "tf2"), and (4) transformed data using Principal
#' Components (tagged "tf3"). Additionally, the three transformed data sets
#' are generated and can be requested from other functions using these tags.
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' suggest_transformation(x)
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_transformation <- function(x) {
  kurtosis <- skew <- varNames <- NULL # for CMD check pass
  
  x <- x[, purrr::map_lgl(x, is.numeric)]
  ### Check for Skewness, Kurtosis and plot
  cat("\n Consider transforming data if skew or kurtosis of any variable is > 2 or < -2 \n")
  cat("\n BoxCox      : The distribution of an attribute can be shifted to reduce the skew and make it more Gaussian. \n")
  cat("\n Yeo-Johnson : Like the Box-Cox transform, but it supports raw values that are equal to zero and negative. \n")
  cat("\n PCA         : Transform the data to the principal components. \n")
  
  variableTest <- function(x, title) {
    checkSK <- psych::describe(x) # Put results of 'describe' into a data frame
    checkSK <- data.frame(checkSK)
    checkSK$varNames <- rownames(checkSK)
    
    ggplot(checkSK, aes(y = kurtosis, x = skew)) +
      xlab("Skewness") +
      ylab("Kurtosis") +
      geom_label(aes(label = varNames), nudge_x = .01, nudge_y = 0, size = 3) +
      geom_point(data = checkSK[checkSK$skew > 2 | checkSK$skew < -2,],
                 aes(x=skew, y=kurtosis), size = 4, color = "red", alpha = 0.2) +
      geom_point(data = checkSK[checkSK$kurtosis > 2 | checkSK$kurtosis < -2,],
                 aes(x=skew, y=kurtosis), size = 4, color = "blue", alpha = 0.2) +
      ggtitle(title)+
      theme(plot.title = element_text(hjust = 0.5, lineheight=.8, face="bold")) +
      geom_rect(aes(xmin = -2, ymin = -2, xmax = 2, ymax = 2), color = "gray", alpha = 0.002)
  }
  
  tf1 <- caret::preProcess(x, method = c("BoxCox"))
  tf1 <- predict(tf1, newdata = x)
  
  tf2 <- caret::preProcess(x, method = c("center", "scale", "YeoJohnson"))
  tf2 <- predict(tf2, newdata = x)
  
  tf3 <- caret::preProcess(x, method = c("center", "scale", "pca"))
  tf3 <- predict(tf3, newdata = x)
  
  summary <- list(variableTest(x, "No transformation"),
                  variableTest(tf1, "tf1: BoxCox"),
                  variableTest(tf2, "tf2: Yeo-Johnson"),
                  variableTest(tf3, "tf3: Principal Component")
  )
  return(gridExtra::grid.arrange(grobs = summary))
}
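
# Minimal sketch (toy data, not part of the package): how the Box-Cox option
# ("tf1") reduces the skew that suggest_transformation plots.
set.seed(1)
toy <- data.frame(v = rexp(200))                 # right-skewed, strictly positive
pp <- caret::preProcess(toy, method = "BoxCox")  # estimate the Box-Cox lambda
toyTf <- predict(pp, newdata = toy)
c(before = e1071::skewness(toy$v), after = e1071::skewness(toyTf$v))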

#' Compare performance based on accuracy metrics.
#'
#' \code{suggest_accuracy} compares performance across all model fits in the
#' summary list based on Accuracy and Kappa metrics, together with the
#' training time per tuning parameter combination for each fit.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param modelTag Select model fits whose names contain modelTag.
#' @param time If TRUE, show the average training time per tuning parameter
#' combination.
#' @export
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#'  
#' suggest_accuracy(sSummary)
#' suggest_accuracy(sSummary, time = TRUE)
#' suggest_accuracy(sSummary, time = TRUE, modelTag = "glm|svm")
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_accuracy <- function(addTo, modelTag = NULL, time = FALSE) {
   `.` <- Model <- Accuracy <- Kappa <- Time <- TuneLength <- TimeSingle <- TimeTotal <- NULL #for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  addToAcc <- addTo %>% purrr::map_lgl(~(all(c("Accuracy", "Kappa") %in% .$perfNames))) %>% addTo[.]

  # performance comparison based on Accuracy
  if (length(addToAcc)) {
    gridAcc <- list(Model = names(addToAcc),
                    Accuracy = addToAcc %>% purrr::map(~.$resample$Accuracy),
                    Kappa = addToAcc %>% purrr::map(~.$resample$Kappa),
                    Time = addToAcc %>% purrr::map(~.$times$everything["elapsed"]),
                    TuneLength = addToAcc %>% purrr::map(~.$results) %>% purrr::map(nrow)
                    )
    tableAcc <- purrr::pmap(gridAcc, tibble::tibble) %>% dplyr::bind_rows()

    tableAcc <- tableAcc %>%
      dplyr::mutate(TimeSingle = Time / 60 / TuneLength) %>%
      dplyr::mutate(TimeTotal = Time / 60)

    perf_Acc <- ggplot(data = tableAcc, mapping = aes(x = reorder(Model, Accuracy, FUN = stats::median), y = Accuracy)) +
      geom_boxplot(alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("Accuracy") +
      coord_flip()

    perf_Kap <- ggplot(data = tableAcc, mapping = aes(x = reorder(Model, Accuracy, FUN = stats::median), y = Kappa)) +
      geom_boxplot(alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("Kappa") +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      coord_flip()

    time_sig <- ggplot(data = tableAcc, mapping = aes(x = reorder(Model, Accuracy, FUN = stats::median), y = TimeSingle)) +
      geom_bar(stat = "identity", alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("Time/Tuning (min)") +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      coord_flip()

    if (time) {
      layout <- rbind(c(1, 1, 2))
      gridExtra::grid.arrange(grobs = list(perf_Acc, time_sig), layout_matrix = layout)
    } else {
      layout <- rbind(c(1, 1, 1, 1, 2, 2, 2))
      gridExtra::grid.arrange(grobs = list(perf_Acc, perf_Kap), layout_matrix = layout)
    }
  }
}
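
# Sketch (toy data, not package output): the reorder(..., FUN = stats::median)
# idiom used above sorts the model axis by median metric before plotting.
toy <- data.frame(Model = rep(c("glm", "rf"), each = 5),
                  Accuracy = c(runif(5, 0.70, 0.75), runif(5, 0.80, 0.85)))
ggplot(toy, aes(x = reorder(Model, Accuracy, FUN = stats::median), y = Accuracy)) +
  geom_boxplot() + coord_flip()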

#' Compare performance based on Area Under the Curve, Sensitivity, and
#' Specificity.
#'
#' \code{suggest_auc} compares performance across all model fits in the
#' summary list based on AUC, Sensitivity and Specificity, together with the
#' training time per tuning parameter combination for each fit.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param modelTag Select model fits whose names contain modelTag.
#' @param time If TRUE, show the average training time per tuning parameter
#' combination.
#' @export
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#'  
#' suggest_auc(sSummary)
#' suggest_auc(sSummary, time = TRUE)
#' suggest_auc(sSummary, time = TRUE, modelTag = "glm|svm")
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_auc <- function(addTo, modelTag = NULL, time = FALSE) {
  `.` <- Time <- TuneLength <- Model <- ROC <- TimeSingle <- TimeTotal <- Sens <- Spec <- NULL # for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  addToROC <- addTo %>% purrr::map_lgl(~(all(c("ROC", "Sens", "Spec") %in% .$perfNames))) %>% addTo[.]

  # performance comparison based on AUC
  if (length(addToROC)) {
    gridAcc <- list(Model = names(addToROC),
                    ROC = addToROC %>% purrr::map(~.$resample$ROC),
                    Sens = addToROC %>% purrr::map(~.$resample$Sens),
                    Spec = addToROC %>% purrr::map(~.$resample$Spec),
                    Time = addToROC %>% purrr::map(~.$times$everything["elapsed"]),
                    TuneLength = addToROC %>% purrr::map(~.$results) %>% purrr::map(nrow)
                    )
    tableROC <- purrr::pmap(gridAcc, tibble::tibble) %>% dplyr::bind_rows()

    tableROC <- tableROC %>%
      dplyr::mutate(TimeSingle = Time / 60 / TuneLength) %>%
      dplyr::mutate(TimeTotal = Time / 60)

    perf_ROC <- ggplot(data = tableROC, mapping = aes(x = reorder(Model, ROC, FUN = stats::median), y = ROC)) +
      geom_boxplot(alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("AUC") +
      coord_flip()

    perf_Sens <- ggplot(data = tableROC, mapping = aes(x = reorder(Model, ROC, FUN = stats::median), y = Sens)) +
      geom_boxplot(alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("Sensitivity") +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      coord_flip()

    perf_Spec <- ggplot(data = tableROC, mapping = aes(x = reorder(Model, ROC, FUN = stats::median), y = Spec)) +
      geom_boxplot(alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("Specificity") +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      coord_flip()

    time_sig <- ggplot(data = tableROC, mapping = aes(x = reorder(Model, ROC, FUN = stats::median), y = TimeSingle)) +
      geom_bar(stat = "identity", alpha = 0.8) +
      xlab("") + ylab("") +
      ggtitle("Time/Tuning (min)") +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      coord_flip()

    if (time) {
      layout <- rbind(c(1, 1, 2))
      gridExtra::grid.arrange(grobs = list(perf_ROC, time_sig), layout_matrix = layout)
    } else {
      layout <- rbind(c(1, 1, 1, 2, 2, 3, 3))
      gridExtra::grid.arrange(grobs = list(perf_ROC, perf_Sens, perf_Spec), layout_matrix = layout)
    }
  }
}
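
# Sketch (toy vectors): the Sens/Spec columns summarized above come from
# caret::sensitivity / caret::specificity, with the first factor level treated
# as the event class (as in togetherSummary).
obs  <- factor(c("neg", "pos", "pos", "neg"), levels = c("neg", "pos"))
pred <- factor(c("neg", "pos", "neg", "neg"), levels = c("neg", "pos"))
caret::sensitivity(pred, obs, positive = "neg")  # 1.0: both "neg" cases caught
caret::specificity(pred, obs, negative = "pos")  # 0.5: one of two "pos" cases caught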

#' Compare model performance by model category from \pkg{caret} package
#'
#' \code{suggest_category} collects AUC and accuracy results by model category
#' and presents average performance using the model category tags as defined
#' by \pkg{caret}. This helps the user identify the most promising category
#' of models for further exploration.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param modelTag Select model fits containing \code{modelTag} in their name.
#' @export
#' @return Plots of AUC and accuracy, organized by category tags.
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' 
#' suggest_category(sSummary)
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_category <- function(addTo, modelTag = NULL) {
  `.` <- median <- Method <- Tag <- Accuracy <- ROC <- NULL # for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  addToAcc <- addTo %>% purrr::map_lgl(~(all(c("Accuracy", "Kappa") %in% .$perfNames))) %>% addTo[.]
  addToROC <- addTo %>% purrr::map_lgl(~(all(c("ROC", "Sens", "Spec") %in% .$perfNames))) %>% addTo[.]

  gridAcc <- list(Method = addToAcc %>% purrr::map(~.$method),
                  Accuracy = addToAcc %>% purrr::map(~.$resample$Accuracy) %>% purrr::map(stats::median),
                  Tag = addToAcc %>% purrr::map(~.$modelInfo$tags)
                  )
  tableAcc <- purrr::pmap(gridAcc, tibble::tibble) %>% dplyr::bind_rows() %>%
    dplyr::group_by(Method, Tag) %>% dplyr::summarise(Accuracy = mean(Accuracy, na.rm = TRUE)) %>% dplyr::ungroup()


  gridROC <- list(Method = addToROC %>% purrr::map(~.$method),
                  ROC = addToROC %>% purrr::map(~.$resample$ROC) %>% purrr::map(stats::median),
                  Tag = addToROC %>% purrr::map(~.$modelInfo$tags)
                  )
  tableROC <- purrr::pmap(gridROC, tibble::tibble) %>% dplyr::bind_rows() %>%
    dplyr::group_by(Method, Tag) %>% dplyr::summarise(ROC = mean(ROC, na.rm = TRUE)) %>% dplyr::ungroup()

  tableAcc$Tag <- tableAcc$Tag %>%
    stringr::str_replace_all("L1 Regularization Models", "L1 Regularization") %>%
    stringr::str_replace_all("L2 Regularization Models", "L2 Regularization") %>%
    stringr::str_replace_all("Generalized Linear Models", "Generalized Linear Model") %>%
    stringr::str_replace_all("Discriminant Analysis Models", "Discriminant Analysis") %>%
    stringr::str_replace_all("Feature Extraction Models", "Feature Extraction") %>%
    stringr::str_replace_all("Linear Classifier Models", "Linear Classifier") %>%
    stringr::str_replace_all("Linear Regression Models", "Linear Regression")

  tableROC$Tag <- tableROC$Tag %>%
    stringr::str_replace_all("L1 Regularization Models", "L1 Regularization") %>%
    stringr::str_replace_all("L2 Regularization Models", "L2 Regularization") %>%
    stringr::str_replace_all("Generalized Linear Models", "Generalized Linear Model") %>%
    stringr::str_replace_all("Discriminant Analysis Models", "Discriminant Analysis") %>%
    stringr::str_replace_all("Feature Extraction Models", "Feature Extraction") %>%
    stringr::str_replace_all("Linear Classifier Models", "Linear Classifier") %>%
    stringr::str_replace_all("Linear Regression Models", "Linear Regression")

  tableAcc$Accuracy <- round(tableAcc$Accuracy * 100, digits = 0)
  tableROC$ROC <- round(tableROC$ROC * 100, digits = 0)

  ctg_acc <- ggplot(data = tableAcc, mapping = aes(x = reorder(Tag, Accuracy, FUN = stats::median),
                                                   y = reorder(Method, dplyr::desc(Accuracy), FUN = stats::median),
                                                   fill = Accuracy)
                    ) +
    geom_tile(colour="gray", size=1, stat="identity") +
    scale_fill_gradient(low = "white", high = "gray", space = "Lab") +
    geom_text(data = tableAcc, mapping = aes(x = reorder(Tag, Accuracy, FUN = stats::median),
                                             y = reorder(Method, dplyr::desc(Accuracy), FUN = stats::median),
                                             label = Accuracy)
              ) +
    xlab("") + ylab("") +
    ggtitle("Tag-Model Grid with Accuracy(%)") + guides(fill = "none") +
    coord_flip()

  ctg_roc <- ggplot(data = tableROC, mapping = aes(x = reorder(Tag, ROC, FUN = stats::median),
                                                   y = reorder(Method, dplyr::desc(ROC), FUN = stats::median),
                                                   fill = ROC)
                    ) +
    geom_tile(colour="gray", size=1, stat="identity") +
    scale_fill_gradient(low = "white", high = "gray", space = "Lab") +
    geom_text(data = tableROC, mapping = aes(x = reorder(Tag, ROC, FUN = stats::median),
                                             y = reorder(Method, dplyr::desc(ROC), FUN = stats::median),
                                             label = ROC)
              ) +
    xlab("") + ylab("") +
    ggtitle("Tag-Model Grid with AUC(%)") + guides(fill = "none") +
    coord_flip()

  return(gridExtra::grid.arrange(grobs = list(ctg_roc, ctg_acc)))
}
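
# Sketch: the Tag column above comes from caret's per-model tag metadata, e.g.
caret::getModelInfo("rf", regex = FALSE)[[1]]$tags
# "Random Forest", "Ensemble Model", "Bagging", ...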

#' Compare variables by importance.
#'
#' \code{suggest_variable} collects and compares variable importance metrics
#' where available.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param modelTag Select model fits whose names contain modelTag.
#' @param sep If TRUE, show variable importance results separately for each
#' model fit.
#' @export
#' @return Plots of variable importance results from all available models.
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' 
#' suggest_variable(sSummary)
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_variable <- function(addTo, modelTag = NULL, sep = FALSE) {
  `.` <- varImp <- Variables <- variableImportance <- median <- NULL # for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  noVarImp <- addTo %>% purrr::map(~.$modelInfo$varImp) %>% purrr::map_lgl(is.null)
  addToVar <- addTo[!noVarImp]

  vi <- list(variableImportance = addToVar %>% purrr::map(caret::varImp) %>% purrr::map(~.$importance[,1]),
             Model = addToVar %>% purrr::map(caret::varImp) %>% names(),
             Variables = addToVar %>% purrr::map(caret::varImp) %>% purrr::map(~.$importance) %>% purrr::map(rownames)
             )

  viSummary <- purrr::pmap(vi, tibble::tibble) %>% dplyr::bind_rows()
  viConsolidation <- ggplot(data = viSummary,
                            mapping = aes(x = reorder(Variables,
                                                      variableImportance,
                                                      FUN = stats::median),
                                          y = variableImportance)
                            ) +
    geom_boxplot(alpha = 0.8) +
    xlab("") + ylab("") +
    ggtitle("Variable Importance\nfrom all models") +
    coord_flip()

  vicomparison <- ggplot(data = viSummary,
                         mapping = aes(x = reorder(Variables,
                                                   variableImportance,
                                                   FUN = stats::median),
                                       y = variableImportance)
                         ) +
    geom_bar(stat = "identity", alpha = 0.7) +
    xlab("") + ylab("") +
    ggtitle("Variable Importance by model") +
    coord_flip() +
    facet_wrap(~ Model)

  viCompare = list(viConsolidation, vicomparison)
  viSeparate = viSummary %>% split(.$Model)
  for (i in seq_along(viSeparate)) {
    j = match(names(viSeparate), names(addToVar))[i]
    viSeparate[[i]] = ggplot(data = viSeparate[[i]],
                             mapping = aes(x = reorder(Variables,
                                                       variableImportance,
                                                       FUN = stats::median),
                                           y = variableImportance)
                             ) +
      geom_bar(stat = "identity", alpha = 0.7) +
      xlab("") + ylab("") +
      ggtitle(stringr::str_c(names(viSeparate)[[i]], " | ", addToVar[[j]]$modelInfo$label)) +
      coord_flip()
  }

  if (sep) {
    gridExtra::grid.arrange(grobs = viSeparate)
  } else {
    lay <- rbind(c(1,1,2,2,2))
    gridExtra::grid.arrange(grobs = viCompare, layout_matrix = lay)
  }
}
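
# Sketch (hypothetical 'sSummary$glm' fit from the examples): caret::varImp
# returns an object whose $importance data frame has one row per predictor;
# suggest_variable reshapes that across models.
# vi <- caret::varImp(sSummary$glm)
# head(vi$importance)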

#' Compare class probability distributions based on the population.
#'
#' \code{suggest_probPop} compares class probability distributions based on
#' the population of observations, sorted by probability value.
#' x-axis: percentile of class probability.
#' y-axis: observation density.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param outChar A character value of output class name.
#' @param predTag Select prediction results whose names contain predTag.
#' @param modelTag Select model fits whose names contain modelTag.
#' @export
#' @return Plot indicating density distribution by observed population.
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' sSummary <- add_prob(sSummary, x_test, y_test, outChar = "pos")
#' 
#' suggest_probPop(sSummary, "pos")
#' suggest_probPop(sSummary, "pos", modelTag = "glm|svm")
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_probPop <- function(addTo, outChar, predTag = "pred_test", modelTag = NULL) {
  `.` <- obs <- population <- NULL # for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag),
            purrr::is_character(predTag) || purrr::is_null(predTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  addToROC <- addTo %>% purrr::map_lgl(~(all(c("ROC", "Sens", "Spec") %in% .$perfNames))) %>% addTo[.]

  addToROC <- addToROC %>% purrr::map_lgl(~(predTag %in% names(.))) %>% addToROC[.]
  densityGrid <- list(obs = addToROC %>% purrr::map(~.[[predTag]]$Class),
                      model = addToROC %>% names(),
                      pred = addToROC %>% purrr::map(~.[[predTag]][,outChar]),
                      nTotal = addToROC %>% purrr::map(~.[[predTag]]) %>% purrr::map(nrow) %>% purrr::map(as.character),
                      probCut = addToROC %>% purrr::map(~.[[predTag]][,outChar]),
                      population = addToROC %>% purrr::map(~.[[predTag]][,outChar]) %>% purrr::map(dplyr::percent_rank)
                      )

  densitySummary <- purrr::pmap(densityGrid, tibble::tibble)
  density <- densitySummary %>% dplyr::bind_rows()

  ggplot(data = density, aes(x = population, fill = obs)) +
    xlab("Probability percent rankon") + ylab("Density") +
    ggtitle("Density distribution by obs population") +
    geom_density(alpha = 0.2) + ylim(0, 5) +
    facet_wrap(~model)
}
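
# Sketch: dplyr::percent_rank maps each probability to its percentile, which
# is the x-axis used above.
dplyr::percent_rank(c(0.1, 0.4, 0.4, 0.9))
# 0.0000000 0.3333333 0.3333333 1.0000000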

#' Compare Class Probability Distribution based on Probability Cut-off
#'
#' \code{suggest_probCut} compares class probability distributions
#' based on the probability cut-off.
#' x-axis: probability value.
#' y-axis: observation density.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param outChar A character value of output class name.
#' @param predTag Select prediction results whose names contain predTag.
#' @param modelTag Select model fits whose names contain modelTag.
#' @export
#' @return Plot indicating density distribution by probability cut-off.
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' sSummary <- add_prob(sSummary, x_test, y_test, outChar = "pos")
#' 
#' suggest_probCut(sSummary, "pos")
#' suggest_probCut(sSummary, "pos", modelTag = "glm|svm")
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_probCut <- function(addTo, outChar, predTag = "pred_test", modelTag = NULL) {
  `.` <- probCut <- obs <- NULL # for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag),
            purrr::is_character(predTag) || purrr::is_null(predTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  addToROC <- addTo %>% purrr::map_lgl(~(all(c("ROC", "Sens", "Spec") %in% .$perfNames))) %>% addTo[.]

  addToROC <- addToROC %>% purrr::map_lgl(~(predTag %in% names(.))) %>% addToROC[.]
  densityGrid <- list(obs = addToROC %>% purrr::map(~.[[predTag]]$Class),
                      model = addToROC %>% names(),
                      pred = addToROC %>% purrr::map(~.[[predTag]][,outChar]),
                      nTotal = addToROC %>% purrr::map(~.[[predTag]]) %>% purrr::map(nrow) %>% purrr::map(as.character),
                      probCut = addToROC %>% purrr::map(~.[[predTag]][,outChar]),
                      population = addToROC %>% purrr::map(~.[[predTag]][,outChar]) %>% purrr::map(dplyr::percent_rank)
                      )

  densitySummary <- purrr::pmap(densityGrid, tibble::tibble)
  density <- densitySummary %>% dplyr::bind_rows()

  ggplot(data = density, aes(x = probCut, fill = obs)) +
    xlab("Probability cut off") + ylab("Density") +
    ggtitle("Density distribution by probability cut off") +
    geom_density(alpha = 0.2) + ylim(0, 5) +
    facet_wrap(~model)
}
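
# Sketch (toy data): each per-model panel above is a two-class density overlay
# on the predicted probability, like this.
toy <- data.frame(prob = c(rbeta(100, 2, 5), rbeta(100, 5, 2)),
                  obs = rep(c("neg", "pos"), each = 100))
ggplot(toy, aes(x = prob, fill = obs)) + geom_density(alpha = 0.2)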

#' Plot Gain Chart and Lift Chart
#'
#' \code{suggest_gain} plots gain and lift charts, which are widely used in
#' marketing and related contexts. They indicate the effectiveness of a
#' predictive model compared to the results obtained without it.
#'
#' @param addTo Summary list that contains model fits to compare.
#' @param outChar A character value of output class name.
#' @param predTag Select prediction results whose names contain predTag.
#' @param modelTag Select model fits whose names contain modelTag.
#' @param cuts Integer indicating the number of probability buckets to split
#' the predictions into.
#' @param type Which chart to plot: "Gain" for the gain chart, "Lift" for the
#' lift chart, "PctAcc" for accumulated event percent, "Pct" for event
#' percent. If NULL, all four charts are arranged in a grid.
#' @export
#' @examples
#' \dontrun{
#' library(mlbench)
#' data(PimaIndiansDiabetes)
#' index <- sample(seq_len(nrow(PimaIndiansDiabetes)), 500)
#' trainingSet <- PimaIndiansDiabetes[index, ]
#' testSet <- PimaIndiansDiabetes[-index, ]
#' x <- trainingSet[, -9]
#' y <- trainingSet[, 9]
#' x_test <- testSet[, -9]
#' y_test <- testSet[, 9]
#' sSummary <- list()
#' sSummary <- add_model(sSummary, x, y)
#' sSummary <- add_model(sSummary, x, y, model = c("C5.0Cost", "glmnet"), modelTag = "others")
#' sSummary <- add_prob(sSummary, x_test, y_test, outChar = "pos")
#' 
#' suggest_gain(sSummary, outChar = "pos")
#' suggest_gain(sSummary, outChar = "pos", modelTag = "glm|svm", type = "Lift")
#' suggest_gain(sSummary, outChar = "pos", modelTag = "glm|svm", type = "PctAcc")
#' suggest_gain(sSummary, outChar = "pos", modelTag = "glm|svm", type = "Pct")
#' suggest_gain(sSummary, outChar = "pos", modelTag = "glm|svm", type = "Gain")
#' suggest_gain(sSummary, outChar = "pos", modelTag = "glm|svm", type = "Gain") + xlim(0, 0.5)
#' 
#' # vignette("modeval") #check a vignette for further details
#' }
suggest_gain <- function(addTo, outChar, predTag = "pred_test", modelTag = NULL, cuts = 51, type = NULL) {
  `.` <- obs <- bucket <- model <- nTotal <- n <- event <- eventCum <- nCum <- pctCum <- lift <- pct <- NULL # for CMD check pass
  
  stopifnot(purrr::is_list(addTo),
            purrr::is_character(modelTag) || purrr::is_null(modelTag),
            purrr::is_character(predTag) || purrr::is_null(predTag)
            )

  if (!is.null(modelTag)) {
    grepIndex <- grep(modelTag, names(addTo))
    addTo <- addTo[grepIndex]
  }

  addToROC <- addTo %>% purrr::map_lgl(~(all(c("ROC", "Sens", "Spec") %in% .$perfNames))) %>% addTo[.]

  addToROC <- addToROC %>% purrr::map_lgl(~(predTag %in% names(.))) %>% addToROC[.]
  liftGrid <- list(obs = addToROC %>% purrr::map(~.[[predTag]]$Class),
                   model = addToROC %>% names(),
                   pred = addToROC %>% purrr::map(~.[[predTag]][,outChar]),
                   nTotal = addToROC %>% purrr::map(~.[[predTag]]) %>% purrr::map(nrow) %>% purrr::map(as.character),
                   probCut = addToROC %>% purrr::map(~.[[predTag]][,outChar]),
                   population = addToROC %>% purrr::map(~.[[predTag]][,outChar]) %>% purrr::map(dplyr::percent_rank)
                   )

  liftSummary <- purrr::pmap(liftGrid, tibble::tibble)
  distribution <- liftSummary %>% dplyr::bind_rows()

  for (i in seq_along(liftSummary)) {
    liftSummary[[i]]$bucket = dplyr::ntile(liftSummary[[i]]$pred, cuts)
    liftSummary[[i]] = liftSummary[[i]] %>%
      dplyr::mutate(obs = ifelse(obs==outChar, 1, 0)) %>%
      dplyr::group_by(bucket, model, nTotal) %>%
      dplyr::summarize(event = sum(obs), n = n()) %>%
      dplyr::arrange(dplyr::desc(bucket)) %>% as.data.frame() %>%
      dplyr::mutate(eventCum = cumsum(event), nCum = cumsum(n)) %>%
      dplyr::mutate(pctCum = eventCum / nCum, pct = event / n,
                    xAxis = nCum / as.integer(nTotal),
                    lift = pctCum /(max(eventCum)/max(as.integer(nTotal)))
                    )
  }

  eventMax <- max(liftSummary[[1]]$eventCum)
  nMax <- max(liftSummary[[1]]$nCum)
  baseline <- eventMax / nMax

  liftObjectDataToPlot <- liftSummary %>% dplyr::bind_rows() %>% as.data.frame() %>%
    dplyr::filter(nCum <= nMax) %>% dplyr::filter(nCum >= nMax * 0.001)

  lift_gain <- ggplot(data = liftObjectDataToPlot) +
    xlim(0.0001, 1) + ggtitle("Gain Chart") +
    xlab("Population of observation") + ylab("Proportion of event") +
    geom_line(mapping = aes(x = nCum / nMax, y = eventCum / eventMax, color = model)) +
    geom_line(mapping = aes(x = nCum / nMax, y = nCum / nMax))

  lift_lift <- ggplot(data = liftObjectDataToPlot) +
    xlim(0.0001, 1) + ggtitle("Lift Chart") +
    xlab("Population of observation") + ylab("Lift Value") +
    geom_line(mapping = aes(x = nCum / nMax, y = lift, color = model)) +
    geom_line(mapping = aes(x = nCum / nMax, y = 1))

  lift_PctAcc <- ggplot(data = liftObjectDataToPlot) +
    xlim(0.0001, 1) + ggtitle("Accumulated percent of event(%)") +
    xlab("Population of observation") + ylab("Percent of event") +
    geom_line(mapping = aes(x = nCum / nMax, y = pctCum, color = model)) +
    geom_line(mapping = aes(x = nCum / nMax, y = baseline))

  lift_Pct <- ggplot(data = liftObjectDataToPlot) +
    ggtitle("Percent of event(%)") +
    xlab("Population of observation") + ylab("Percent of event") +
    geom_line(mapping = aes(x = nCum / nMax, y = pct, color = model), alpha = 0.1) +
    geom_smooth(mapping = aes(x = nCum / nMax, y = pct, color = model), alpha = 0.1, method = "loess") +
    geom_line(mapping = aes(x = nCum / nMax, y = baseline))

  if (is.null(type)) {
    lay <- rbind(c(1,2), c(3,4))
    gridExtra::grid.arrange(grobs = list(lift_gain, lift_lift, lift_PctAcc, lift_Pct), layout_matrix = lay)
  } else {
    switch (type,
            "Gain" = lift_gain,
            "Lift" = lift_lift,
            "PctAcc" = lift_PctAcc,
            "Pct" = lift_Pct,
            stop("Type doesn't match")
    )
  }
}
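
# Minimal sketch (toy data): the cumulative-event logic behind the gain chart.
# Observations are ranked by predicted probability; gain at each depth is the
# share of all events captured so far.
set.seed(2)
toy <- data.frame(pred = runif(20), event = rbinom(20, 1, 0.3))
toy <- toy[order(-toy$pred), ]
toy$popn <- seq_len(nrow(toy)) / nrow(toy)      # population depth
toy$gain <- cumsum(toy$event) / sum(toy$event)  # proportion of events captured
head(toy[, c("popn", "gain")])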
