# Nothing
#' @title Get modeling metrics
#' @description \code{modelingSummary} is an automatic function for modeling data. It trains five machine learning algorithms (KNN, SVM, RF, NNET, and Bcart), predicts on the test data of each fitted model, and collects the resulting metrics in one table. This function is based on the splitData, tuneTrain, predict, and getMetrics functions.
#' @param data object of class "data.frame" with target variable and predictor variables.
#' @param y character. Target variable.
#' @param p numeric. Proportion of data to be used for training. Default: 0.7
#' @param length integer. Number of values to output for each tuning parameter. If \code{search = "random"} is passed to \code{\link[caret]{trainControl}} through \code{...}, this becomes the maximum number of tuning parameter combinations that are generated by the random search. Default: 10.
#' @param control character. Resampling method to use. Choices include: "boot", "boot632", "optimism_boot", "boot_all", "cv", "repeatedcv", "LOOCV", "LGOCV", "none", "oob", "timeslice", "adaptive_cv", "adaptive_boot", or "adaptive_LGOCV". Default: "repeatedcv". See \code{\link[caret]{train}} for specific details on the resampling methods.
#' @param number integer. Number of cross-validation folds or number of resampling iterations. Default: 10.
#' @param repeats integer. Number of complete sets of folds to compute for repeated k-fold cross-validation if "repeatedcv" is chosen as the resampling method in \code{control}. Default: 10.
#' @param summary expression. Computes performance metrics across resamples. For numeric \code{y}, the mean squared error and R-squared are calculated. For factor \code{y}, the overall accuracy and Kappa are calculated. See \code{\link[caret]{trainControl}} and \code{\link[caret]{defaultSummary}} for details on specification and summary options. Default: multiClassSummary.
#' @param process character. Defines the pre-processing transformation of predictor variables to be done. Options are: "BoxCox", "YeoJohnson", "expoTrans", "center", "scale", "range", "knnImpute", "bagImpute", "medianImpute", "pca", "ica", or "spatialSign". See \code{\link[caret]{preProcess}} for specific details on each pre-processing transformation. Default: c('center', 'scale').
#' @param positive character. The positive class for the target variable if \code{y} is factor. Usually, it is the first level of the factor.
#' @param parallelComputing logical. Indicates whether to also use parallel processing. Default: FALSE
#' @param classtype integer. Indicates the number of classes of the trait. Must be 2 or greater; when it equals 2 the confusion-matrix counts are appended to the metrics table.
#' @param ... additional arguments forwarded to \code{tuneTrain}, to be passed on to the \code{createDataPartition}, \code{trainControl} and \code{train} functions in the package \code{caret}.
#' @return A list with six elements: \code{knn.mod}, \code{svm.mod}, \code{rf.mod}, \code{nnet.mod} and \code{bcart.mod} (the objects returned by \code{tuneTrain} for each algorithm) and \code{metrics}, a table with one column per algorithm ("K-NN", "SVM", "RF", "NNET", "BCART") holding the metrics returned by \code{getMetrics}. When \code{classtype} is 2, four extra rows ("True Positive", "False Positive", "True Negative", "False Negative") are appended.
#' @details \code{tuneTrain} relies on package \code{caret} to perform the modeling. Types of classification and regression models available for use with \code{tuneTrain} can be found using \code{names(getModelInfo())}. The results given depend on the type of model used.
#'
#' All tuning arguments (\code{p}, \code{length}, \code{control}, \code{number}, \code{repeats}, \code{process}, \code{summary} and \code{...}) are forwarded unchanged to every \code{tuneTrain} call. In interactive sessions the metrics table is also opened with \code{\link[utils]{View}}; in non-interactive sessions it is only returned.
#'
#' @author Zakaria Kehel, Khadija Aziz
#' @examples
#' if(interactive()){
#' data(septoriaDurumWC)
#' models <- modelingSummary(data = septoriaDurumWC, y = "ST_S", positive = "R", classtype = 2)
#' }
#' @seealso
#' \code{\link[caret]{createDataPartition}},
#' \code{\link[caret]{trainControl}},
#' \code{\link[caret]{train}},
#' \code{\link[caret]{predict.train}},
#' \code{\link[caret]{confusionMatrix}}
#' @rdname modelingSummary
#' @export
#' @importFrom caret createDataPartition trainControl train predict.train confusionMatrix multiClassSummary
#' @importFrom utils View
#' @importFrom stats predict xtabs
modelingSummary <- function (data, y, p = 0.7,
                             length = 10, control = "repeatedcv", number = 10,
                             repeats = 10, process = c('center', 'scale'),
                             summary = multiClassSummary, positive,
                             parallelComputing = FALSE,
                             classtype, ...) {
  #### Validation ####
  # Fail before any (expensive) training instead of after it: without this
  # check an unsupported 'classtype' left the metrics table undefined.
  if (missing(classtype)) {
    stop("'classtype' is required: the number of classes of the target variable.",
         call. = FALSE)
  }
  include.cm <- isTRUE(classtype == 2)
  if (!include.cm && !isTRUE(classtype > 2)) {
    stop("'classtype' must be a single value equal to 2 or greater.",
         call. = FALSE)
  }

  # Element name in the returned list -> caret method name.
  model.methods <- c(knn.mod = "knn",
                     svm.mod = "svmLinear2",
                     rf.mod = "rf",
                     nnet.mod = "nnet",
                     bcart.mod = "treebag")
  # Column labels of the metrics table, in the same order as model.methods.
  model.labels <- c("K-NN", "SVM", "RF", "NNET", "BCART")

  #### Tuning ####
  # tuneTrain is called directly from this frame (not from a closure) so that
  # a missing 'positive' and the default 'summary' reach it exactly as if the
  # caller had invoked tuneTrain itself.
  # NOTE(review): every fitted object carries its own `Test Data`; presumably
  # tuneTrain partitions the data on each call - confirm against tuneTrain.
  models <- vector("list", 5L)
  names(models) <- names(model.methods)
  for (key in names(model.methods)) {
    models[[key]] <- tuneTrain(data = data, y = y, p = p,
                               method = model.methods[[key]],
                               parallelComputing = parallelComputing,
                               length = length, control = control,
                               number = number, repeats = repeats,
                               process = process, summary = summary,
                               positive = positive, ...)
  }

  #### PREDICTING ####
  # The first column of `Test Data` is used as the target; the remaining
  # columns are the predictors handed to predict().
  predictions <- lapply(models, function(fit) {
    stats::predict(fit$Model, fit$`Test Data`[ , -1])
  })

  # Apply metrics function to yhat and y from each model
  metrics <- Map(function(fit, yhat) {
    getMetrics(unlist(fit$`Test Data`[1]), yhat, classtype = classtype)
  }, models, predictions)

  # unname() so cbind() sees the same unnamed arguments as a literal call.
  metrics.models <- do.call(cbind, unname(lapply(metrics, function(m) m$Metrics)))
  # colnames<- labels the columns of both data frames and matrices.
  colnames(metrics.models) <- model.labels

  if (include.cm) {
    # Flatten each 2x2 table as: column 1 top-to-bottom, then column 2
    # bottom-to-top, matching the four row labels assigned below. Counts are
    # stored as factors, as before, so they are not reformatted by rbind().
    cm.counts <- lapply(metrics, function(m) {
      as.factor(c(as.numeric(m$CM[, 1]), as.numeric(m$CM[, 2][c(2, 1)])))
    })
    metrics.CM <- as.data.frame(cm.counts)
    row.names(metrics.CM) <- c("True Positive","False Positive","True Negative","False Negative")
    names(metrics.CM) <- model.labels
    final.metrics <- rbind(metrics.models, metrics.CM)
  } else {
    final.metrics <- metrics.models
  }

  # Opening a data viewer only makes sense (and only reliably works) when a
  # user is attached to the session.
  if (interactive()) {
    utils::View(final.metrics)
  }

  c(models, list(metrics = final.metrics))
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.