R/modsearch.R

##' @title Generate a list of \code{\linkS4class{ROCit}} objects or \code{\linkS4class{RMSEit}} for different datasets
##' @param fit a \code{\linkS4class{train}} object generated by \code{\link{ROCtest}} or \code{\link{RMSEtest}}
##' @param datatype a named character representing the accuracy object be built on either "train" or "test" data, 
##' user can include both
##' @param testdata a list of length two containing a named slot for the matrix of predictors 
##' (pred) and the vector of classes (class)
##' @param modelKeep a logical indicating whether the original model object should be stored, default is FALSE
##' @param ... additional arguments passed to \code{\link{modSearch}}
##' @return a list with the following:
##' \itemize{
##' \item{method - the \code{\link{train}} method used to fit the model}
##' \item{summaryTr - the \code{\linkS4class{ROCit}} or \code{\linkS4class{RMSEit}} for the training data}
##' \item{summaryTe - the \code{\linkS4class{ROCit}} \code{\linkS4class{RMSEit}} for the test data}
##' \item{time - the time reported for the model to run, taken from the \code{\link{train}} object if available}
##' \item{metricSD - the standard deviation of the metric for the best model reported by train}
##' }
##' @note The values presented are for the optimal threshold as computed by the \code{\link{roc}} function for ROC objects.
##' @export
modAcc <- function(fit, datatype = c("test", "train"), testdata = NULL, modelKeep = FALSE, ...){
  if (missing(modelKeep)){
    modelKeep <- FALSE
  }
  if(!exists("metric", where = fit)){
    if(class(fit)[1] == "glm" | class(fit)[1] == "lm"){
      if(length(unique(fit$y)) == 2){
        fit$metric <- "ROC"
        warning("ROC selected by default. To use another, refit model with train")
      } else{
        fit$metric <- "RMSE"
      }
    }
  }
  if(missing(datatype)){
    datatype <- "train"
    message("Training data only being used. Specify datatype = 'test' and give test data to testdata.")
  }
  if("test" %in% datatype & missing(testdata)){
    stop("Please provide testdata")
  }
  if(fit$metric == "ROC"){
    if(class(fit)[1] == "train"){
      SD <- fit$results$ROCSD[fit$results$ROC == max(fit$results$ROC)]
    } else{
      message("Metric SD not available for glm fit by glm. Try fitting by train")
      SD <- NA
    }
    if (length(datatype) > 1){
      train <- ROCtest(fit, ...)
      test <-  ROCtest(fit, testdata=list(preds = testdata$preds, 
                                          class = testdata$class), ...)    
    } else if(length(datatype) < 2 & datatype=="test"){
      test <- ROCtest(fit, testdata=list(preds = testdata$preds, 
                                         class = testdata$class), ...)
      train <- NULL
    } else if (length(datatype) < 2 & datatype=="train"){
      train <- ROCtest(fit, ...)
      test <- NULL
    }
  } else if(fit$metric == "Dist"){
    if(class(fit)[1] == "train"){
      SD <- fit$results$DistSD[fit$results$Dist == min(fit$results$Dist)]
    } else{
      message("Metric SD not available for glm fit by glm. Try fitting by train")
      SD <- NA
    }
    if (length(datatype) > 1){
      train <- DIStest(fit, ...)
      test <-  DIStest(fit, testdata=list(preds = testdata$preds, 
                                          class = testdata$class), ...)    
    } else if(length(datatype) < 2 & datatype=="test"){
      test <- DIStest(fit, testdata=list(preds = testdata$preds, 
                                         class = testdata$class), ...)
      train <- NULL
    } else if (length(datatype) < 2 & datatype=="train"){
      train <- DIStest(fit, ...)
      test <- NULL
    }
  } else if(fit$metric == "RMSE"){
    if(class(fit)[1] == "train"){
      SD <- fit$results$"RMSE SD"[fit$results$RMSE == max(fit$results$RMSE)]
    } else{
      message("Metric SD not available for glm fit by glm. Try fitting by train")
      SD <- NA
    }
    if (length(datatype) > 1){
      train <- RMSEtest(fit)
      test <-  RMSEtest(fit, testdata=list(preds = testdata$preds, 
                                             class = testdata$class))    
    } else if(length(datatype) < 2 & datatype=="test"){
      test <- RMSEtest(fit, testdata=list(preds = testdata$preds, 
                                            class = testdata$class))
      train <- NULL
    } else if (length(datatype) < 2 & datatype=="train"){
      train <- RMSEtest(fit)
      test <- NULL
    }
  }
  if(modelKeep == TRUE){
    return(list(model=fit, summaryTr = train, summaryTe = test, method=fit$method, 
                time = fit$times$everything[3], metric = fit$metric, metricSD = SD))
  } else if(modelKeep == FALSE){
    return(list(method=fit$method, summaryTr = train, summaryTe = test, 
                time = fit$times$everything[3], metric = fit$metric, metricSD = SD))
  }
}

##' @title Generate a dataframe from \code{\link{modAcc}} lists
##' @description Used for generating the data to make good looking RMSE plots for 
##' training and test data simultaneously
##' @param mod a list resulting from a call to \code{\link{modAcc}}
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{RMSE - the RMSE for the model on the test or training data}
##' \item{RMSEsd - the standard deviation of the RMSE reported by training with caret}
##' \item{Rsquare - The R squared reported for the best-fit in caret}
##' \item{RsquareSD - the model method}
##' \item{method - the area under the curve}
##' \item{grp - the area under the curve}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note The measures come from the \code{\linkS4class{RMSEit}} object
##' @export 
dfExtractRMSE <- function(mod){
  if(class(mod$summaryTr) != "NULL"){
    newdatTr <- data.frame(RMSE= mod$summaryTr@RMSE, 
                           RMSEsd = max(mod$summaryTr@bestFit$RMSESD), 
                           Rsquare = max(mod$summaryTr@bestFit$Rsquared), 
                           RsquareSD = max(mod$summaryTr@bestFit$RsquaredSD), 
                           method = mod$method, 
                           grp = "train", 
                           elapsedTime = ifelse(is.null(mod$time), NA, mod$time), 
                           check.rows = FALSE, row.names = NULL)
  }
  if(class(mod$summaryTe) != "NULL"){
    newdatTe <- data.frame(RMSE= mod$summaryTe@RMSE, 
                           RMSEsd = max(mod$summaryTe@bestFit$RMSESD), 
                           Rsquare = max(mod$summaryTe@bestFit$Rsquared), 
                           RsquareSD = max(mod$summaryTe@bestFit$RsquaredSD), 
                           method = mod$method, 
                           grp = "test", 
                           elapsedTime = ifelse(is.null(mod$time), NA, mod$time), 
                           check.rows = FALSE, row.names = NULL)
  }
  if(class(mod$summaryTr) != "NULL" & class(mod$summaryTe) != "NULL"){
    tmp <- rbind(newdatTr, newdatTe)
  } else if(class(mod$summaryTr) != "NULL"){
    tmp <- newdatTr
  } else if(class(mod$summaryTe) != "NULL"){
    tmp <- newdatTe
  }
  tmp$RMSE <- as.numeric(tmp$RMSE)
  tmp$RMSEsd <- as.numeric(tmp$RMSEsd)
  tmp$Rsquare <- as.numeric(tmp$Rsquare)
  tmp$RsquareSD <- as.numeric(tmp$RsquareSD)
  tmp$method <- as.character(tmp$method)
  tmp$grp <- as.character(tmp$grp)
  tmp$elapsedTime <- as.numeric(tmp$elapsedTime)
  return(tmp)
}

##' @title Generate a dataframe from \code{\link{modAcc}} lists
##' @description Used for generating dataframes of distance fits for binary 
##' classification models on training and test data
##' @param mod a list resulting from a call to \code{\link{modAcc}}
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{DIST - the distance from perfect classification}
##' \item{DISTSD - the standard deviation of the distance}
##' \item{SENS - The sensitivity of the classifier}
##' \item{SPEC - The specificity of the classifier}
##' \item{method - the area under the curve}
##' \item{grp - the area under the curve}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note The measures come from the \code{\linkS4class{DISit}} object
##' @export 
dfExtractDIS <- function(mod){
  if(class(mod$summaryTr) != "NULL"){
    newdatTr <- data.frame(DIST= mod$summaryTr@dist, 
                           DISTSD = mod$metricSD, 
                           SENS = mod$summaryTr@confusematrix$byClass[["Sensitivity"]], 
                           SPEC = mod$summaryTr@confusematrix$byClass[["Specificity"]], 
                           method = mod$method, 
                           grp = "train", 
                           elapsedTime = ifelse(is.null(mod$time), NA, mod$time), 
                           check.rows = FALSE, row.names = NULL)
  }
  if(class(mod$summaryTe) != "NULL"){
    newdatTe <- data.frame(DIST= mod$summaryTe@dist, 
                           DISTSD = mod$metricSD, 
                           SENS = mod$summaryTe@confusematrix$byClass[["Sensitivity"]], 
                           SPEC = mod$summaryTe@confusematrix$byClass[["Specificity"]], 
                           method = mod$method, 
                           grp = "test", 
                           elapsedTime = ifelse(is.null(mod$time), NA, mod$time), 
                           check.rows = FALSE, row.names = NULL)
  }
  if(class(mod$summaryTr) != "NULL" & class(mod$summaryTe) != "NULL"){
    tmp <- rbind(newdatTr, newdatTe)
  } else if(class(mod$summaryTr) != "NULL"){
    tmp <- newdatTr
  } else if(class(mod$summaryTe) != "NULL"){
    tmp <- newdatTe
  }
  tmp$DIST <- as.numeric(tmp$DIST)
  tmp$DISTSD <- as.numeric(tmp$DISTSD)
  tmp$SENS <- as.numeric(tmp$SENS)
  tmp$SPEC <- as.numeric(tmp$SPEC)
  tmp$method <- as.character(tmp$method)
  tmp$grp <- as.character(tmp$grp)
  tmp$elapsedTime <- as.numeric(tmp$elapsedTime)
  tmp <- tmp[!duplicated(tmp)]
  return(tmp)
}

##'@title Generate a dataframe from \code{\link{modAcc}} lists
##'@description Generic function to extract either ROC data or RMSE data from models
##'@param mod a list resulting from a call to \code{\link{modAcc}}
##'@return a data.frame with columns relating to RMSE or ROC fits
##'@note See dfExtractRMSE or dfExtractROC for details
##'@export
dfExtract <- function(mod){
  if(mod$metric == "ROC"){
    tmp <- dfExtractROC(mod)
  } else if(mod$metric == "RMSE"){
    tmp <- dfExtractRMSE(mod)
  } else if(mod$metric == "Dist"){
    tmp <- dfExtractDIS(mod)
  }
  return(tmp)
}


##' @title Generate a dataframe from \code{\link{modAcc}} lists
##' @description Used for generating the data to make good looking ROC curves of 
##' training and test data.
##' @param mod a list resulting from a call to \code{\link{modAcc}}
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{sens - the sensitivities of the model at various thresholds}
##' \item{spec - the specificities of the model at various thresholds}
##' \item{grp - whether the model is using training or test data}
##' \item{method - the model method}
##' \item{auc - the area under the curve}
##' \item{aucSD - the standard deviation of the AUC for the best model reported by train}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note The sensitivities and specificities come from the \code{\link{roc}} object stored in the 
##' \code{\linkS4class{ROCit}} object
##' @export
dfExtractROC <- function(mod){
  if(class(mod$summaryTr) != "NULL"){
      newdatB <- data.frame(sens = as.numeric(mod$summaryTr@rocobj$sensitivities), 
                            spec = as.numeric(mod$summaryTr@rocobj$specificities), 
                            grp="train", 
                            auc = mod$summaryTr@auc,
                            aucSD = mod$metricSD,
                            method = ifelse(is.null(mod$method), NA, mod$method),
                            elapsedTime = ifelse(is.null(mod$time), NA, mod$time), 
                            check.rows=FALSE, 
                            row.names=NULL)
      }
    if(class(mod$summaryTe) != "NULL"){
      newdatA <- data.frame(sens = as.numeric(mod$summaryTe@rocobj$sensitivities), 
                            spec = as.numeric(mod$summaryTe@rocobj$specificities), 
                            grp="test",
                            auc  = mod$summaryTe@auc,
                            aucSD = mod$metricSD,
                            method = ifelse(is.null(mod$method), NA, mod$method), 
                            elapsedTime =ifelse(is.null(mod$time), NA, mod$time), 
                            check.rows=FALSE, 
                            row.names=NULL)
      }    
    if(class(mod$summaryTr) != "NULL" & class(mod$summaryTe) != "NULL"){
      tmp <- rbind(newdatA, newdatB)
    } else if(class(mod$summaryTr) != "NULL"){
      tmp <- newdatB
    } else if(class(mod$summaryTe) != "NULL"){
      tmp <- newdatA
    }
    tmp$sens <- as.numeric(tmp$sens)
    tmp$spec <- as.numeric(tmp$spec)
    tmp$auc <- as.numeric(tmp$auc)
    tmp$method <- as.character(tmp$method)
    tmp$auc <- as.numeric(tmp$auc)
    tmp$grp <- as.character(tmp$grp)
    tmp$aucSD <- as.numeric(tmp$aucSD)
    tmp$elapsedTime <- as.numeric(tmp$elapsedTime)
    tmp <- tmp[, c("sens", "spec", "auc", "aucSD","method", "grp", "elapsedTime")]
    return(tmp)
}

##' @title Train a model and store \code{\linkS4class{ROCit}} tests on different datasets
##' @description This function wraps the \code{train} function in the \code{caret} package with model accuracy reports. 
##' It also allows for errors in fitting models to be caught to make it easier to use in a loop. 
##' @param method a a string specifying which classification or regression model to use. Possible values are found using \code{names(getModelInfo())}.
##' @param datatype a named character representing the accuracy object be built on either "train" or "test" data, 
##' user can include both
##' @param traindata a list of length two containing a named slot for the matrix of predictors 
##' (pred) and the vector of classes (class)
##' @param testdata a list of length two containing a named slot for the matrix of predictors 
##' (pred) and the vector of classes (class)
##' @param modelKeep a logical indicating whether the original model object should be stored
##' @param length an integer denoting the number of levels of each tuning parameter 
##' that should be generated to be passed to \code{tuneLength} in the \code{train} call
##' @param fitControl an object generated by \code{trainControl} to control the
##'  behavior of \code{train}. If none is given a default is selected.
##' @param metric a character string passed to \code{train}. a string that specifies what 
##' summary metric will be used to select the optimal model. By default, possible 
##' values are "RMSE" and "Rsquared" for regression and "Accuracy" and "Kappa" for 
##' classification. If custom performance metrics are used (via the \code{summaryFunction}
##' argument in trainControl, the value of metric should match one of the arguments. 
##' If it does not, a warning is issued and the first metric given by the 
##' summaryFunction is used. 
##' @param cores An integer representing the number of cores to use on Windows. If not on windows, a warning is issued. 
##' @param ... Additional arguments to be passed to \code{\link{train}}
##' @return A character string with an error if unsuccessful. The result of the \code{modAcc} call if successful: 
##' \itemize{
##' \item{method - the \code{\link{train}} method used to fit the model}
##' \item{summaryTr - the \code{\linkS4class{ROCit}} for the training data}
##' \item{summaryTe - the \code{\linkS4class{ROCit}} for the test data}
##' \item{time - the time reported for the model to run, taken from the \code{\link{train}} object if available}
##' }
##' @note The values presented are for the optimal threshold as computed by the \code{\link{roc}} function.
##' For some model types linear combos of predictors may be omitted.
##' @export
##' @importFrom parallel makeCluster
##' @importFrom parallel stopCluster
##' @importFrom doParallel stopImplicitCluster
##' @importFrom doParallel registerDoParallel
##' @import caret
modTest <- function(method, datatype=c("train", "test"), traindata, 
                    testdata=NULL, 
                      modelKeep=FALSE, length = NULL, fitControl = NULL, 
                    metric = NULL, cores = NULL, ...){
  args <- eval(substitute(alist(...)))
  args <- lapply(args, eval, parent.frame())
    if("omit" %in% names(args)){
      stop("Cannot omit an index of variables. Instead see caret::findLinearCombos")
    }
  # Let's dump out some defaults
  # Set up cores for Windows
  if("cores" %in% names(args)){
    cores <- args$cores
  }
  if(!missing(cores) & !is.null(cores)){
    myOS <- Sys.info()['sysname']
    if(myOS!="Windows"){  
      warning("Only declare cores on Windows machines. On Linux 
                             you can declare parallel outside of the modTest 
                             or modSearch call.")
    } else {
      myclus <- parallel::makeCluster(cores)
      doParallel::registerDoParallel(myclus)
    } 
  }
  datD <- c('rda', 'lda2', 'hda', 'mda', 'mlp', 'mlpWeightDecay', 
            'rbf', 'rpart2', 'C5.0Rules', 'pda2', 'rda', 'glm',
            'treebag', 'rf', 'plr', 'lda', 'xyf', 'sddaLDA', 'sddaQDA', 
            'LogitBoost', 'C5.0', 'bag', 'C5.0Tree')
  if(method %in% datD & metric %in% c("ROC", "Dist")){
    omit <- findLinearCombos(traindata$preds)$remove
    cols <- 1:ncol(traindata$preds)
    keep <- cols[!cols %in% omit]
  } else if(metric == "RMSE"){
    omit <- findLinearCombos(traindata$preds)$remove
    cols <- 1:ncol(traindata$preds)
    keep <- cols[!cols %in% omit]
  } else{
    keep <-  1:ncol(traindata$preds)
  }
  
  callList <- list("method" = method, 
                  "trControl" = quote(fitControl), 
                   "tuneLength" = length, 
                   "metric" = metric, 
                   "x" = quote(traindata$preds[, keep]), 
                   "y" = quote(traindata$class))
  if(!is.null(names(args))){
    callList <- c(callList, args)
    callList <- Filter(Negate(is.null), callList)
  }
    fit <- tryCatch({
      do.call("train", callList)},
       error = function(e) 
              message(paste0("Model failed to run: ", method)))
  # multicore
  if(!missing(cores) & !is.null(cores)){
    if(myOS == "Windows"){
      try(parallel::stopCluster(myclus))
      try(doParallel::stopImplicitCluster())
    }
  }
  if(class(fit) == "character"){
    message(paste0("Model failed to run: ", method))
    
  } else if(class(fit) == "train"){
    callList2 <- list("fit" = quote(fit), 
                      "datatype" = datatype, 
                      "testdata" = quote(list(preds = testdata$preds[, keep], 
                                              class = testdata$class )),
                      "modelKeep" = modelKeep)
    
    fitSum <- do.call(modAcc, callList2)
    } 
  return(fitSum)
}

##' @title Generate an empty dataframe to match \code{\link{modAcc}} lists
##' @description Used for generating the data to make good looking ROC curves of 
##' training and test data.
##' @param methods a list of \code{train} method names to generate the dataframe for
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{sens - the sensitivities of the model at various thresholds}
##' \item{spec - the specificities of the model at various thresholds}
##' \item{auc - the area under the curve}
##' \item{auc - the standard deviation of the AUC for the best method reported by train}
##' \item{method - the model method}
##' \item{grp - whether the model is using training or test data}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note The sensitivities and specificities come from the \code{\link{roc}} object stored in the 
##' \code{\linkS4class{ROCit}} object
buildROCcurveFrame <- function(methods){
  ModelFits <- expand.grid(sens = NA, spec = NA,  auc = NA, aucSD = NA,
                           method = rep(methods, each = 1028), grp = NA,
                           elapsedTime = NA)
  
  # Class variables correctly to avoid errors
  ModelFits$grp <- as.character(ModelFits$grp)
  ModelFits$method <- as.character(ModelFits$method)
  ModelFits$sens <- as.numeric(ModelFits$sens)
  ModelFits$spec <- as.numeric(ModelFits$spec)
  ModelFits$auc <- as.numeric(ModelFits$auc)
  ModelFits$aucSD <- as.numeric(ModelFits$aucSD)
  return(ModelFits)
  
}

##' @title Generate an empty dataframe to match \code{\link{modAcc}} lists
##' @description Used for generating the data to make comparative RMSE plots.
##' @param methods a list of \code{train} method names to generate the dataframe for
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{RMSE - the RMSE for the model on the test or training data}
##' \item{RMSEsd - the standard deviation of the RMSE reported by training with caret}
##' \item{Rsquare - The R squared reported for the best-fit in caret}
##' \item{RsquareSD - the model method}
##' \item{method - the area under the curve}
##' \item{grp - the area under the curve}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note The measures come from the \code{\linkS4class{RMSEit}} object
buildRMSEFrame <- function(methods){
  ModelFits <- expand.grid(RMSE = NA, RMSEsd = NA, Rsquare = NA, RsquareSD = NA, 
                           method = rep(methods, each = 1), grp = NA, 
                           elapsedTime = NA)
  # Class variables correctly to avoid errors
  ModelFits$grp <- as.character(ModelFits$grp)
  ModelFits$method <- as.character(ModelFits$method)
  ModelFits$RMSE <- as.numeric(ModelFits$RMSE)
  ModelFits$RMSEsd <- as.numeric(ModelFits$RMSEsd)
  ModelFits$Rsquare <- as.numeric(ModelFits$Rsquare)
  ModelFits$RsquareSD <- as.numeric(ModelFits$RsquareSD)
  return(ModelFits)
  
}

##' @title Generate an empty dataframe to match \code{\link{modAcc}} lists
##' @description Used for generating the data to make comparative distance statistics of binary classifiers.
##' @param methods a list of \code{train} method names to generate the dataframe for
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{DIST - the distance from perfect classification}
##' \item{DISTSD - the standard deviation of the distance}
##' \item{SENS - The sensitivity of the classifier}
##' \item{SPEC - The specificity of the classifier}
##' \item{method - the area under the curve}
##' \item{grp - the area under the curve}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note The measures come from the \code{\linkS4class{DISit}} object
buildDISFrame <- function(methods){
  ModelFits <- expand.grid(DIST = NA, DISTSD = NA, SENS = NA, SPEC = NA, 
                           method = rep(methods, each = 1), grp = NA, 
                           elapsedTime = NA)
  # Class variables correctly to avoid errors
  ModelFits$grp <- as.character(ModelFits$grp)
  ModelFits$method <- as.character(ModelFits$method)
  ModelFits$DIST <- as.numeric(ModelFits$DIST)
  ModelFits$DISTSD <- as.numeric(ModelFits$DISTSD)
  ModelFits$SENS <- as.numeric(ModelFits$SENS)
  ModelFits$SPEC <- as.numeric(ModelFits$SPEC)
  return(ModelFits)
  
}

##' @title Generate an empty dataframe to match \code{\link{modAcc}} lists
##' @description Used for generating the data to make good looking ROC curves of 
##' training and test data.
##' @param methods a list of \code{train} method names to generate the dataframe for
##' @param ... additional arguments passed to \code{\link{modTest}}
##' @return a \code{\link{data.frame}} with the following columns:
##' \itemize{
##' \item{sens - the sensitivities of the model at various thresholds}
##' \item{spec - the specificities of the model at various thresholds}
##' \item{grp - whether the model is using training or test data}
##' \item{auc - the area under the curve}
##' \item{method - the model method}
##' \item{elapsedTime - the time reported for the model to run}
##' }
##' @note Currently the arguments passed to modSearch are evaluated in the parent frame. 
##' This means that you cannot pass list elements or data.frame elements to the parameters 
##' of modSearch that get passed along to \code{\link{modTest}} via \code{...}. 
##' Instead, you need to declare them as separate variables in the parent 
##' environment and save them there. 
##' @details The sensitivities and specificities come from the \code{\link{roc}} object stored in the 
##' \code{\linkS4class{ROCit}} object
##' @export
##' @import caret
modSearch <- function(methods, ...){
  # parse ellipsis for modTest
  args <- eval(substitute(alist(...)))
  args <- lapply(args, eval, parent.frame())
  # sanitize arguments
  if(exists("metric", args)){
      metric <- args$metric
    } else {
    warning("No metric defined, default will be ROC")
    args$metric <- "ROC"
    metric <- "ROC"
    }
  if(exists("datatype", args)){
      datatype <- args$datatype
    } else {
    warning("Parameter datatype is undefined, default is training")
    args$datatype <- "train"
    datatype <- "train"
  }
  if(exists("length", args)){
    length <- args$length
    } else {
    warning("Parameter length is undefined, default is 4 for tuneLength")
    args$length <- 4
    length <- 4
  }
  if(exists("cores", args)){
    cores <- args$cores
    } else {
    message("Cores is not defined. To run in parallel define cores or construct parallel outside of function call.")
    cores <- NULL
  }
  if(metric == "ROC"){
    if(length(datatype) > 1){
      ModelFits <-rbind(buildROCcurveFrame(methods), buildROCcurveFrame(methods))
    } else{
      ModelFits <- buildROCcurveFrame(methods)
    }
  } else if(metric == "RMSE"){
    if(length(datatype) > 1){
      ModelFits <- rbind(buildRMSEFrame(methods), buildRMSEFrame(methods))
    } else {
      ModelFits <- buildRMSEFrame(methods)
    }
    
  } else if(metric == "Dist"){
    if(length(datatype) > 1){
      ModelFits <- rbind(buildDISFrame(methods), buildDISFrame(methods))
    } else {
      ModelFits <- buildDISFrame(methods)
    }
  } else{
    stop("No custom performance frame defined for metric")
  }
  pb <- txtProgressBar(min = 0, max = length(methods), style = 3)
  for(i in methods){
    message(paste0("Started method ", i, " at ", Sys.time(), "."))
    processStartTime <- Sys.time()
    p <- match(i, methods)
    z <- list(method = i)
    z <- c(z, args)
    fit <- try(do.call(modTest, z), silent = TRUE)
    tmp <- tryCatch(dfExtract(fit), error = function(e) "No Model Ran")
    #
    if(class(tmp) == "data.frame"){
      ModelFits <- ModelFits[ModelFits$method != i, ] 
      ModelFits <- rbind(ModelFits, tmp)
      rm(tmp)
    } else{
      ModelFits <- ModelFits
      message(paste(tmp, "failure for model type:", i, sep=" "))
    }
      message(paste0("Processed method ", i, " for ", 
      round(as.numeric(difftime(Sys.time(), processStartTime, u = 'mins')), 2),
      " minutes."))
      rm(processStartTime)
      setTxtProgressBar(pb, p)      
  }
  ModelFits <- ModelFits[!duplicated(ModelFits),] # drop duplicates
  return(ModelFits)
}
jknowles/EWStools documentation built on May 19, 2019, 11:42 a.m.