# R/ranger_train_predict.R

#' Train a 2-class random forest classifier (`ranger` object) and return its
#' predictions on an evaluation set.
#'
#' The target column is converted to a factor, a probability forest is fitted
#' on `train_df`, and the predicted probability of the class labelled `"1"` is
#' returned for every row of `eval_df`.
#'
#' @param train_df Training data.frame; must contain the column `target_var`.
#' @param target_var Name of the target variable.
#' @param params Named list of hyper-parameters. The entries read are
#'   `num.trees` (default 50), `mtry` (left to `ranger` when absent, otherwise
#'   capped at the number of predictor columns), `min.node.size` (default 10,
#'   never below 1) and `sample.fraction` (default 1).
#' @param eval_df Evaluation data.frame.
#' @param ... Overrides for `num.trees`, `mtry`, `min.node.size` and
#'   `sample.fraction`. They take precedence over the matching entries of
#'   `params`; any other argument is ignored.
#'
#' @return The vector of predicted probabilities of class `"1"`.
#' @importFrom  ranger ranger
#' @importFrom dplyr coalesce
#' @importFrom purrr "%||%"
#' @note Only binary classification is currently supported, and one of the
#'   classes of the target must be labelled `"1"`.
ranger_classifier <- 
  function(
    train_df, 
    target_var, 
    params, 
    eval_df, 
    ...){
    
    overrides <- list(...)
    params <- as.list(params)
    
    # Values given through `...` win over those in `params`.
    tunable <- c('num.trees', 'mtry', 'min.node.size', 'sample.fraction')
    for (nm in intersect(names(overrides), tunable)) {
      params[[nm]] <- overrides[[nm]]
    }
    
    train_df <- as.data.frame(train_df)
    train_df[[target_var]] <- factor(train_df[[target_var]])
    
    # Cap `mtry` at the number of predictors (the target column is not one).
    # A missing `mtry` must stay NULL so that `ranger` applies its own default.
    mtry <- params[['mtry']]
    if (is.numeric(mtry)) {
      mtry <- pmin(mtry, ncol(train_df) - 1L)
    }
    
    model <- 
      ranger(
        data = train_df,
        dependent.variable.name = target_var,
        num.trees = params[['num.trees']] %||% 50,
        mtry = mtry,
        min.node.size = pmax((params[['min.node.size']] %||% 10), 1),
        sample.fraction = params[['sample.fraction']] %||% 1,
        probability = TRUE)
  
    # One column of probabilities per class; the positive class is assumed to
    # be the one labelled '1'.
    class_probs <- predict(model, eval_df)$predictions
    if (!('1' %in% colnames(class_probs))) {
      stop(
        "ranger_classifier expects a binary target with a class labelled '1'; ",
        "found classes: ", paste(colnames(class_probs), collapse = ', '))
    }
    class_probs[, '1']
}



#' Train a random forest regressor (`ranger` object) and return its
#' predictions on an evaluation set.
#'
#' @param train_df Training data.frame; must contain the column `target_var`.
#' @param target_var Name of the target variable.
#' @param params Named list of hyper-parameters. The entries read are
#'   `num.trees` (default 50), `mtry` (left to `ranger` when absent, otherwise
#'   capped at the number of predictor columns), `min.node.size` (default 10,
#'   never below 1) and `sample.fraction` (default 1).
#' @param eval_df Evaluation data.frame.
#' @param ... Overrides for `num.trees`, `mtry`, `min.node.size` and
#'   `sample.fraction`. They take precedence over the matching entries of
#'   `params`; any other argument is ignored.
#'
#' @return The vector of predictions.
#' @importFrom  ranger ranger
#' @importFrom dplyr coalesce
#' @importFrom purrr "%||%"
#' @note An error is raised when the target variable is a factor; use
#'   `ranger_classifier` for classification.
ranger_regressor <- 
  function(
    train_df, 
    target_var, 
    params = list(num.trees = 50), 
    eval_df, 
    ...){
    
    if(is.factor(train_df[[target_var]])){
      stop('Ranger regressor used, but target variable is a factor.')
    }
    
    overrides <- list(...)
    params <- as.list(params)
    
    # Values given through `...` win over those in `params`.
    tunable <- c('num.trees', 'mtry', 'min.node.size', 'sample.fraction')
    for (nm in intersect(names(overrides), tunable)) {
      params[[nm]] <- overrides[[nm]]
    }
    
    train_df <- as.data.frame(train_df)
    
    # Cap `mtry` at the number of predictors (the target column is not one),
    # as the classifier does. A missing `mtry` stays NULL so that `ranger`
    # applies its own default.
    mtry <- params[['mtry']]
    if (is.numeric(mtry)) {
      mtry <- pmin(mtry, ncol(train_df) - 1L)
    }
    
    model <- 
      ranger(
        data = train_df,
        dependent.variable.name = target_var,
        num.trees = params[['num.trees']] %||% 50,
        mtry = mtry,
        min.node.size = pmax((params[['min.node.size']] %||% 10), 1),
        sample.fraction = params[['sample.fraction']] %||% 1)

    predict(model, eval_df)$predictions
  }
# artichaud1/cook documentation built on May 21, 2019, 9:23 a.m.