Nothing
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py
# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
#'
# -------------------------- Decision Tree Model in H2O -------------------------- #
#'
#' Build a Decision Tree model
#'
#' Builds a Decision Tree model on an H2OFrame.
#'
#' @param x (Optional) A vector containing the names or indices of the predictor variables to use in building the model.
#' If x is missing, then all columns except y are used.
#' @param y The name or column index of the response variable in the data.
#' The response must be either a numeric or a categorical/factor variable.
#' If the response is numeric, then a regression model will be trained, otherwise it will train a classification model.
#' @param training_frame Id of the training data frame.
#' @param model_id Destination id for this model; auto-generated if not specified.
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
#' @param categorical_encoding Encoding scheme for categorical features. Must be one of: "AUTO", "Enum", "OneHotInternal", "OneHotExplicit",
#' "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited". Defaults to AUTO.
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
#' Defaults to -1 (time-based random number).
#' @param max_depth Max depth of tree. Defaults to 20.
#' @param min_rows Fewest allowed (weighted) observations in a leaf. Defaults to 10.
#' @return Creates a \linkS4class{H2OModel} object of the right type.
#' @seealso \code{\link{predict.H2OModel}} for prediction
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#'
#' # Import the prostate dataset
#' f <- "https://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv"
#' data <- h2o.importFile(f)
#'
#' # Set predictors and response; set response as a factor
#' data["CAPSULE"] <- as.factor(data["CAPSULE"])
#' predictors <- c("AGE","RACE","DPROS","DCAPS","PSA","VOL","GLEASON")
#' response <- "CAPSULE"
#'
#' # Train the DT model
#' h2o_dt <- h2o.decision_tree(x = predictors, y = response, training_frame = data, seed = 1234)
#' }
#' @export
h2o.decision_tree <- function(x,
                              y,
                              training_frame,
                              model_id = NULL,
                              ignore_const_cols = TRUE,
                              categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                              seed = -1,
                              max_depth = 20,
                              min_rows = 10)
{
  # Validate required training_frame first and other frame args: should be a
  # valid key or an H2OFrame object
  training_frame <- .validate.H2OFrame(training_frame, required = TRUE)

  # Validate other required args
  # If x is missing, then assume user wants to use all columns as features.
  if (missing(x)) {
    if (is.numeric(y)) {
      # y is a column index, so the predictors are expressed as indices too.
      # Derive them from the column count only: col(training_frame) would
      # allocate an nrow x ncol integer matrix in the R session just to be
      # collapsed back to 1..ncol by setdiff().
      x <- setdiff(seq_len(ncol(training_frame)), y)
    } else {
      # y is a column name, so the predictors are every other column name.
      x <- setdiff(colnames(training_frame), y)
    }
  }

  # Build parameter list to send to model builder
  parms <- list()
  parms$training_frame <- training_frame
  args <- .verify_dataxy(training_frame, x, y)
  parms$ignored_columns <- args$x_ignore
  parms$response_column <- args$y

  # Only arguments the caller supplied explicitly are forwarded; anything left
  # at its default is omitted from the request.
  if (!missing(model_id)) {
    parms$model_id <- model_id
  }
  if (!missing(ignore_const_cols)) {
    parms$ignore_const_cols <- ignore_const_cols
  }
  if (!missing(categorical_encoding)) {
    parms$categorical_encoding <- categorical_encoding
  }
  if (!missing(seed)) {
    parms$seed <- seed
  }
  if (!missing(max_depth)) {
    parms$max_depth <- max_depth
  }
  if (!missing(min_rows)) {
    parms$min_rows <- min_rows
  }

  # Error check and build model
  model <- .h2o.modelJob('dt', parms, h2oRestApiVersion = 3, verbose = FALSE)
  model
}
# Internal helper: assembles the model parameters and the segment-specific
# parameters for the 'dt' algorithm and submits them through
# .h2o.segmentModelsJob(), returning whatever that call returns.
#
# x, y, training_frame, ignore_const_cols, categorical_encoding, seed,
# max_depth and min_rows are handled exactly as in h2o.decision_tree().
# segment_columns and segment_models_id are forwarded only when supplied;
# parallelism is always forwarded.
# NOTE(review): the meaning of the segment_* arguments and of parallelism is
# defined by .h2o.segmentModelsJob(), which is not visible here.
.h2o.train_segments_decision_tree <- function(x,
                                              y,
                                              training_frame,
                                              ignore_const_cols = TRUE,
                                              categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                                              seed = -1,
                                              max_depth = 20,
                                              min_rows = 10,
                                              segment_columns = NULL,
                                              segment_models_id = NULL,
                                              parallelism = 1)
{
  # formally define variables that were excluded from function parameters
  # (not read anywhere in this function body; kept as generated)
  model_id <- NULL
  verbose <- NULL
  destination_key <- NULL

  # Validate required training_frame first and other frame args: should be a
  # valid key or an H2OFrame object
  training_frame <- .validate.H2OFrame(training_frame, required = TRUE)

  # Validate other required args
  # If x is missing, then assume user wants to use all columns as features.
  if (missing(x)) {
    if (is.numeric(y)) {
      # y is a column index, so the predictors are expressed as indices too.
      # Derive them from the column count only: col(training_frame) would
      # allocate an nrow x ncol integer matrix in the R session just to be
      # collapsed back to 1..ncol by setdiff().
      x <- setdiff(seq_len(ncol(training_frame)), y)
    } else {
      # y is a column name, so the predictors are every other column name.
      x <- setdiff(colnames(training_frame), y)
    }
  }

  # Build parameter list to send to model builder
  parms <- list()
  parms$training_frame <- training_frame
  args <- .verify_dataxy(training_frame, x, y)
  parms$ignored_columns <- args$x_ignore
  parms$response_column <- args$y

  # Only arguments the caller supplied explicitly are forwarded.
  if (!missing(ignore_const_cols)) {
    parms$ignore_const_cols <- ignore_const_cols
  }
  if (!missing(categorical_encoding)) {
    parms$categorical_encoding <- categorical_encoding
  }
  if (!missing(seed)) {
    parms$seed <- seed
  }
  if (!missing(max_depth)) {
    parms$max_depth <- max_depth
  }
  if (!missing(min_rows)) {
    parms$min_rows <- min_rows
  }

  # Build segment-models specific parameters
  segment_parms <- list()
  if (!missing(segment_columns)) {
    segment_parms$segment_columns <- segment_columns
  }
  if (!missing(segment_models_id)) {
    segment_parms$segment_models_id <- segment_models_id
  }
  # Unlike the optional arguments above, parallelism is sent unconditionally.
  segment_parms$parallelism <- parallelism

  # Error check and build segment models
  segment_models <- .h2o.segmentModelsJob('dt', segment_parms, parms, h2oRestApiVersion = 3)
  segment_models
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.