R/aggregator.R

Defines functions h2o.aggregated_frame .h2o.train_segments_aggregator h2o.aggregator

Documented in h2o.aggregated_frame h2o.aggregator

# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details) 
#'
# -------------------------- H2O Aggregator Model -------------------------- #
#'
#' Build an Aggregated Frame
#'
#' Builds an Aggregated Frame of an H2OFrame.
#'
#' Apart from \code{training_frame}, only the arguments that are explicitly supplied in the call are
#' forwarded to the model builder; arguments that are omitted are left out of the request entirely.
#'
#' @param training_frame Id of the training data frame.
#' @param x A vector containing the \code{character} names of the predictors in the model.
#' @param model_id Destination id for this model; auto-generated if not specified.
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
#' @param target_num_exemplars Targeted number of exemplars. Defaults to 5000.
#' @param rel_tol_num_exemplars Relative tolerance for the number of exemplars (e.g., 0.5 is +/- 50 percent).
#'        Defaults to 0.5.
#' @param transform Transformation of training data. Must be one of: "NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN",
#'        "DESCALE". Defaults to NORMALIZE.
#' @param categorical_encoding Encoding scheme for categorical features. Must be one of: "AUTO", "Enum",
#'        "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited".
#'        Defaults to AUTO.
#' @param save_mapping_frame \code{Logical}. Whether to export the mapping of the aggregated frame. Defaults to FALSE.
#' @param num_iteration_without_new_exemplar The number of iterations to run before the aggregator exits if the
#'        number of exemplars collected didn't change. Defaults to 500.
#' @param export_checkpoints_dir Automatically export generated models to this directory.
#' @return The model object produced by the Aggregator model job. The name of the model's output frame is
#'         additionally stored in the \code{aggregated_frame_id} element of its \code{model} slot.
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#' df <- h2o.createFrame(rows = 100,
#'                       cols = 5,
#'                       categorical_fraction = 0.6,
#'                       integer_fraction = 0,
#'                       binary_fraction = 0,
#'                       real_range = 100,
#'                       integer_range = 100,
#'                       missing_fraction = 0)
#' target_num_exemplars <- 1000
#' rel_tol_num_exemplars <- 0.5
#' encoding <- "Eigen"
#' agg <- h2o.aggregator(training_frame = df,
#'                      target_num_exemplars = target_num_exemplars,
#'                      rel_tol_num_exemplars = rel_tol_num_exemplars,
#'                      categorical_encoding = encoding)
#' }
#' @export
h2o.aggregator <- function(training_frame,
                           x,
                           model_id = NULL,
                           ignore_const_cols = TRUE,
                           target_num_exemplars = 5000,
                           rel_tol_num_exemplars = 0.5,
                           transform = c("NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE"),
                           categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                           save_mapping_frame = FALSE,
                           num_iteration_without_new_exemplar = 500,
                           export_checkpoints_dir = NULL) {
  # The training frame is mandatory: resolve it (key or H2OFrame object) before anything else.
  training_frame <- .validate.H2OFrame(training_frame, required=TRUE)

  # TRUE for every optional argument the caller left out. The vector order is the
  # order in which supplied arguments are appended to the parameter list below.
  omitted <- c(
    model_id = missing(model_id),
    ignore_const_cols = missing(ignore_const_cols),
    target_num_exemplars = missing(target_num_exemplars),
    rel_tol_num_exemplars = missing(rel_tol_num_exemplars),
    transform = missing(transform),
    categorical_encoding = missing(categorical_encoding),
    save_mapping_frame = missing(save_mapping_frame),
    num_iteration_without_new_exemplar = missing(num_iteration_without_new_exemplar),
    export_checkpoints_dir = missing(export_checkpoints_dir)
  )

  # Assemble the parameter list handed to the model builder.
  parms <- list()
  parms$training_frame <- training_frame
  if (!missing(x)) {
    # Predictors are communicated as the set of columns to ignore.
    datacols <- .verify_datacols(training_frame, x)
    parms$ignored_columns <- datacols$cols_ignore
  }

  # Copy across only what was explicitly supplied. The choice vectors in the
  # signature are never collapsed here; an omitted argument is simply not sent.
  call_env <- environment()
  for (arg_name in names(omitted)[!omitted]) {
    parms[[arg_name]] <- get(arg_name, envir = call_env, inherits = FALSE)
  }

  # Submit the job, then expose the output frame's name under a dedicated element.
  model <- .h2o.modelJob('aggregator', parms, h2oRestApiVersion=99, verbose=FALSE)
  output_frame <- model@model$output_frame
  model@model$aggregated_frame_id <- output_frame$name
  model
}
# Internal: train Aggregator segment models.
#
# Takes the same model parameters as h2o.aggregator() except model_id, plus three
# segment-specific arguments (segment_columns, segment_models_id, parallelism).
# Only explicitly supplied model parameters are forwarded; parallelism is always
# forwarded. Returns whatever .h2o.segmentModelsJob() returns.
.h2o.train_segments_aggregator <- function(training_frame,
                                           x,
                                           ignore_const_cols = TRUE,
                                           target_num_exemplars = 5000,
                                           rel_tol_num_exemplars = 0.5,
                                           transform = c("NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE"),
                                           categorical_encoding = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited"),
                                           save_mapping_frame = FALSE,
                                           num_iteration_without_new_exemplar = 500,
                                           export_checkpoints_dir = NULL,
                                           segment_columns = NULL,
                                           segment_models_id = NULL,
                                           parallelism = 1) {
  # Placeholders for names that are not parameters of this function. They are not
  # read anywhere in this body; kept as emitted by the generator.
  model_id <- NULL
  verbose <- NULL
  destination_key <- NULL

  # The training frame is mandatory: resolve it (key or H2OFrame object) first.
  training_frame <- .validate.H2OFrame(training_frame, required=TRUE)

  # TRUE for every optional argument the caller left out; vector order is the
  # order in which supplied arguments are appended to the lists below.
  model_args_omitted <- c(
    ignore_const_cols = missing(ignore_const_cols),
    target_num_exemplars = missing(target_num_exemplars),
    rel_tol_num_exemplars = missing(rel_tol_num_exemplars),
    transform = missing(transform),
    categorical_encoding = missing(categorical_encoding),
    save_mapping_frame = missing(save_mapping_frame),
    num_iteration_without_new_exemplar = missing(num_iteration_without_new_exemplar),
    export_checkpoints_dir = missing(export_checkpoints_dir)
  )
  segment_args_omitted <- c(
    segment_columns = missing(segment_columns),
    segment_models_id = missing(segment_models_id)
  )
  call_env <- environment()

  # Model-builder parameters.
  parms <- list()
  parms$training_frame <- training_frame
  if (!missing(x)) {
    # Predictors are communicated as the set of columns to ignore.
    datacols <- .verify_datacols(training_frame, x)
    parms$ignored_columns <- datacols$cols_ignore
  }
  for (arg_name in names(model_args_omitted)[!model_args_omitted]) {
    parms[[arg_name]] <- get(arg_name, envir = call_env, inherits = FALSE)
  }

  # Segment-models specific parameters; parallelism is passed regardless of
  # whether the caller supplied it.
  segment_parms <- list()
  for (arg_name in names(segment_args_omitted)[!segment_args_omitted]) {
    segment_parms[[arg_name]] <- get(arg_name, envir = call_env, inherits = FALSE)
  }
  segment_parms$parallelism <- parallelism

  # Error check and build segment models.
  .h2o.segmentModelsJob('aggregator', segment_parms, parms, h2oRestApiVersion=99)
}


#' Retrieve an aggregated frame from an Aggregator model
#'
#' Retrieve an aggregated frame from the Aggregator model and use it to create a new frame.
#'
#' The frame id is read from the \code{aggregated_frame_id} element of the model's \code{model} slot.
#' If that element is absent, the name of the model's \code{output_frame} is used instead. An error is
#' raised when neither is available.
#'
#' @param model an \linkS4class{H2OClusteringModel} returned by a \code{h2o.aggregator} call.
#' @return The result of \code{h2o.getFrame} for the aggregated frame's id.
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#' df <- h2o.createFrame(rows = 100, 
#'                       cols = 5, 
#'                       categorical_fraction = 0.6, 
#'                       integer_fraction = 0,
#'                       binary_fraction = 0, 
#'                       real_range = 100, 
#'                       integer_range = 100, 
#'                       missing_fraction = 0)
#' target_num_exemplars = 1000
#' rel_tol_num_exemplars = 0.5
#' encoding = "Eigen"
#' agg <- h2o.aggregator(training_frame = df,
#'                      target_num_exemplars = target_num_exemplars,
#'                      rel_tol_num_exemplars = rel_tol_num_exemplars,
#'                      categorical_encoding = encoding)
#' # Use the aggregated frame to create a new dataframe
#' new_df <- h2o.aggregated_frame(agg)
#' }
#' @export
h2o.aggregated_frame <- function(model) {
  key <- model@model$aggregated_frame_id
  if (is.null(key)) {
    # h2o.aggregator() fills aggregated_frame_id from output_frame$name, so read
    # that source directly when the copy is absent.
    # NOTE(review): presumably this covers models not created by h2o.aggregator()
    # in the current session (e.g. re-fetched from the cluster) - confirm.
    key <- model@model$output_frame$name
  }
  if (is.null(key) || length(key) == 0L) {
    # Fail here with an actionable message instead of passing an empty id on.
    stop("`model` does not reference an aggregated frame: neither ",
         "`aggregated_frame_id` nor `output_frame$name` is set on it.",
         call. = FALSE)
  }
  h2o.getFrame(key)
}

Try the h2o package in your browser

Any scripts or data that you put into this service are public.

h2o documentation built on Aug. 9, 2023, 9:06 a.m.