# R/svd.R
# In h2o: R Interface for the 'H2O' Scalable Machine Learning Platform
#
# Documented in h2o.svd

# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py
# Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details) 
#'
# -------------------------- Singular Value Decomposition -------------------------- #
#'
#' Singular value decomposition of an H2O data frame using the method selected by \code{svd_method}.
#'
#' @param training_frame Id of the training data frame.
#' @param x A vector containing the \code{character} names of the predictors in the model.
#' @param destination_key (Deprecated) Use \code{model_id} instead. Supplying it raises a deprecation warning, and its
#'        value is used as the model id only when \code{model_id} is not given.
#' @param model_id Destination id for this model; auto-generated if not specified.
#' @param validation_frame Id of the validation data frame.
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
#' @param score_each_iteration \code{Logical}. Whether to score during each iteration of model training. Defaults to FALSE.
#' @param transform Transformation of training data. Must be one of: "NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE".
#'        Defaults to NONE.
#' @param svd_method Method for computing SVD (Caution: Randomized is currently experimental and unstable). Must be one of:
#'        "GramSVD", "Power", "Randomized". Defaults to GramSVD.
#' @param nv Number of right singular vectors. Defaults to 1.
#' @param max_iterations Maximum iterations. Defaults to 1000.
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
#'        Defaults to -1 (time-based random number).
#' @param keep_u \code{Logical}. Save left singular vectors? Defaults to TRUE.
#' @param u_name Frame key to save left singular vectors.
#' @param use_all_factor_levels \code{Logical}. Whether first factor level is included in each categorical expansion. Defaults to TRUE.
#' @param max_runtime_secs Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to 0.
#' @param export_checkpoints_dir Automatically export generated models to this directory.
#' @return an object of class \linkS4class{H2ODimReductionModel}.
#' @references N. Halko, P.G. Martinsson, J.A. Tropp. \href{https://arxiv.org/abs/0909.4061}{Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions}. SIAM Rev., Survey and Review section, Vol. 53, num. 2, pp. 217-288, June 2011.
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#' australia_path <- system.file("extdata", "australia.csv", package = "h2o")
#' australia <- h2o.uploadFile(path = australia_path)
#' h2o.svd(training_frame = australia, nv = 8)
#' }
#' @export
h2o.svd <- function(training_frame,
                    x,
                    destination_key,
                    model_id = NULL,
                    validation_frame = NULL,
                    ignore_const_cols = TRUE,
                    score_each_iteration = FALSE,
                    transform = c("NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE"),
                    svd_method = c("GramSVD", "Power", "Randomized"),
                    nv = 1,
                    max_iterations = 1000,
                    seed = -1,
                    keep_u = TRUE,
                    u_name = NULL,
                    use_all_factor_levels = TRUE,
                    max_runtime_secs = 0,
                    export_checkpoints_dir = NULL) {
  # Resolve the frame arguments first (each may be a key or an H2OFrame); the
  # required training frame is checked before the optional validation frame.
  training_frame <- .validate.H2OFrame(training_frame, required = TRUE)
  validation_frame <- .validate.H2OFrame(validation_frame, required = FALSE)

  # Collect the builder parameters. Apart from the training frame, an argument
  # is added only when the caller supplied it.
  params <- list()
  params$training_frame <- training_frame
  if (!missing(x)) {
    params$ignored_columns <- .verify_datacols(training_frame, x)$cols_ignore
  }

  # 'destination_key' is the deprecated spelling of 'model_id': using it always
  # warns, and its value is taken only when 'model_id' itself was not supplied.
  if (!missing(destination_key)) {
    warning("'destination_key' is deprecated; please use 'model_id' instead.")
  }
  if (!missing(model_id)) {
    params$model_id <- model_id
  } else if (!missing(destination_key)) {
    params$model_id <- destination_key
  }

  # NOTE: validation_frame was reassigned above, so missing() no longer reports
  # it as missing; when its value is NULL the assignment adds nothing to the
  # list.
  if (!missing(validation_frame)) {
    params$validation_frame <- validation_frame
  }
  if (!missing(ignore_const_cols)) {
    params$ignore_const_cols <- ignore_const_cols
  }
  if (!missing(score_each_iteration)) {
    params$score_each_iteration <- score_each_iteration
  }
  if (!missing(transform)) {
    params$transform <- transform
  }
  if (!missing(svd_method)) {
    params$svd_method <- svd_method
  }
  if (!missing(nv)) {
    params$nv <- nv
  }
  if (!missing(max_iterations)) {
    params$max_iterations <- max_iterations
  }
  if (!missing(seed)) {
    params$seed <- seed
  }
  if (!missing(keep_u)) {
    params$keep_u <- keep_u
  }
  if (!missing(u_name)) {
    params$u_name <- u_name
  }
  if (!missing(use_all_factor_levels)) {
    params$use_all_factor_levels <- use_all_factor_levels
  }
  if (!missing(max_runtime_secs)) {
    params$max_runtime_secs <- max_runtime_secs
  }
  if (!missing(export_checkpoints_dir)) {
    params$export_checkpoints_dir <- export_checkpoints_dir
  }

  # Submit the 'svd' job and return its result.
  model <- .h2o.modelJob("svd", params, h2oRestApiVersion = 99, verbose = FALSE)
  model
}
# Internal builder for segment-wise SVD models: assembles the 'svd' model
# parameters and the segment-specific parameters, then submits both through
# .h2o.segmentModelsJob().
#
# training_frame is required and validation_frame is optional; both are passed
# through .validate.H2OFrame(). Of the remaining model arguments, only those the
# caller supplied are forwarded. segment_columns and segment_models_id are
# likewise forwarded only when supplied, while parallelism is always sent.
# Returns the value produced by .h2o.segmentModelsJob().
.h2o.train_segments_svd <- function(training_frame,
                                    x,
                                    validation_frame = NULL,
                                    ignore_const_cols = TRUE,
                                    score_each_iteration = FALSE,
                                    transform = c("NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE"),
                                    svd_method = c("GramSVD", "Power", "Randomized"),
                                    nv = 1,
                                    max_iterations = 1000,
                                    seed = -1,
                                    keep_u = TRUE,
                                    u_name = NULL,
                                    use_all_factor_levels = TRUE,
                                    max_runtime_secs = 0,
                                    export_checkpoints_dir = NULL,
                                    segment_columns = NULL,
                                    segment_models_id = NULL,
                                    parallelism = 1)
{
  # Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
  training_frame <- .validate.H2OFrame(training_frame, required = TRUE)
  validation_frame <- .validate.H2OFrame(validation_frame, required = FALSE)

  # Build parameter list to send to model builder
  parms <- list()
  parms$training_frame <- training_frame
  if (!missing(x)) {
    parms$ignored_columns <- .verify_datacols(training_frame, x)$cols_ignore
  }

  # No deprecated-alias handling here: neither 'destination_key' nor 'model_id'
  # is a formal of this function, so no model id is forwarded. Testing a local
  # variable with missing() always yields FALSE, which would raise the
  # 'destination_key' deprecation warning on every call.

  # NOTE: validation_frame was reassigned above, so missing() no longer reports
  # it as missing; when its value is NULL the assignment adds nothing to the
  # list.
  if (!missing(validation_frame)) {
    parms$validation_frame <- validation_frame
  }
  if (!missing(ignore_const_cols)) {
    parms$ignore_const_cols <- ignore_const_cols
  }
  if (!missing(score_each_iteration)) {
    parms$score_each_iteration <- score_each_iteration
  }
  if (!missing(transform)) {
    parms$transform <- transform
  }
  if (!missing(svd_method)) {
    parms$svd_method <- svd_method
  }
  if (!missing(nv)) {
    parms$nv <- nv
  }
  if (!missing(max_iterations)) {
    parms$max_iterations <- max_iterations
  }
  if (!missing(seed)) {
    parms$seed <- seed
  }
  if (!missing(keep_u)) {
    parms$keep_u <- keep_u
  }
  if (!missing(u_name)) {
    parms$u_name <- u_name
  }
  if (!missing(use_all_factor_levels)) {
    parms$use_all_factor_levels <- use_all_factor_levels
  }
  if (!missing(max_runtime_secs)) {
    parms$max_runtime_secs <- max_runtime_secs
  }
  if (!missing(export_checkpoints_dir)) {
    parms$export_checkpoints_dir <- export_checkpoints_dir
  }

  # Build segment-models specific parameters; parallelism is always included,
  # the other two only when the caller supplied them.
  segment_parms <- list()
  if (!missing(segment_columns)) {
    segment_parms$segment_columns <- segment_columns
  }
  if (!missing(segment_models_id)) {
    segment_parms$segment_models_id <- segment_models_id
  }
  segment_parms$parallelism <- parallelism

  # Error check and build segment models
  segment_models <- .h2o.segmentModelsJob('svd', segment_parms, parms, h2oRestApiVersion = 99)
  segment_models
}