# Nothing
# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py
# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
#'
# -------------------------- Principal Components Analysis -------------------------- #
#'
#' Principal component analysis of an H2O data frame
#'
#' Principal components analysis of an H2O data frame using the power method
#' to calculate the singular value decomposition of the Gram matrix.
#'
#' @param training_frame Id of the training data frame.
#' @param x A vector containing the \code{character} names of the predictors in the model.
#' @param model_id Destination id for this model; auto-generated if not specified.
#' @param validation_frame Id of the validation data frame.
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
#' @param score_each_iteration \code{Logical}. Whether to score during each iteration of model training. Defaults to FALSE.
#' @param transform Transformation of training data. Must be one of: "NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE".
#' Defaults to NONE.
#' @param pca_method Specify the algorithm to use for computing the principal components: GramSVD - uses a distributed computation
#' of the Gram matrix, followed by a local SVD; Power - computes the SVD using the power iteration method
#' (experimental); Randomized - uses randomized subspace iteration method; GLRM - fits a generalized low-rank
#' model with L2 loss function and no regularization and solves for the SVD using local matrix algebra
#' (experimental). Must be one of: "GramSVD", "Power", "Randomized", "GLRM". Defaults to GramSVD.
#' @param pca_impl Specify the implementation to use for computing PCA (via SVD or EVD): MTJ_EVD_DENSEMATRIX - eigenvalue
#' decompositions for dense matrix using MTJ; MTJ_EVD_SYMMMATRIX - eigenvalue decompositions for symmetric matrix
#' using MTJ; MTJ_SVD_DENSEMATRIX - singular-value decompositions for dense matrix using MTJ; JAMA - eigenvalue
#' decompositions for dense matrix using JAMA. References: JAMA - \url{http://math.nist.gov/javanumerics/jama/}; MTJ -
#' \url{https://github.com/fommil/matrix-toolkits-java/}. Must be one of: "MTJ_EVD_DENSEMATRIX", "MTJ_EVD_SYMMMATRIX",
#' "MTJ_SVD_DENSEMATRIX", "JAMA".
#' @param k Rank of matrix approximation. Defaults to 1.
#' @param max_iterations Maximum training iterations. Defaults to 1000.
#' @param use_all_factor_levels \code{Logical}. Whether first factor level is included in each categorical expansion. Defaults to FALSE.
#' @param compute_metrics \code{Logical}. Whether to compute metrics on the training data. Defaults to TRUE.
#' @param impute_missing \code{Logical}. Whether to impute missing entries with the column mean. Defaults to FALSE.
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
#' Defaults to -1 (time-based random number).
#' @param max_runtime_secs Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to 0.
#' @param export_checkpoints_dir Automatically export generated models to this directory.
#' @return an object of class \linkS4class{H2ODimReductionModel}.
#' @seealso \code{\link{h2o.svd}}, \code{\link{h2o.glrm}}
#' @references N. Halko, P.G. Martinsson, J.A. Tropp. \href{http://arxiv.org/abs/0909.4061}{Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions}. SIAM Rev., Survey and Review section, Vol. 53, num. 2, pp. 217-288, June 2011.
#' @examples
#' \dontrun{
#' library(h2o)
#' h2o.init()
#' australia_path <- system.file("extdata", "australia.csv", package = "h2o")
#' australia <- h2o.uploadFile(path = australia_path)
#' h2o.prcomp(training_frame = australia, k = 8, transform = "STANDARDIZE")
#' }
#' @export
h2o.prcomp <- function(training_frame,
                       x,
                       model_id = NULL,
                       validation_frame = NULL,
                       ignore_const_cols = TRUE,
                       score_each_iteration = FALSE,
                       transform = c("NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE"),
                       pca_method = c("GramSVD", "Power", "Randomized", "GLRM"),
                       pca_impl = c("MTJ_EVD_DENSEMATRIX", "MTJ_EVD_SYMMMATRIX", "MTJ_SVD_DENSEMATRIX", "JAMA"),
                       k = 1,
                       max_iterations = 1000,
                       use_all_factor_levels = FALSE,
                       compute_metrics = TRUE,
                       impute_missing = FALSE,
                       seed = -1,
                       max_runtime_secs = 0,
                       export_checkpoints_dir = NULL) {
  # Frame arguments go through the package validator first; training_frame is
  # mandatory, validation_frame is optional.
  training_frame <- .validate.H2OFrame(training_frame, required = TRUE)
  validation_frame <- .validate.H2OFrame(validation_frame, required = FALSE)

  # Collect the builder arguments. Apart from training_frame, an entry is only
  # added when the caller explicitly supplied the corresponding argument.
  builder_args <- list()
  builder_args[["training_frame"]] <- training_frame

  # `x` names the predictors; the builder is given the complementary set of
  # columns to ignore, as computed by .verify_datacols().
  if (!missing(x)) {
    builder_args[["ignored_columns"]] <- .verify_datacols(training_frame, x)$cols_ignore
  }
  if (!missing(model_id)) {
    builder_args[["model_id"]] <- model_id
  }
  # NOTE(review): validation_frame was reassigned above, so missing() should
  # report FALSE here; a NULL value makes the assignment a no-op on the list.
  if (!missing(validation_frame)) {
    builder_args[["validation_frame"]] <- validation_frame
  }
  if (!missing(ignore_const_cols)) {
    builder_args[["ignore_const_cols"]] <- ignore_const_cols
  }
  if (!missing(score_each_iteration)) {
    builder_args[["score_each_iteration"]] <- score_each_iteration
  }
  if (!missing(transform)) {
    builder_args[["transform"]] <- transform
  }
  if (!missing(pca_method)) {
    builder_args[["pca_method"]] <- pca_method
  }
  if (!missing(pca_impl)) {
    builder_args[["pca_impl"]] <- pca_impl
  }
  if (!missing(k)) {
    builder_args[["k"]] <- k
  }
  if (!missing(max_iterations)) {
    builder_args[["max_iterations"]] <- max_iterations
  }
  if (!missing(use_all_factor_levels)) {
    builder_args[["use_all_factor_levels"]] <- use_all_factor_levels
  }
  if (!missing(compute_metrics)) {
    builder_args[["compute_metrics"]] <- compute_metrics
  }
  if (!missing(impute_missing)) {
    builder_args[["impute_missing"]] <- impute_missing
  }
  if (!missing(seed)) {
    builder_args[["seed"]] <- seed
  }
  if (!missing(max_runtime_secs)) {
    builder_args[["max_runtime_secs"]] <- max_runtime_secs
  }
  if (!missing(export_checkpoints_dir)) {
    builder_args[["export_checkpoints_dir"]] <- export_checkpoints_dir
  }

  # Submit the "pca" job through REST API version 3 and hand back its result.
  pca_model <- .h2o.modelJob("pca", builder_args, h2oRestApiVersion = 3, verbose = FALSE)
  pca_model
}
# Segment-model counterpart of h2o.prcomp(): assembles the same PCA builder
# arguments plus the segment-specific ones and submits them through
# .h2o.segmentModelsJob().
.h2o.train_segments_prcomp <- function(training_frame,
                                       x,
                                       validation_frame = NULL,
                                       ignore_const_cols = TRUE,
                                       score_each_iteration = FALSE,
                                       transform = c("NONE", "STANDARDIZE", "NORMALIZE", "DEMEAN", "DESCALE"),
                                       pca_method = c("GramSVD", "Power", "Randomized", "GLRM"),
                                       pca_impl = c("MTJ_EVD_DENSEMATRIX", "MTJ_EVD_SYMMMATRIX", "MTJ_SVD_DENSEMATRIX", "JAMA"),
                                       k = 1,
                                       max_iterations = 1000,
                                       use_all_factor_levels = FALSE,
                                       compute_metrics = TRUE,
                                       impute_missing = FALSE,
                                       seed = -1,
                                       max_runtime_secs = 0,
                                       export_checkpoints_dir = NULL,
                                       segment_columns = NULL,
                                       segment_models_id = NULL,
                                       parallelism = 1) {
  # Names that are not parameters of this function are still given a formal
  # local definition (all NULL).
  model_id <- NULL
  verbose <- NULL
  destination_key <- NULL

  # Frame arguments go through the package validator first; training_frame is
  # mandatory, validation_frame is optional.
  training_frame <- .validate.H2OFrame(training_frame, required = TRUE)
  validation_frame <- .validate.H2OFrame(validation_frame, required = FALSE)

  # Builder arguments: apart from training_frame, an entry is only added when
  # the caller explicitly supplied the corresponding argument.
  builder_args <- list()
  builder_args[["training_frame"]] <- training_frame

  # `x` names the predictors; the builder is given the columns to ignore, as
  # computed by .verify_datacols().
  if (!missing(x)) {
    builder_args[["ignored_columns"]] <- .verify_datacols(training_frame, x)$cols_ignore
  }
  # NOTE(review): validation_frame was reassigned above, so missing() should
  # report FALSE here; a NULL value makes the assignment a no-op on the list.
  if (!missing(validation_frame)) {
    builder_args[["validation_frame"]] <- validation_frame
  }
  if (!missing(ignore_const_cols)) {
    builder_args[["ignore_const_cols"]] <- ignore_const_cols
  }
  if (!missing(score_each_iteration)) {
    builder_args[["score_each_iteration"]] <- score_each_iteration
  }
  if (!missing(transform)) {
    builder_args[["transform"]] <- transform
  }
  if (!missing(pca_method)) {
    builder_args[["pca_method"]] <- pca_method
  }
  if (!missing(pca_impl)) {
    builder_args[["pca_impl"]] <- pca_impl
  }
  if (!missing(k)) {
    builder_args[["k"]] <- k
  }
  if (!missing(max_iterations)) {
    builder_args[["max_iterations"]] <- max_iterations
  }
  if (!missing(use_all_factor_levels)) {
    builder_args[["use_all_factor_levels"]] <- use_all_factor_levels
  }
  if (!missing(compute_metrics)) {
    builder_args[["compute_metrics"]] <- compute_metrics
  }
  if (!missing(impute_missing)) {
    builder_args[["impute_missing"]] <- impute_missing
  }
  if (!missing(seed)) {
    builder_args[["seed"]] <- seed
  }
  if (!missing(max_runtime_secs)) {
    builder_args[["max_runtime_secs"]] <- max_runtime_secs
  }
  if (!missing(export_checkpoints_dir)) {
    builder_args[["export_checkpoints_dir"]] <- export_checkpoints_dir
  }

  # Segment-specific arguments: the two identifiers are optional, while
  # parallelism is always forwarded (its default is 1).
  segment_args <- list()
  if (!missing(segment_columns)) {
    segment_args[["segment_columns"]] <- segment_columns
  }
  if (!missing(segment_models_id)) {
    segment_args[["segment_models_id"]] <- segment_models_id
  }
  segment_args[["parallelism"]] <- parallelism

  # Submit the "pca" segment-models job through REST API version 3.
  trained_segments <- .h2o.segmentModelsJob("pca", segment_args, builder_args, h2oRestApiVersion = 3)
  trained_segments
}
# Exposes the model's `importance` entry under the additional name
# `variable_importances` and returns the updated model.
# `parameters` and `allparams` are accepted but not used here.
.h2o.fill_pca <- function(model, parameters, allparams) {
  importance_table <- model$importance
  model$variable_importances <- importance_table
  model
}
#' Scree Plot
#'
#' Plots the first column of the transposed \code{importance} table stored in
#' the model, one value per component, either as bars or as a dashed line.
#'
#' @param model A PCA model
#' @param type Type of the plot. Either "barplot" or "lines".
#' @export
h2o.screeplot <- function(model, type = c("barplot", "lines")) {
  type <- match.arg(type)

  # Both plot styles draw the same vector: the first column of the transposed
  # importance table (i.e. the table's first row, one entry per component).
  # NOTE(review): the axis is labelled "Variances"; confirm that the first row
  # of the importance table actually holds variances.
  component_values <- t(model@model$importance)[, 1]

  if (type == "lines") {
    graphics::plot(
      component_values,
      xlab = "Components", ylab = "Variances", main = "Scree Plot",
      type = "l", lty = "dashed", col = "blue", lwd = 2
    )
  } else {
    graphics::barplot(
      component_values,
      xlab = "Components", ylab = "Variances", main = "Scree Plot"
    )
  }
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.