# R/plspm.r In plspm: Tools for Partial Least Squares Path Modeling (PLS-PM)

#' @title PLS-PM: Partial Least Squares Path Modeling
#'
#' @description
#' Estimate path models with latent variables by partial least squares approach
#' (for both metric and non-metric data)
#'
#' @details
#' The function \code{plspm} estimates a path model by partial least squares
#' approach providing the full set of results. \cr
#'
#' The argument \code{path_matrix} is a matrix of zeros and ones that indicates
#' the structural relationships between latent variables. \code{path_matrix}
#' must be a lower triangular matrix; it contains a 1 when column \code{j}
#' affects row \code{i}, 0 otherwise. \cr
#'
#' @param Data matrix or data frame containing the manifest variables.
#' @param path_matrix A square (lower triangular) boolean matrix representing
#' the inner model (i.e. the path relationships between latent variables).
#' @param blocks list of vectors with column indices or column names
#' from \code{Data} indicating the sets of manifest variables forming
#' each block (i.e. which manifest variables correspond to each block).
#' @param scaling optional argument for runing the non-metric approach;
#' it is a list of string vectors indicating the type of
#' measurement scale for each manifest variable specified in \code{blocks}.
#' \code{scaling} must be specified when working with non-metric variables.
#' Possible values: \code{"num"} (linear transformation,
#' suitable for numerical variables), \code{"raw"} (no transformation),
#' \code{"nom"} (non-monotonic transformation, suitable for nominal variables),
#' and \code{"ord"} (monotonic transformation, suitable for ordinal variables).
#' @param modes character vector indicating the type of measurement for each
#' block. Possible values are: \code{"A", "B", "newA", "PLScore", "PLScow"}.
#' The length of \code{modes} must be equal to the length of \code{blocks}.
#' @param scheme string indicating the type of inner weighting
#' scheme. Possible values are \code{"centroid"}, \code{"factorial"}, or
#' \code{"path"}.
#' @param scaled whether manifest variables should be standardized.
#' Only used when \code{scaling = NULL}. When (\code{TRUE}, data is
#' scaled to standardized values (mean=0 and variance=1).
#' The variance is calculated dividing by \code{N} instead of \code{N-1}).
#' @param tol decimal value indicating the tolerance criterion for the
#' iterations (\code{tol=0.000001}). Can be specified between 0 and 0.001.
#' @param maxiter integer indicating the maximum number of iterations
#' (\code{maxiter=100} by default). The minimum value of \code{maxiter} is 100.
#' @param plscomp optional vector indicating the number of PLS components
#' (for each block) to be used when handling non-metric data
#' (only used if \code{scaling} is provided)
#' @param boot.val whether bootstrap validation should be performed.
#' (\code{FALSE} by default).
#' @param br number bootstrap resamples. Used only
#' when \code{boot.val=TRUE}. When \code{boot.val=TRUE}, the default number of
#' re-samples is 100.
#' @param dataset whether the data matrix used in the computations should be
#' retrieved (\code{TRUE} by default).
#' @return An object of class \code{"plspm"}.
#' @return \item{outer_model}{Results of the outer model. Includes:
#' @return \item{inner_model}{Results of the inner (structural) model.
#' Includes: path coeffs and R-squared for each endogenous latent variable}
#' @return \item{scores}{Matrix of latent variables used to estimate the inner
#' model. If \code{scaled=FALSE} then \code{scores} are latent variables
#' calculated with the original data (non-stardardized).}
#' @return \item{path_coefs}{Matrix of path coefficients
#' (this matrix has a similar form as \code{path_matrix})}
#' @return \item{inner_summary}{Summarized results of the inner model.
#' Includes: type of LV, type of measurement, number of indicators, R-squared,
#' average communality, average redundancy, and average variance
#' extracted}
#' @return \item{effects}{Path effects of the structural relationships.
#' Includes: direct, indirect, and total effects}
#' @return \item{unidim}{Results for checking the unidimensionality of blocks
#' (These results are only meaningful for reflective blocks)}
#' @return \item{gof}{Goodness-of-Fit index}
#' @return \item{data}{Data matrix containing the manifest variables used in the
#' model. Only available when \code{dataset=TRUE}}
#' @return \item{boot}{List of bootstrapping results; only available
#' when argument \code{boot.val=TRUE}}
#' @author Gaston Sanchez, Giorgio Russolillo
#'
#' @references Tenenhaus M., Esposito Vinzi V., Chatelin Y.M., and Lauro C.
#' (2005) PLS path modeling. \emph{Computational Statistics & Data Analysis},
#' \bold{48}, pp. 159-205.
#'
#' Lohmoller J.-B. (1989) \emph{Latent variables path modeling with partial
#' least squares.} Heidelberg: Physica-Verlag.
#'
#' Wold H. (1985) Partial Least Squares. In: Kotz, S., Johnson, N.L. (Eds.),
#' \emph{Encyclopedia of Statistical Sciences}, Vol. 6. Wiley, New York, pp.
#' 581-591.
#'
#' Wold H. (1982) Soft modeling: the basic design and some extensions. In: K.G.
#' Joreskog & H. Wold (Eds.), \emph{Systems under indirect observations:
#' Causality, structure, prediction}, Part 2, pp. 1-54. Amsterdam: Holland.
#'
#' Russolillo, G. (2012) Non-Metric Partial Least Squares. \emph{Electronic
#' Journal of Statistics}, \bold{6}, pp. 1641-1669.
#' \url{http://projecteuclid.org/euclid.ejs/1348665231}
#' @export
#' @examples
#' \dontrun{
#' ## typical example of PLS-PM in customer satisfaction analysis
#' ## model with six LVs and reflective indicators
#'
#' data(satisfaction)
#'
#' # path matrix
#' IMAG = c(0,0,0,0,0,0)
#' EXPE = c(1,0,0,0,0,0)
#' QUAL = c(0,1,0,0,0,0)
#' VAL = c(0,1,1,0,0,0)
#' SAT = c(1,1,1,1,0,0)
#' LOY = c(1,0,0,0,1,0)
#' sat_path = rbind(IMAG, EXPE, QUAL, VAL, SAT, LOY)
#'
#' # plot diagram of path matrix
#' innerplot(sat_path)
#'
#' # blocks of outer model
#' sat_blocks = list(1:5, 6:10, 11:15, 16:19, 20:23, 24:27)
#'
#' # vector of modes (reflective indicators)
#' sat_mod = rep("A", 6)
#'
#' # apply plspm
#' satpls = plspm(satisfaction, sat_path, sat_blocks, modes = sat_mod,
#'    scaled = FALSE)
#'
#' # plot diagram of the inner model
#' innerplot(satpls)
#'
#'
#' # plot outer weights
#' outerplot(satpls, what = "weights")
#' }
plspm <-
function(Data, path_matrix, blocks, modes = NULL, scaling = NULL,
scheme = "centroid", scaled = TRUE, tol = 0.000001, maxiter = 100,
plscomp = NULL, boot.val = FALSE, br = NULL,
dataset = TRUE)
{
# =======================================================================
# checking arguments
# =======================================================================
valid = check_args(Data=Data, path_matrix=path_matrix, blocks=blocks,
scaling=scaling, modes=modes, scheme=scheme,
scaled=scaled, tol=tol, maxiter=maxiter,
plscomp=plscomp, boot.val=boot.val, br=br,
dataset=dataset)

Data = valid$Data path_matrix = valid$path_matrix
blocks = valid$blocks specs = valid$specs
boot.val = valid$boot.val br = valid$br
dataset = valid$dataset # ======================================================================= # Preparing data and blocks indexification # ======================================================================= # building data matrix 'MV' MV = get_manifests(Data, blocks) check_MV = test_manifest_scaling(MV, specs$scaling)
# generals about obs, mvs, lvs
gens = get_generals(MV, path_matrix)
# blocks indexing
names(blocks) = gens$lvs_names block_sizes = lengths(blocks) blockinds = indexify(blocks) # transform to numeric if there are factors in MV if (test_factors(MV)) { numeric_levels = get_numerics(MV) MV = numeric_levels$MV
categories = numeric_levels$categories } # apply corresponding treatment (centering, reducing, ranking) X = get_treated_data(MV, specs) # ======================================================================= # Outer weights and LV scores # ======================================================================= metric = get_metric(specs$scaling)
if (metric) {
# object 'weights' contains outer w's, W, ODM, iter
weights = get_weights(X, path_matrix, blocks, specs)
ok_weights = test_null_weights(weights, specs)
outer_weights = weights$w LV = get_scores(X, weights$W)
} else {
# object 'weights' contains outer w's, W, Y, QQ, ODM, iter
weights = get_weights_nonmetric(X, path_matrix, blocks, specs)
ok_weights = test_null_weights(weights, specs)
outer_weights = weights$w LV = weights$Y
X = weights$QQ # quantified MVs colnames(X) = gens$mvs_names
}

# =======================================================================
# Path coefficients and total effects
# =======================================================================
inner_results = get_paths(path_matrix, LV)
inner_model = inner_results[]
Path = inner_results[]
R2 = inner_results[]
Path_effects = get_effects(Path)

# =======================================================================
# =======================================================================
xloads = cor(X, LV, use = 'pairwise.complete.obs')
loadings = rowSums(xloads * weights$ODM) communality = loadings^2 R2_aux = rowSums(weights$ODM %*% diag(R2, gens$lvs, gens$lvs))
redundancy = communality * R2_aux
crossloadings = data.frame(xloads, row.names=1:gens$mvs) crossloadings$name = factor(gens$mvs_names, levels = unique(gens$mvs_names))
crossloadings$block = factor(rep(gens$lvs_names, block_sizes),
levels = gens$lvs_names) crossloadings = crossloadings[,c('name','block',colnames(xloads))] # outer model data frame outer_model = data.frame( name = factor(gens$mvs_names, levels = unique(gens$mvs_names)), block = factor(rep(gens$lvs_names, block_sizes),
levels = gens$lvs_names), weight = outer_weights, loading = loadings, communality = communality, redundancy = redundancy, row.names = 1:gens$mvs)

# Unidimensionality
unidim = get_unidim(DM = MV, blocks = blocks, modes = specs$modes) # Summary Inner model inner_summary = get_inner_summary(path_matrix, blocks, specs$modes,
communality, redundancy, R2)

# GoF Index
gof = get_gof(communality, R2, blocks, path_matrix)

# =======================================================================
# Results
# =======================================================================
# deliver dataset?
if (dataset) data = MV else data = NULL
# deliver bootstrap validation results?
bootstrap = FALSE
if (boot.val)
{
if (nrow(X) <= 10) {
warning("Bootstrapping stopped: very few cases.")
} else {
bootstrap = get_boots(MV, path_matrix, blocks, specs, br)
}
}

# list with model specifications
model = list(IDM=path_matrix, blocks=blocks, specs=specs,
iter=weights$iter, boot.val=boot.val, br=br, gens=gens) ## output res = list(outer_model = outer_model, inner_model = inner_model, path_coefs = Path, scores = LV, crossloadings = crossloadings, inner_summary = inner_summary, effects = Path_effects, unidim = unidim, gof = gof, boot = bootstrap, data = data, manifests = X, model = model) class(res) = "plspm" return(res) } #'@S3method print plspm print.plspm <- function(x, ...) { cat("Partial Least Squares Path Modeling (PLS-PM)", "\n") cat(rep("-", 45), sep="") cat("\n NAME ", "DESCRIPTION") cat("\n1$outer_model    ", "outer model")
cat("\n2  $inner_model ", "inner model") cat("\n3$path_coefs     ", "path coefficients matrix")
cat("\n4  $scores ", "latent variable scores") if (!inherits(x, "plspm.fit")) { cat("\n5$crossloadings  ", "cross-loadings")
cat("\n6  $inner_summary ", "summary inner model") cat("\n7$effects        ", "total effects")
cat("\n8  $unidim ", "unidimensionality") cat("\n9$gof            ", "goodness-of-fit")
cat("\n10 $boot ", "bootstrap results") cat("\n11$data           ", "data matrix")
}
cat("\n")
cat(rep("-", 45), sep="")
cat("\nYou can also use the function 'summary'", "\n\n")
invisible(x)
}


## Try the plspm package in your browser

Any scripts or data that you put into this service are public.

plspm documentation built on May 2, 2019, 7:05 a.m.