#' @title Performs Factor Analysis to Select a Subset of the 24 Measures
#' @description Performs a factor analysis to reduce the set of 24 measures into a smaller set of measures that captures the main features of the trajectories.
#' @param trajMeasures List generated by \code{step1measures}. Contains
#' original data, original time and 24 measures.
#' @param discard Vector containing names or numerical
#'positions of measures to discard during factor analysis.
#' @param num.factors Numerical value specifying the number
#' of factors to choose. Defaults to \code{NULL}.
#' See details.
#' @param verbose Logical indicating if the function should
#'print information on screen. Defaults to \code{TRUE}.
#' @param \dots Arguments to be passed to \code{principal}. See details.
#' @return trajFactors Object containing the measures chosen as factors, the eigenvalues of the correlation matrix of the 24 measures, the list generated by the
#'\code{principal} function used for the factor analysis and the data stored in the \code{trajMeasures} object.
#'
#' @details If \code{num.factors} is \code{NULL}, the function will select the number of factors as the number of eigenvalues greater than or equal to 1.
#'
#'The \code{\link[psych]{principal}} function is used in order to choose the measure that will represent each factor. \code{varimax} is used to rotate the data during
#'the execution of the\ code{principal} function. Any other parameter can be passed through \code{\dots} in order to further control the \code{principal} function.
#'
#'If any measures happen to be extremely correlated among themselves (corr. >= 0.95), one of them
#'will have to be removed. Such measures are flagged by \code{step1measures}. These values can be removed with \code{discard} or they will be automatically removed by the function.
#'
#' @author Marie-Pierre Sylvestre, Dan Vatnik
#'
#'marie-pierre.sylvestre@umontreal.ca
#'
#' @examples
#' \dontrun{
#'# Setup data
#'data = example.data$data
#'
#' # Run step1measures and step2factors
#'s1 = step1measures(data, ID=TRUE)
#'s2 = step2factors(s1)
#'
#' # Display factors
#'head(s2$factors)
#'
#'# The next step would be to run "step3clusters"
#'}
#'
#'
#' @seealso
#' \code{\link[psych]{principal}}
#' \code{\link[traj]{step1measures}}
#'
#' @rdname step2factors
#'
#' @export
step2factors <- function(trajMeasures, num.factors = NULL, discard = NULL, verbose = TRUE, ...)
{
  # Reduces the measures stored in `trajMeasures$measurments` to one
  # representative measure per factor and returns a "trajFactors" object.
  # Column 1 of the measures table is treated as the ID column; the remaining
  # columns are the measures (so position 19 is m18).
  measures <- trajMeasures$measurments

  # Deal with variables to discard
  if (!is.null(discard)) {
    # `discard` may hold column names or column positions.
    if (is.character(discard)) {
      vars.to.discard <- which(names(measures) %in% discard)
    } else {
      vars.to.discard <- discard
    }

    if (19 %in% vars.to.discard)
      stop("m18 will automatically be removed. Do not include it in the 'discard' variable.")
    # A shorter match vector means some requested names do not exist.
    if (length(vars.to.discard) != length(discard))
      stop("Not all variables in 'discard' are to be removed. There is an error in the format of 'discard'.")
    # Dropping column 1 would make the first measure be used as the ID below.
    if (1 %in% vars.to.discard)
      stop("The ID column (position 1) cannot be included in 'discard'.")

    data <- measures[, -vars.to.discard, drop = FALSE]
  } else {
    data <- measures
  }

  # Deal with IDs: keep the first column aside as a one-column data frame
  IDvector <- data[1]
  data <- data[-1]

  # Remove m18 if its correlation with m17 is 0.95 or larger.
  # m17 may have been discarded by the caller, and the correlation is NA when
  # one of the two columns is constant; in both cases there is nothing to test.
  if (all(c("m17", "m18") %in% names(data))) {
    cor.m17.m18 <- cor(data$m17, data$m18)
    if (!is.na(cor.m17.m18) && cor.m17.m18 >= 0.95) {
      data <- data[, names(data) != "m18", drop = FALSE]
    }
  }

  # Deal with remaining correlated variables
  corr.vars <- check.correlation(data, verbose = FALSE, is.return = TRUE)
  if (!is.null(corr.vars)) {
    # Logical selection is safe even when no name matches (negative indexing
    # with an empty position vector would drop every column).
    data <- data[, !(names(data) %in% corr.vars[, 1]), drop = FALSE]
    # NOTE: this message is printed regardless of `verbose`, as before.
    print(paste(corr.vars[, 1], "is removed because it is perfectly correlated with", corr.vars[, 2]))
  }

  # Checking validity of num.factors (NULL test first so NULL is never compared)
  if (!is.null(num.factors) && num.factors > ncol(data))
    stop("Requesting more factors in 'num.factors' than available variables.")
  if (!is.null(num.factors) && num.factors < 1)
    stop("'num.factors' must be at least 1.")

  eigen.values <- NULL

  # Calculate the number of factors to use
  if (is.null(num.factors)) {
    if (verbose)
      print("Computing reduced correlation e-values...")

    eigen.values <- reduced.eigen(data)
    # Number of eigenvalues that are at least 1 (NA values are not counted)
    num.factors <- sum(eigen.values$values >= 1, na.rm = TRUE)

    if (num.factors < 1)
      stop("No eigenvalue is greater than or equal to 1. Specify 'num.factors' explicitly.")
  }

  # Choose the principal variables that will represent the factors:
  # for each factor, the measure with the largest absolute loading.
  principal.factors <- principal(data, rotate = "varimax", nfactors = num.factors, ...)

  principal.variables <- vapply(
    seq_len(num.factors),
    function(i_factors) which.max(abs(principal.factors$loadings[, i_factors])),
    integer(1)
  )
  principal.variables <- sort(principal.variables)

  # Bind the columns of the factor variables to the ID column.
  # Columns are bound one by one so that a measure selected by more than one
  # factor keeps its original name in every copy.
  output <- do.call(
    cbind,
    c(list(IDvector), lapply(principal.variables, function(i_col) data[i_col]))
  )

  # Create list to export
  structure(
    list(
      factors = output,
      e.values = eigen.values,
      princ.fact = principal.factors,
      measurments = trajMeasures$measurments,
      data = trajMeasures$data,
      time = trajMeasures$time
    ),
    class = "trajFactors"
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.