traj: Trajectory Analysis

Documented in step2factors

#' @title Performs Factor Analysis to Select a Subset of the 24 Measures
#' @description Performs a factor analysis to reduce the set of 24 measures
#' to a smaller set of measures that captures the main features of the
#' trajectories.
#' @param trajMeasures List generated by \code{step1measures}. Contains the
#' original data, the original time and the 24 measures. Its
#' \code{measurments} element must hold the IDs in its first column.
#' @param num.factors Numerical value specifying the number of factors to
#' choose. Defaults to \code{NULL}. See details.
#' @param discard Vector containing names or numerical positions of measures
#' to discard during factor analysis. Positions refer to the columns of
#' \code{trajMeasures$measurments}, so the ID column (position 1) and
#' \code{m18} (position 19) must not be listed. Defaults to \code{NULL}.
#' @param verbose Logical indicating if the function should print information
#' on screen. Defaults to \code{TRUE}.
#' @param \dots Arguments to be passed to \code{principal}. See details.
#' @return An object of class \code{trajFactors}: a list with elements
#' \code{factors} (the ID column followed by the measures chosen to represent
#' the factors), \code{e.values} (the eigenvalues used to choose the number of
#' factors, or \code{NULL} when \code{num.factors} was supplied),
#' \code{princ.fact} (the object returned by the \code{principal} function)
#' and the \code{measurments}, \code{data} and \code{time} elements of
#' \code{trajMeasures}.
#'
#' @details If \code{num.factors} is \code{NULL}, the function selects the
#' number of factors as the number of eigenvalues greater than or equal to 1.
#'
#' The \code{\link[psych]{principal}} function is used in order to choose the
#' measure that will represent each factor: for every factor, the measure with
#' the largest absolute loading is kept. \code{varimax} is used to rotate the
#' data during the execution of the \code{principal} function. Any other
#' parameter can be passed through \code{\dots} in order to further control
#' the \code{principal} function.
#'
#' If some measures happen to be extremely correlated among themselves
#' (corr. >= 0.95), one of them has to be removed. Such measures are flagged
#' by \code{step1measures}. They can be removed with \code{discard};
#' otherwise they are removed automatically by the function. In particular,
#' \code{m18} is dropped when its correlation with \code{m17} is at least
#' 0.95.
#'
#' @author Marie-Pierre Sylvestre, Dan Vatnik
#'
#' marie-pierre.sylvestre@umontreal.ca
#'
#' @examples
#' \dontrun{
#' # Setup data
#' data = example.data$data
#'
#' # Run step1measures and step2factors
#' s1 = step1measures(data, ID=TRUE)
#' s2 = step2factors(s1)
#'
#' # Display factors
#' head(s2$factors)
#'
#' # The next step would be to run "step3clusters"
#' }
#'
#' @seealso
#' \code{\link[psych]{principal}}
#' \code{\link[traj]{step1measures}}
#'
#' @rdname step2factors
#'
#' @export
step2factors <- function(trajMeasures, num.factors = NULL, discard = NULL, verbose = TRUE, ...) {
  measures <- trajMeasures$measurments

  # Resolve 'discard' to column positions and drop those columns.
  # An empty 'discard' means "nothing to discard": negating an empty index
  # would otherwise select zero columns.
  if (length(discard) > 0) {
    if (is.character(discard)) {
      vars.to.discard <- which(names(measures) %in% discard)
    } else {
      vars.to.discard <- discard
    }

    if (19 %in% vars.to.discard) {
      stop("m18 will automatically be removed. Do not include it in the 'discard' variable.")
    }
    if (length(vars.to.discard) != length(discard)) {
      stop("Not all variables in 'discard' are to be removed. There is an error in the format of 'discard'.")
    }
    # Column 1 is consumed as the ID below; discarding it would silently
    # promote the first measure to the role of ID.
    if (1 %in% vars.to.discard) {
      stop("The first column holds the IDs and cannot be included in 'discard'.")
    }

    data <- measures[, -vars.to.discard, drop = FALSE]
  } else {
    data <- measures
  }

  # Split the ID column from the measures
  IDvector <- data[1]
  data <- data[-1]

  # Remove m18 if its correlation with m17 is at least 0.95. Skipped when
  # either column is absent (e.g. m17 was discarded) or when the correlation
  # cannot be computed (NA).
  if (all(c("m17", "m18") %in% names(data))) {
    m17.m18.cor <- cor(data$m17, data$m18)
    if (isTRUE(m17.m18.cor >= 0.95)) {
      data <- data[, names(data) != "m18", drop = FALSE]
    }
  }

  # Deal with remaining correlated variables: the first column of the value
  # returned by check.correlation names the measures to drop.
  corr.vars <- check.correlation(data, verbose = FALSE, is.return = TRUE)
  if (!is.null(corr.vars)) {
    # A logical mask stays correct even if no name matches
    data <- data[, !(names(data) %in% corr.vars[, 1]), drop = FALSE]
    if (verbose) {
      print(paste(corr.vars[, 1], "is removed because it is perfectly correlated with", corr.vars[, 2]))
    }
  }

  # Check validity of num.factors (NULL test first so the comparison is
  # never evaluated on NULL)
  if (!is.null(num.factors) && num.factors > ncol(data)) {
    stop("Requesting more factors in 'num.factors' than available variables.")
  }

  eigen.values <- NULL

  # Calculate the number of factors to use
  if (is.null(num.factors)) {
    if (verbose) {
      print("Computing reduced correlation e-values...")
    }

    eigen.values <- reduced.eigen(data)
    num.factors <- sum(eigen.values$values >= 1, na.rm = TRUE)
  }

  if (num.factors < 1) {
    stop("No factor can be extracted: the number of factors must be at least 1.")
  }

  # Choose the principal variables that will represent the factors: for each
  # factor, the position of the measure with the largest absolute loading.
  principal.factors <- principal(data, rotate = "varimax", nfactors = num.factors, ...)
  factor.loadings <- principal.factors$loadings
  principal.variables <- vapply(
    seq_len(num.factors),
    function(i.factor) unname(which.max(abs(factor.loadings[, i.factor]))),
    integer(1)
  )
  principal.variables <- sort(principal.variables)

  # Bind the columns of the chosen measures to the ID column in one call.
  # Columns are passed one by one so that names are kept exactly as they are,
  # even if the same measure represents several factors.
  output <- do.call(
    cbind,
    c(list(IDvector), lapply(principal.variables, function(i.col) data[i.col]))
  )

  # Create list to export
  structure(
    list(
      factors = output,
      e.values = eigen.values,
      princ.fact = principal.factors,
      measurments = trajMeasures$measurments,
      data = trajMeasures$data,
      time = trajMeasures$time
    ),
    class = "trajFactors"
  )
}