# R/PCAandFA.R


#' Ordinary Principal Components Analysis
#'
#' @description This is simply an alternative to the \code{principal} (psych) and \code{prcomp} (stats) functions.
#'
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param method the svd algorithm to use, one of "dc" and "std" corresponding to
#' the divide-and-conquer (the default here) and standard algorithm.
#' @return an object of class PrincipalComp
#' @export
#' @examples
#' pca(x, 3)
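#' # A fuller, hedged sketch on simulated data (illustrative only; assumes the
#' # package's internal helpers such as wsvd are available once installed):
#' set.seed(1)
#' x <- matrix(rnorm(200), 50, 4)
#' colnames(x) <- paste0("v", 1:4)
#' fit <- pca(x, ncomp = 2)
#' fit$values   # component variances (eigenvalues)
#' fit$cum.exp  # cumulative proportion of variance explained
#' head(fit$scores)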
#'
pca <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, method = c("dc","std")){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)

  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  method <- match.arg(method)
  columnNames <- colnames(x)
  x <- sweep(x, 2, colMeans(x), "-")
  if (scale) x <- scale(x, F, T)
  out <- wsvd(x, method = method); if (sum(sign(out$v[,1]))<=0){out$v <- out$v * -1}
  out$values <- as.vector(out$d^2 / nrow(x))
  out$vectors <- out$v; out$v <- NULL
  out$total.var <- sum(out$values)

  # filter out eigenvectors with eigenvalues <= 0
  ncomp <- min(ncomp, sum(zapsmall(out$values, 6) > 0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)[1:ncomp]
  if (ncomp > 1){
    out$loadings <- out$vectors[,1:ncomp] %*% diag(sqrt(out$values[1:ncomp]))
    out$scores <- out$u[,1:ncomp] %*% diag(out$d[1:ncomp])
  } else {
    out$loadings <- as.matrix(out$vectors[,1]) * sqrt(out$values[1])
    out$scores <- as.matrix(out$u[,1]) * out$d[1]
  }
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  out$values <- out$values[1:ncomp]
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "PCA"))
}


#' Probabilistic Principal Components Analysis via Expectation Maximization
#'
#' @description This is simply an alternative to the \code{principal} (psych) and \code{prcomp} (stats) functions that uses expectation
#' maximization to fit the PCA in the presence of missing values. If there are no missing values the output
#' should be virtually identical to the \code{\link{pca}} function, save for sign changes in the eigenvectors
#' and loadings, and small numerical differences. The 'sensible principal components analysis' EM
#' algorithm described by Roweis (1997) is implemented here. It is simply a variant of Tipping & Bishop's (1997)
#' EM algorithm.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param maxit maximum number of iterations for expectation maximization. defaults to 1000.
#' @param tol tolerance for convergence. defaults to 1e-4.
#' @return an object of class PrincipalComp
#' @export
#' @examples
#' ppca(x, 3)
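#' # Hedged sketch with missing cells (illustrative; simulated data, internal
#' # helpers such as qb and rpowexp are assumed available from the package):
#' set.seed(2)
#' xm <- matrix(rnorm(300), 60, 5)
#' colnames(xm) <- paste0("v", 1:5)
#' xm[sample(length(xm), 15)] <- NA  # punch a few holes in the data
#' fit <- ppca(xm, ncomp = 2)
#' fit$converged
#' fit$cum.exp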
#'
#' @references
#' Tipping, M. & Bishop, C. Probabilistic principal component analysis. Technical
#' Report NCRG/97/010, Neural Computing Research Group, Aston University, September 1997. \cr
#' \cr
#' Roweis, S. (1997) EM algorithms for PCA and SPCA. NIPS'97: Proceedings of the 10th International
#' Conference on Neural Information Processing Systems
#'
ppca <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, maxit = 1000, tol=1e-4) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  if (!exists(".Random.seed", mode="numeric", envir=globalenv()))
    sample(NA);
  oldSeed <- get(".Random.seed", mode="numeric", envir=globalenv());
  set.seed(666)
  N <- nrow(x)
  D <- ncol(x)
  Obs <- !is.na(x)
  hidden <- which(is.na(x))
  naobs <- length(hidden)
  if(naobs) { x[hidden] <- 0 }
  columnNames <- colnames(x)
  x <- sweep(x, 2, colMeans(x), "-")
  if (scale) x <- sweep(x, 2, apply(x,2,function(i) sqrt(mean((i-mean(i))^2))), "/")
  ## Initialization
  r <- sample(N)
  C <- t(x[r[1:ncomp], ,drop = FALSE])
  ## Random matrix with the same dim names as x
  C <- matrix(cvreg::rpowexp(prod(dim(C)),0,1,4), nrow(C), ncol(C), dimnames = labels(C))
  assign(".Random.seed", oldSeed, envir=globalenv());
  CtC <- crossprod(C)
  ## inv(C'C) C' S is the solution to the EM problem
  S  <- x %*% C %*% pseudoinverse(CtC)
  recon <- tcrossprod(S,C)
  recon[hidden] <- 0
  ss <- sum(sum((recon - x)^2)) / (N * D - naobs)
  count <- 1
  old <- 1e100

  ##  EM iterations
  while (count > 0) {
    ## Expectation Step
    Sx <- pseudoinverse(diag(ncomp) + CtC/ss)
    ss_old <- ss
    if(naobs) {
      proj <- tcrossprod(S, C)
      x[hidden] <- proj[hidden]
    }
    S <- x %*% C %*% Sx / ss
    ## Maximization Step
    SumXtX <- crossprod(S)
    C <- crossprod(x, S) %*% solve(SumXtX + N * Sx)
    CtC <- crossprod(C)
    ss <- (sum(sum((tcrossprod(C,S)-t(x))^2)) + N*sum(sum(CtC %*% Sx)) + naobs*ss_old ) / (N * D)
    ## Evaluate Objective Function
    objective <- N * (D * log(ss) + sum(diag(Sx)) - log(det(Sx))) + sum(diag(SumXtX)) - naobs * log(ss_old)
    ## Evaluate convergence
    rel_ch <- abs( 1 - objective / old )
    old <- objective
    count <- count + 1
    if (is.na(rel_ch)){
      count <- 0
      converged <- FALSE
    }
    else if( !is.na(rel_ch) && rel_ch < tol & count > 5 ) {
      count <- 0
      converged <- TRUE
    }
    else if (count > maxit) {
      count <- 0
      converged <- FALSE
    }
  }

  C <- qb(C,rand=F)$Q
  S <- x %*% C
  eig <- eigen(crossprod(sweep(S,2,colMeans(S)))/nrow(S))
  if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
  vals <- eig$values
  vecs <- eig$vectors
  C <- C %*% vecs
  S <- x %*% C
  R2cum <- rep(NA, ncomp)
  SSE <- sum(x^2, na.rm=TRUE)

  for (i in 1:ncol(C)) {
    difference <- x - (S[,1:i, drop=FALSE] %*% t(C[,1:i, drop=FALSE]))
    R2cum[i] <- 1 - (sum(difference^2, na.rm=TRUE) / SSE)
  }

  out <- list()
  out$scores <- S
  out$values <- apply(out$scores, 2, var)
  out$loadings <- C %*% diag(sqrt(out$values))
  out$vectors <- C
  out$total.var <- sum(out$values)
  out$cum.exp<- R2cum
  out$converged <- converged
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$vectors <- out$vectors[,1:ncomp]
  return(structure(out,class="PrincipalComp", rotation = "none", model = "Probabilistic PCA"))
}



#' Robust Principal Components Analysis (Filzmoser-Maronna-Werner method)
#'
#' @description This applies the outlier detection method of Filzmoser, Maronna, and Werner (2008) to
#' obtain weights, which are used to construct a weighted covariance matrix which is in turn used
#' for principal components analysis.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param control a list of control options for the outlier identification step. usually these will not need to be changed.
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @references
#' Filzmoser, P., Maronna, R., & Werner, M. (2008) Outlier identification in high dimensions, Computational Statistics and Data Analysis, 52, 1694-1711.
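#'
#' @examples
#' # Hedged, illustrative sketch: Gaussian data with a handful of shifted rows
#' # (assumes the package's internal helpers, e.g. L1median and hdmedian, are available).
#' set.seed(3)
#' x <- matrix(rnorm(400), 100, 4)
#' x[1:5, ] <- x[1:5, ] + 6  # contaminate five observations
#' fit <- pcaRobust(x, ncomp = 2)
#' fit$outliers              # rows down-weighted to zero by the weighting scheme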
#'
pcaRobust <- function (x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, control = list(explvar = 0.96, crit.M1 = 0.33, crit.c1 = 2.5, crit.M2 = 0.25, crit.c2 = 0.975, cs = 0.25, outbound = 0.25)) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)

  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  columnNames <- colnames(x)
  if (scale) x <- scale(x)
  explvar <- control$explvar
  crit.M1 <- control$crit.M1
  crit.c1 <- control$crit.c1
  crit.M2 <- control$crit.M2
  crit.c2 <- control$crit.c2
  cs <- control$cs
  outbound <- control$outbound
  p <- ncol(x); n <- nrow(x)
  L1med <- L1median(x)$center

  scalefun <- function(x, mu0){
    u <- x - mu0
    s0 <- mean(abs(u)) * sqrt(pi/2)
    err <- 1000
    it <- 0
    while ((err > 1e-4) && (it < 50)) {
      it <- it + 1
      s1 <- sqrt(s0^2 * mean(rho.Bisquare(u/s0, 1.547645)/0.50))
      err <- abs(s1 - s0)/s0
      s0 <- s1
    }
    return(s0)
  }

  x.mad = sapply(1:ncol(x), function(i) scalefun(x[,i], L1med[i]))
  x.sc <- sweep(sweep(x,2,L1med),2,x.mad,"/")
  x.svd <- wsvd(scale(x.sc, TRUE, FALSE))
  a <- x.svd$d^2/(n - 1)
  if (explvar==1){p1<-p}else{p1 <- (1:p)[(cumsum(a)/sum(a) > explvar)][1]}
  p1 <- ncomp
  x.pc <- x.sc %*% as.matrix(x.svd$v[, 1:p1])
  L1med.pc <- L1median(x.pc)$center
  xpc.sc <- scale(x.pc, L1med.pc, sapply(1:ncol(x.pc), function(i) scalefun(x.pc[,i], L1med.pc[i])))
  wp <- abs(apply(xpc.sc^4, 2, mean) - 3)
  xpcw.sc <- xpc.sc %*% diag(wp/sum(wp))

  xpc.norm <- sqrt(apply(xpcw.sc^2, 1, sum))
  x.dist1 <- xpc.norm * sqrt(qchisq(0.5, p1))/hdmedian(xpc.norm)
  M1 <- hdquantile(x.dist1, crit.M1)
  const1 <- hdmedian(x.dist1) + crit.c1 * scalefun(x.dist1, hdmedian(x.dist1))
  w1 <- (1 - ((x.dist1 - M1)/(const1 - M1))^2)^2
  w1[x.dist1 < M1] <- 1
  w1[x.dist1 > const1] <- 0
  xpc.norm <- sqrt(apply(xpc.sc^2, 1, sum))
  x.dist2 <- xpc.norm * sqrt(qchisq(0.5, p1))/hdmedian(xpc.norm)
  M2 <- sqrt(qchisq(crit.M2, p1))
  const2 <- sqrt(qchisq(crit.c2, p1))
  w2 <- (1 - ((x.dist2 - M2)/(const2 - M2))^2)^2
  w2[x.dist2 < M2] <- 1
  w2[x.dist2 > const2] <- 0
  wfinal <- (w1 + cs) * (w2 + cs)/((1 + cs)^2)
  wfinal01 <- round(wfinal + 0.5 - outbound)
  wcov <- cov.wt(x, wt = wfinal, method = "ML")
  dist <- mahalanobis_dist(x, wcov$center, wcov$cov)
  const3 <- hdmedian(dist)/(qchisq(0.5, p))
  wcov$cov <- wcov$cov * const3
  eig <- eigen(wcov$cov, symmetric = TRUE)
  if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
  out <- list()

  out$values <- eig$values
  out$total.var <- sum(out$values)
  ncomp <- min(ncomp, sum(zapsmall(out$values, 6)>0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 8)[1:ncomp]
  out$values <- out$values[1:ncomp]; out$vectors <- as.matrix(eig$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- out$vectors %*% diag(sqrt(out$values))
  else
    out$loadings <- out$vectors * sqrt(out$values)

  out$scores <-  x %*% out$vectors
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$center <- wcov$center
  out$cov <- wcov$cov
  out$dist <- dist
  out$weights <- wfinal
  out$outliers <- which(wfinal01==0)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$vectors <- out$vectors[,1:ncomp]
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Robust PCA"))
}

#' Spherical Principal Components Analysis
#'
#' @description This function implements the spherical principal components analysis method of Locantore et al. (1999)
#' to conduct PCA while downweighting large outliers. It is one of the earliest attempts at producing a robust PCA method,
#' but despite the development of more sophisticated methods, this works quite well (Maronna, 2005). The method makes use
#' of the multivariate spatial median, also known as the multivariate L1-median, or just multivariate median. The spatial
#' median minimizes the sum of Euclidean distances, and hence gives the coordinates for the center of the hypersphere (hence
#' the origin of the name spherical principal components analysis): \cr
#'
#' \deqn{\hat{M} = \arg\min_{M} \sum_{i} \|x_i - M\|}{M_hat = arg min_M sum_i ||x_i - M||}
#' \cr
#'
#' It differs from the vector of univariate medians, known as the marginal multivariate median or componentwise median.
#' The componentwise median is not equivariant under orthogonal transformations of the data, whereas the spatial median is:
#' the spatial median of a transformed data set, back-transformed, recovers the spatial median of the original data. The
#' spatial median is unique when the data are not perfectly collinear; when the data are collinear, multiple (equally valid)
#' solutions may exist (Milasevic & Ducharme, 1987). The spatial median has a bounded influence function and a breakdown point of 50\%. \cr
#'
#' With the spatial median defined, the motivation behind spherical PCA becomes clear.
#' Each observation's deviation from the spatial median, scaled to unit length, is its spatial sign; the spatial signs are
#' thus a transformation/standardization of the data onto the unit hypersphere. Due to the robustness of the spatial median,
#' multivariate outliers are easily identifiable and do not negatively influence the estimate of the principal components for
#' the "clean" data. While the eigenvectors enjoy this benefit and are consistent estimates of the population eigenvectors
#' of a data set, the eigenvalues are not consistent estimators. However, this is easily remedied by using a robust scale estimator
#' to calculate the standard deviations of the PCA scores, then squaring these to obtain the variances (which are the eigenvalues).
#' The method is very fast as well, even for larger data sets.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param evest the scale estimator used to provide consistent estimates of the
#' population eigenvalues. one of "tau" (the default), "bisq" (bisquare), or "pb"
#' (percentage bend).
#' @return a pcaSphere object containing eigenvalues, eigenvectors, component loadings, component scores, and spatial median.
#' @export
#'
#' @references
#' Locantore, N., Marron, J., Simpson, D, Tripoli, N., Zhang, J., Cohen, K. (1999) Robust principal component analysis for functional data. Sociedad de Estadistica e Investigacion Operativa Test. 8: 1. https://doi.org/10.1007/BF02595862 \cr \cr
#' Maronna, R.A. (2005) Principal components and orthogonal regression based on robust scales, Technometrics, 47, 264–273 \cr \cr
#' Milasevic, P. & Ducharme, G. R. (1987) Uniqueness of the Spatial Median. Ann. Statist. 15(3) 1332-1333. doi:10.1214/aos/1176350511. \cr \cr
#' @examples
#' pcaSphere(x, 3)
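#' # Hedged sketch comparing spherical and ordinary PCA on contaminated data
#' # (illustrative only; simulated data):
#' set.seed(4)
#' x <- matrix(rnorm(300), 75, 4)
#' x[1:4, 1] <- x[1:4, 1] + 10  # a few gross outliers in one variable
#' rob <- pcaSphere(x, ncomp = 2)
#' ord <- pca(x, ncomp = 2)
#' rbind(spherical = rob$values, ordinary = ord$values)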
#'
pcaSphere <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, evest = c("tau", "bisq", "pb")){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  evest <- match.arg(evest)
  evest <-  which(c("tau", "bisq", "pb") == evest)
  columnNames <- colnames(x)
  if (scale) x <- scale(as.matrix(as.data.frame(x)), T, T) else x <- scale(as.matrix(as.data.frame(x)), T, F)
  out <- cvreg:::SPC(x, scale = evest)
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  out$values <- as.vector(out$values)
  out$weights <- as.vector(out$weights)
  out$total.var <- sum(out$values)

  # filter out eigenvectors with eigenvalues <= 0
  ncomp <- min(ncomp, sum(zapsmall(out$values, 6)>0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)[1:ncomp]
  out$values <- out$values[1:ncomp]
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- out$vectors %*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(out$vectors * sqrt(out$values))

  out$scores <- as.matrix(x %*% out$vectors)
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)

  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Spherical PCA"))
}



#' L1 Principal Components Analysis
#'
#' @description Uses the L1 singular value decomposition in the function \code{\link{svdL1}} to construct an L1-norm
#' set of principal components. L1 PCA can be more resistant to outliers and often yields intrinsic sparsity
#' in the loadings matrix. The raw L1 eigenvectors are orthonormalized prior to computation
#' of the principal components because the L1 singular value decomposition does not yield
#' orthonormal vectors. However, the non-orthogonalized L1-PCA results can be returned by setting the argument
#' orth = FALSE. See the  \code{\link{pcaR1}} function for an intrinsically orthogonal L1-norm
#' based method.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param interp should values be interpolated during the calculation of L1 weighted svd? Since
#' the calculations entail calculating weighted medians, and weighted medians are not always unique,
#' this will smooth the values as to obtain a unique solution. this can come at the cost of accuracy,
#' however, as well as substantially slow down both the rate of convergence
#' and rate of calculations at each iteration, so the default is FALSE. However,
#' if convergence problems occur with this option set to FALSE, it may be worth trying
#' interp=TRUE.
#' @param orth should the L1-eigenvectors be orthogonalized using a QR decomposition? the default
#' is TRUE. setting to FALSE returns loadings and scores based on the raw L1 eigenvectors.
#' @param maxit the maximum number of iterations for the L1-SVD algorithm. defaults to 500.
#' @param tol convergence tolerance. defaults to 1e-6
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @references Hawkins, D.M.; Liu, L.; Young, S.S. (2001) Robust Singular Value Decomposition, National Institute of Statistical Sciences, Technical Report Number
#'
#' @examples
#' pcaL1(x, 3)
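#' # Hedged sketch (illustrative; simulated data with a few inflated rows,
#' # internal helpers such as svdL1 and L1median assumed available):
#' set.seed(5)
#' x <- matrix(rnorm(240), 60, 4)
#' x[1:3, ] <- x[1:3, ] * 5
#' fit <- pcaL1(x, ncomp = 2)
#' round(fit$loadings, 3)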
#'
pcaL1 <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = T, interp = F, orth = T, maxit = 500, tol = 1e-6){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  columnNames <- colnames(x)
  center <- L1median(x)$center
  x <- sweep(x, 2,center, "-")
  if (scale){
    scales <- sapply(1:ncol(x),function(i) mean(abs(x[,i]))*1.2533) # x is already centered at the L1-median
    x <- sweep(x,2,scales,"/")
  }
  out <- svdL1(x, maxit = maxit, tol = tol, interp = interp)
  out$values <- (out$d/sqrt(nrow(x)-1))^2;
  out$vectors <- out$v; out$v <- NULL; out$u <- NULL
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  if (orth){
    out$vectors <- try(qb(out$vectors,rand=F)$Q,silent=T)
    if (inherits(out$vectors, "try-error")){
      out$vectors <- try(graham.schmidt(out$vectors)$Q,silent=T)
      if (inherits(out$vectors, "try-error")){
        out$vectors <- try(qr.Q(qr(out$vectors)),silent=T)
        if (inherits(out$vectors, "try-error")){
          out$vectors <- svd(out$vectors)$u
          }
        }
    }
    if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  }
  out$total.var <- sum(out$values)

  # filter out eigenvectors with eigenvalues <= 0
  ncomp <- min(ncomp, sum(zapsmall(out$d, 6) > 0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)[1:ncomp]
  out$values <- out$values[1:ncomp]
  out$vectors  <- as.matrix(out$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- out$vectors %*% diag(sqrt(out$values))
  else
    out$loadings <- out$vectors * sqrt(out$values)

  out$scores <- as.matrix(x %*% out$vectors); out$d <- NULL
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "L1 PCA"))
}


#' Rotation Invariant L1-Principal Components Analysis
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param maxit the maximum number of iterations for the L1-SVD algorithm. defaults to 500.
#' @param tol convergence tolerance. defaults to 1e-8
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @references Hawkins, D.M.; Liu, L.; Young, S.S. (2001) Robust Singular Value Decomposition, National Institute of Statistical Sciences, Technical Report Number
#'
#' @examples
#' pcaR1(x, 3)
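#' # Hedged sketch (illustrative; simulated data):
#' set.seed(6)
#' x <- matrix(rnorm(240), 60, 4)
#' colnames(x) <- paste0("v", 1:4)
#' fit <- pcaR1(x, ncomp = 2)
#' fit$cum.exp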
#'
pcaR1 <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = T, maxit = 500, tol = 1e-8) {

  wtfun <- function(x, V, c){
    if(norm(x-tcrossprod(V)%*%x, type="2") <= c) {return(1)}
    else { return(c/norm(x-tcrossprod(V)%*%x, type="2"))}
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  center <- L1median(x)$center
  #center <- apply(x, 2, hdmedian)
  xc <- sweep(x, 2, center)
  if (scale){
    scales <- sapply(1:ncol(xc), function(i) sqrt(mean(xc[,i]^2)))
    xc <- sweep(xc,2,scales,"/")
  }
  I <- diag(p)
  V <- wsvd(crossprod(xc)/n)$v; if (sum(sign(V[,1]))<=0){V <- V * -1}
  s <- sapply(1:n, function(i) sqrt(max(0, (xc[i,] %*% ((I-tcrossprod(V,V))%*%xc[i,]))[1])))
  c <- hdmedian(s)
  iter <- 0; normdiff <- Inf; Vold <- V
  while (iter < maxit && normdiff > tol) {
    q <- sapply(1:n,function(i) wtfun(xc[i,],Vold,c))
    W <- crossprod(xc, (xc * q))
    Vnew <- qb(W %*% Vold,rand=F)$Q
    if (sum(sign(Vnew[,1]))<=0){Vnew <- Vnew * -1}
    iter <- iter + 1
    normdiff <- norm(Vnew - Vold)
    Vold <- Vnew
  }

  values <- apply(xc%*%Vold, 2, var)
  ord <- order(values, decreasing = TRUE)
  values <- values[ord]; Vold <- Vold[,ord]
  total.var <- sum(values)
  loadings <- Vold %*% diag(sqrt(values)); loadings <- as.matrix(loadings[,1:ncomp])
  vectors <- as.matrix(Vold[,1:ncomp])
  values <- values[1:ncomp]
  colnames(vectors) <- colnames(loadings) <- paste0("PC", 1:ncomp)
  rownames(vectors) <- rownames(loadings) <- columnNames
  cum.exp <- cumsum(values)/total.var
  scores <- as.matrix(xc %*% vectors); colnames(scores) <- paste0("PC", 1:ncomp)
  orthdist <- .orthdist(xc, loadings)
  scoredist <- .scoredist(scores, values)
  return(structure(list(values=values, loadings=loadings, vectors=vectors, scores=scores,
                        cum.exp=cum.exp, orthdist=orthdist, scoredist=scoredist),
            class = "PrincipalComp", rotation = "none", model = "R1 PCA"))
}



#' Curvilinear Principal Components Analysis
#'
#' @description This performs a curvilinear component analysis, which is a method of PCA that is capable of
#' capturing non-linear trends in the data. The algorithm takes an initial set of scores as a starting point,
#' along with a distance matrix, to unfold the multidimensional manifold. Then a neural network is used to minimize
#' a cost function to find a projection of the input data which function as principal component scores. In this
#' function the eigenvalues and loadings matrix are approximated by matrix-multiplying the transposed data x by the
#' transposed pseudoinverse of the scores. The original data set is then reconstructed from this approximated loadings
#' matrix and set of scores and the eigenvalues of this reconstruction are returned as the approximate eigenvalues. This
#' is simply to facilitate plotting and ease of interpretation, but do keep in mind the eigenvalues and loadings matrix
#' returned here are approximations.
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of desired principal components.
#' @param init linear component scores to initialize the nonlinear estimator. can be one of "mds", "pca",  or "pcaSphere".
#' "mds" uses the cmdscale function to compute the classical multidimensional scaling. alternatively, a matrix of component scores
#' with the number of columns equal to 'ncomp' can be supplied.
#' @param lambda the initial influence radius. if left as NULL, it defaults to twice the maximum of the vector of
#' absolute average deviations for each component.
#' @param alpha initial learning rate. defaults to 0.5.
#' @param Lp a norm for the Minkowski distance metric. defaults to 2.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param maxit maximum number of iterations. defaults to 1000.
#' @param tol convergence tolerance. defaults to 1e-12.
#' @return an object of class PrincipalComp
#' @export
#'
#' @examples
#' pcaCurve(x)
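#' # Hedged sketch on a noisy arc, where a nonlinear method can help
#' # (illustrative only; simulated data):
#' set.seed(7)
#' th <- runif(100, 0, pi)
#' x <- cbind(cos(th) + rnorm(100, sd = 0.1),
#'            sin(th) + rnorm(100, sd = 0.1),
#'            rnorm(100, sd = 0.1))
#' fit <- pcaCurve(x, ncomp = 2)
#' head(fit$scores)  # curvilinear component scores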
#'
#' @references
#' Demartines, P.; Herault, J. (1997) Curvilinear component analysis: a self-organizing neural network for nonlinear mapping of data sets. IEEE Transactions on Neural Networks archive. 8, 1, 148-154.
#'
pcaCurve = function(x, ncomp = min(nrow(x)-2, ncol(x)-1), init = c("mds", "pca", "pcaSphere"),
                    lambda = NULL, alpha = 0.5, Lp = 2, scale = T, maxit = 1000, tol = 1e-12){

  columnNames <- colnames(x)
  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  if (!exists(".Random.seed", mode="numeric", envir=globalenv()))
    sample(NA);
  oldSeed <- get(".Random.seed", mode="numeric", envir=globalenv());
  set.seed(666)
  vss <- sample(seq(0, nrow(x)-1, len = nrow(x)), maxit, replace = T)
  assign(".Random.seed", oldSeed, envir=globalenv());
  minkowski.dist <- cvreg:::minkowski_dist(x, Lp)


  if (is.character(init) || is.vector(init)){
    init <- match.arg(init)
    if (init == "mds"){
      init <- cmdscale(minkowski.dist, k = ncomp)
      ord <- order(apply(init, 2, var), decreasing = T)
      init <- as.matrix(init[,ord])
    }
    else if (init == "pca"){
      init <- as.matrix(pca(x, ncomp, scale = scale)$scores)
      total.var <- sum(apply(init,2,var));
    }
    else{
      init <- as.matrix(pcaSphere(x, ncomp, scale = scale)$scores)
      total.var <- sum(apply(init,2,var))
    }
  }
  else if (is.matrix(init) || is.data.frame(init)){
     init <- as.matrix(init)
  }

  ncomp <- ncol(init)
  if (is.null(lambda))
    lambda <- 2*max(apply(init, 2, aad))

  out = list()
  out$scores = cvreg:::CPCAcpp(minkowski.dist,
                               Yinit = init,
                               lambda = lambda,
                               alpha = alpha,
                               p = Lp,
                               maxiter = maxit,
                               tolerance = tol,
                               vs = vss)

  out$scores <- as.matrix(out$scores)
  out$values <- apply(out$scores,2,var)
  out$vectors <- as.matrix(crossprod(scale(x), pseudoinverse(t(scale(out$scores)))))
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1; out$scores <- out$scores * -1}
  if (ncomp > 1)
    out$loadings <- as.matrix(out$vectors[,1:ncomp])%*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(out$vectors) * sqrt(out$values)

  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- colnames(out$loadings) <- colnames(out$vectors) <- paste0("CC", 1:ncol(out$loadings))
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Curvilinear PCA"))
}

#' Local Principal Components Analysis
#'
#' @description Local principal components analysis is a variant of PCA that is useful for data sets with clustered
#' observations (Yang, Zhang, & Yang, 2006). LPCA begins with individual observations and finds the structure of the data
#' by considering the data points within a certain distance δ. In this implementation the Minkowski distance is used,
#' a generalized distance measure that includes the Manhattan (L1) and Euclidean (L2) distances as special cases and
#' extends to higher-order norms, making it a flexible choice of metric.
#' This distance based nearest neighbors strategy is used to build an adjacency matrix, which is converted to the Laplacian
#' Graph representation. This is then  subjected to eigendecomposition, and the eigenvectors P are multiplied by the data x
#' to obtain the matrix XP. Next, the crossproduct of XP is used to calculate the rotation matrix R, ie, R = XP'XP, and the
#' data are projected onto a new basis. A more complete description of the algorithm can be found in the cited paper.
#' Note that the new basis can have more columns than it does variables, and that the eigenvalues
#' are not ordered due to the local nature of the components.
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of components to extract.
#' @param pct this determines the percentage of the distance matrix whose
#' edges will be non-null. the default is 0.15.
#' @param Lp a norm for the Minkowski distance metric. defaults to 2.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @return an object of class PrincipalComp
#' @export
#'
#' @examples
#' pcaLocal(x, 3)
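#' # Hedged sketch with two loose clusters (illustrative only; simulated data):
#' set.seed(8)
#' x <- rbind(matrix(rnorm(150), 50, 3), matrix(rnorm(150, mean = 4), 50, 3))
#' colnames(x) <- paste0("v", 1:3)
#' fit <- pcaLocal(x, ncomp = 2)
#' fit$values  # note: not necessarily in decreasing order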
#'
#' @references
#' Yang, J., Zhang, D., & Yang, J. (2006). Locally principal component learning for face representation and recognition. Neurocomputing, 69(13-15), 1697–1701. doi:10.1016/j.neucom.2006.01.009
#'
pcaLocal = function(x, ncomp = min(nrow(x)-1, ncol(x)), pct = 0.15, Lp = 2, scale = T){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)) ncomp = sum(eigen(crossprod(x)/nrow(x))$values > 0)
  graph.mat = .distgraph(x, pct, Lp)
  H <- symm(1*(graph.mat$mask));
  L = diag(rowSums(H))-H
  eigL = .adjEigLaplace(eigen(L))
  Lval = eigL$values^0.50
  Pl   = (eigL$vectors)%*%diag(Lval)
  R  = crossprod(Pl,x)%*%crossprod(x,Pl)
  S = crossprod(x,Pl)%*%crossprod(Pl,x)
  topEigen <- function(x, n){
    e = eigen(x)
    n = sum(e$values > 0)
    list(values = e$values[1:n], vectors = e$vectors[,1:n], ncomp = n)
  }
  topR = topEigen(R, ncomp)
  topR$vectors <- as.matrix(topR$vectors)
  out = list()
  out$vectors <- as.matrix(.adjLoadings(crossprod(x, Pl)%*%(topR$vectors%*%diag(1/sqrt(topR$values)))))
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  out$scores <- x%*%out$vectors
  out$values <- apply(out$scores,2,aad)^2
  out$total.var <- sum(out$values)
  ord <- order(out$values, decreasing = TRUE)
  ncomp <-min(ncomp, topR$ncomp)
  out$values <- out$values[1:ncomp]
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- as.matrix(out$vectors)%*% diag(sqrt(out$values))
  else
    out$loadings <- out$vectors * sqrt(out$values)
  out$scores <- as.matrix(out$scores[,1:ncomp])
  colnames(out$scores) <-colnames(out$vectors) <- colnames(out$loadings) <- paste0("LC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Local PCA"))
}



#' Sparse Principal Components Analysis
#'
#' @description This function is based on the original paper by Zou, Hastie, and Tibshirani (2006) where an elastic net
#' formulation of principal components analysis was demonstrated.
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of components to extract.
#' @param alpha the elastic net mixing parameter, which can take values of \eqn{0 \le \alpha \le 1}.
#' @param lambda the shrinkage parameter. as in the elastic net, the L1 shrinkage penalty is \eqn{\lambda_1 = \alpha * \lambda},
#' and the L2 shrinkage penalty is \eqn{\lambda_2 = (1-\alpha) * \lambda}.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param max.iter maximum number of iterations
#' @param tol tolerance for convergence
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @examples
#' pcaSparse(x, 3, 0.5, 0.12)
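#' # Hedged sketch showing how lambda induces sparsity in the loadings
#' # (illustrative only; simulated data):
#' set.seed(9)
#' x <- matrix(rnorm(500), 100, 5)
#' colnames(x) <- paste0("v", 1:5)
#' fit <- pcaSparse(x, ncomp = 2, alpha = 0.5, lambda = 0.05)
#' round(fit$loadings, 3)  # larger lambda zeroes out more entries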
#'
#' @references
#' Zou, H.; Hastie, T.; Tibshirani, R. (2006). Sparse principal component analysis. Journal of Computational and Graphical Statistics. 15 (2): 262–286. doi:10.1198/106186006x113430.
#'

pcaSparse <- function(x, ncomp = min(nrow(x)-1, ncol(x)), alpha = 0.75, lambda = 1e-4, scale = T, max.iter = 1000, tol=1e-16) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  if (alpha > 1){
    alpha <- 1
    purple <- crayon::make_style(rgb(0.431,0.055,0.616))
    cat(purple("Alpha cannot be greater than 1. Automatically correcting to alpha = 1."))
  }

  if (alpha < 0){
    alpha <- 0
    violet <- crayon::make_style(rgb(0.62,0.06,0.48))
    cat(violet("Alpha cannot be negative. Automatically correcting to alpha = 0."))
  }

  x <- as.matrix(x)
  if (any(is.na(x))) {
    warning("Missing values are omitted: na.omit(x).")
    x <- stats::na.omit(x)
  }
  n <- nrow(x)
  p <- ncol(x)

  if(is.null(ncomp)) ncomp <- min(n,p)
  if(ncomp > min(n,p)) ncomp <- min(n,p)
  if(ncomp<1) stop("Target rank is not valid!")
  svd_init <- cvreg::wsvd(x, tol = 1e-100)
  if (sum(sign(svd_init$v[,1]))<=0){svd_init$v <- svd_init$v * -1}
  Dmax <- svd_init$d[1]
  A <- as.matrix(svd_init$v[,1:ncomp])
  B <- as.matrix(svd_init$v[,1:ncomp])
  V <- as.matrix(svd_init$v)
  VD = sweep(V, MARGIN = 2, STATS = svd_init$d, FUN = "*", check.margin = TRUE)
  VD2 = sweep(V, MARGIN = 2, STATS = svd_init$d^2, FUN = "*", check.margin = TRUE)

  lambdaL1 <- alpha * lambda
  lambdaL2 <- (1-alpha) * lambda
  lambdaL1 <- lambdaL1 *  Dmax^2
  lambdaL2 <- lambdaL2 * Dmax^2
  nu <- 1.0 / (Dmax^2 + lambdaL2)
  kappa <- nu * lambdaL1
  obj <- c();change <- Inf;iter<- 1
  while (iter<= max.iter && change > tol) {
    Z <- VD2 %*% crossprod(V, B)
    svd_update <- wsvd(Z);
    if (sum(sign(svd_update$v[,1]))<=0){
      svd_update$v <- svd_update$v * -1
      svd_update$u <- svd_update$u * -1
    }
    A <- tcrossprod(svd_update$u, svd_update$v)
    grad <- VD2 %*% crossprod(V, (A-B)) - lambdaL2 * B
    B_temp <- B + nu * grad
    # lasso penalty
    idxH <- which(B_temp > kappa)
    idxL <- which(B_temp <= -kappa)
    B = matrix(0, nrow = nrow(B_temp), ncol = ncol(B_temp))
    B[idxH] <- B_temp[idxH] - kappa
    B[idxL] <- B_temp[idxL] + kappa
    # compute errors
    R <- t(VD) - tcrossprod(crossprod(VD, B), A)
    # evaluate elastic net objective function
    obj <- c(obj, 0.5 * sum(R^2) + lambdaL1 * sum(abs(B)) + 0.5 * lambdaL2 * sum(B^2))
    # check for convergence
    if(iter> 1){change <- (obj[iter-1] - obj[iter]) / obj[iter]}
    iter<- iter+ 1
  }

  out = list()
  if (sum(sign(B[,1]))<=0){B <- B * -1; A <- A * -1}
  out$values <- svd_update$d[1:ncomp]/(nrow(x)-1)
  out$scores <- as.matrix(x %*% B)
  if (ncomp > 1)
    out$loadings <- as.matrix(B) %*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(B) * sqrt(out$values)

  out$vectors <- as.matrix(B)
  out$objective <- obj

  colnames(out$scores) <- paste0("PC", 1:ncomp)
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames

  out$total.var <- sum(apply(Re(x), 2, stats::var))
  out$cum.exp <- cumsum(apply(out$scores, 2, var)/ out$total.var)
  class(out) <- "PrincipalComp"
  attr(out, "rotation") <- "none"
  attr(out, "model") <- "Sparse PCA"
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(out)
}


#' Sparse Robust Principal Components Analysis
#'
#' @description This function is based on the original paper by Zou, Hastie, and Tibshirani (2006) where an elastic net
#' formulation of principal components analysis was demonstrated. The algorithm can be initialized using one of two methods.
#' The first option is the robust principal components method in the \code{\link{pcaRobust}} function which utilizes the method
#' of Filzmoser, Maronna, and Werner (2008).  The second option is spherical principal components analysis (Locantore et al., 1999)
#' as suggested by Maronna (2005). After the initial principal components analysis, the elastic net penalization is applied to this
#' initial fit. The elastic net objective function is modified such that the sum of squared errors is weighted utilizing a bisquare
#' function, leading to the objective function \eqn{0.5 * \sum_i{\epsilon_i * w_i} + \sum(|B|) * \lambda_1 + 0.5 * \lambda_2 * \sum(B^2)}.
#'
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of components to extract.
#' @param alpha the elastic net mixing parameter, which can take values of \eqn{0 \le \alpha \le 1}.
#' @param lambda the shrinkage parameter. as in the elastic net, the L1 shrinkage penalty is \eqn{\lambda_1 = \alpha * \lambda},
#' and the L2 shrinkage penalty is \eqn{\lambda_2 = (1-\alpha) * \lambda}.
#' @param delta the desired breakdown point. defaults to 0.50, which is the maximum.
#' @param init the method used to initialize the fit, one of "robust" (the default, uses \code{\link{pcaRobust}}) or "sphere" (uses \code{\link{pcaSphere}}).
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param max.iter maximum number of iterations
#' @param tol tolerance for convergence
#'
#' @return a PrincipalComp object
#' @export
#'
#' @examples
#' pcaRobSparse(x, 3, 0.5, 0.12)
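#' # Hedged sketch (illustrative; simulated data with a few gross outliers):
#' set.seed(10)
#' x <- matrix(rnorm(500), 100, 5)
#' x[1:5, ] <- x[1:5, ] + 8
#' fit <- pcaRobSparse(x, ncomp = 2, alpha = 0.5, lambda = 0.05)
#' round(fit$loadings, 3)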
#'
#' @references
#' Hawkins, D.M., Liu, L., & Young, S.S. (2001) Robust Singular Value Decomposition, National Institute of Statistical Sciences, Technical Report Number \cr \cr
#' Locantore, N., Marron, J., Simpson, D, Tripoli, N., Zhang, J., & Cohen, K. Robust principal component analysis for functional data. (1999) Sociedad de Estadistica e Investigacion Operativa Test. 8: 1. https://doi.org/10.1007/BF02595862 \cr \cr
#' Zou, H., Hastie, T., & Tibshirani, R. (2006). Sparse principal component analysis. Journal of Computational and Graphical Statistics. 15 (2): 262–286. doi:10.1198/106186006x113430. \cr \cr
#' Maronna, R.A. (2005) Principal components and orthogonal regression based on robust scales, Technometrics, 47, 264–273 \cr \cr
#' Filzmoser, P., Maronna, R., & Werner, M. (2008) Outlier identification in high dimensions, Computational Statistics and Data Analysis, 52, 1694-1711.
#'
pcaRobSparse = function(x, ncomp = min(nrow(x)-1, ncol(x)), alpha = 0.75, lambda = 1e-4, delta = 0.50, init = c("robust", "sphere"), scale = T, max.iter = 200, tol=1e-16) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  init <- match.arg(init)
  wts <- function(r){ return((1-r)^2*(r<=1))}
  original.means <- colMeans(x)
  x <- sweep(x,2,original.means)
  if (alpha > 1){
    alpha <- 1
    purple <- crayon::make_style(rgb(0.431,0.055,0.616))
    cat(purple("Alpha cannot be greater than 1. Automatically correcting to alpha = 1."))
  }

  if (alpha < 0){
    alpha <- 0
    violet <- crayon::make_style(rgb(0.62,0.06,0.48))
    cat(violet("Alpha cannot be negative. Automatically correcting to alpha = 0."))
  }

  x <- as.matrix(x)
  if (any(is.na(x))) {
    warning("Missing values are omitted: na.omit(x).")
    x <- stats::na.omit(x)
  }
  n <- nrow(x)
  p <- ncol(x)

  if(is.null(ncomp)) ncomp <- min(n,p)
  if(ncomp > min(n,p)) ncomp <- min(n,p)
  if(ncomp<1) stop("Target rank is not valid!")
  if (init == "sphere"){
    svd_init <- pcaSphere(x, ncomp = min(n-1,p), evest = "bisq")
  }
  else{
    svd_init <- pcaRobust(x, ncomp = min(n-1,p))
  }
  svd_init$d <- sqrt(svd_init$values) * sqrt(nrow(x))
  mu0 <- svd_init$center
  fit <- scale(x%*%svd_init$loadings%*%t(svd_init$loadings), center = -mu0, FALSE)
  R <- colSums(t(x-fit)^2)
  sig0 <- .biScalePCA(sqrt(R), delta = delta, c = 1)^2
  ww <- wts(R/sig0)
  Dmax <- svd_init$d[1]
  A <- as.matrix(svd_init$vectors[,1:ncomp])
  B <- as.matrix(svd_init$vectors[,1:ncomp])
  V <- as.matrix(svd_init$vectors)

  VD = sweep(V, MARGIN = 2, STATS = svd_init$d, FUN = "*", check.margin = TRUE)
  VD2 = sweep(V, MARGIN = 2, STATS = svd_init$d^2, FUN = "*", check.margin = TRUE)

  lambdaL1 <- alpha * lambda
  lambdaL2 <- (1-alpha) * lambda
  lambdaL1 <- lambdaL1 *  Dmax^2
  lambdaL2 <- lambdaL2 * Dmax^2
  nu <- 1.0 / (Dmax^2 + lambdaL2)
  kappa <- nu * lambdaL1
  obj <- c();change <- Inf;iter<- 1
  repre <- x%*%B
  Xcen <- x

  while (iter<= max.iter && change > tol) {

    # Update mu and data
    mu=colSums(Xcen*ww)/sum(ww)
    Xcen=scale(Xcen, center=mu, scale=FALSE)
    Z <- VD2 %*% crossprod(V, B)
    svd_update <- wsvd(Z)
    A <- tcrossprod(svd_update$u, svd_update$v)
    grad <- VD2 %*% crossprod(V, (A - B)) - lambdaL2 * B
    B_temp <- B + nu * grad
    # lasso penalty
    idxH <- which(B_temp > kappa)
    idxL <- which(B_temp <= -kappa)
    B = matrix(0, nrow = nrow(B_temp), ncol = ncol(B_temp))
    B[idxH] <- B_temp[idxH] - kappa
    B[idxL] <- B_temp[idxL] + kappa
    # calculate fit
    fit=Xcen%*%B%*%t(B)
    # compute errors
    Rprime <- Xcen - fit
    R <- colSums(Rprime^2)
    sig2 <- .biScalePCA(sqrt(R), delta = delta, c = 1)^2
    # evaluate elastic net objective function
    obj <- c(obj, 0.5 * (length(R)*sig2) + lambdaL1 * sum(abs(B)) + 0.5 * lambdaL2 * sum(B^2))
    repre <- Xcen%*%B
    ww=wts(R/sig2)
    # check for convergence
    if(iter> 1){change <- (obj[iter-1] - obj[iter]) / obj[iter]}
    iter<- iter+ 1
  }

  out = list()
  if (sum(sign(B[,1]))<=0){B <- B * -1; A <- A * -1}
  out$values <- (svd_update$d[1:ncomp]/(max(1, nrow(x))))
  out$vectors <- as.matrix(B)
  if (ncomp > 1)
    out$loadings <- as.matrix(B)  %*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(B) * sqrt(out$values)
  out$scores <- as.matrix(x %*% B)

  out$objective <- obj
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  robvar <- function(u){.biScalePCA(u, delta = delta, c = 1)^2}

  out$total.var <- sum(apply(Re(x), 2, var))
  out$cum.exp <- cumsum(apply(out$scores, 2, var)/ out$total.var)
  class(out) <- "PrincipalComp"
  out$center <- mu
  attr(out, "rotation") <- "none"
  attr(out, "model") <- "Robust Sparse PCA"
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(out)
}


#' Mixed Principal Components Analysis
#'
#' @description This function allows you to integrate information about data structure contained
#' in categorical variables in a principal components analysis.
#' @param x a matrix or data frame containing only numeric variables
#' @param cat the categorical variables. must be coded as factors or characters, not numeric dummy variables.
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @return an object of class PrincipalComp
#' @export
#' @examples
#' x <- Alzheimers[,-c(1,3)] # Get rid of categorical variables for the PCA.
#' cats <- Alzheimers[,1:3] # Put categorical variables here for the PCA
#' pcaMix(x, cats) # Run pcaMix
#'
pcaMix <- function(x=NULL,cat=NULL,ncomp=min(nrow(x)-1,ncol(x)+ncol(cat)),scale=T){


  if (ncomp <= 1){
    ncomp <- 2
    cat(.crimson(("The number of extracted components must be at least 2. Automatically setting 'ncomp = 2'")))
  }

  if (scale) x <- scale(x)
  if (is.null(ncomp)) ncomp <- min(nrow(x)-1,ncol(x)+ncol(cat))
  cl <- match.call()
  rec <- .recodeData(x,cat,T)
  n <- rec$n; p <- rec$p; p1 <- rec$p1; p2 <- p - p1
  X <- rec$X; W <- rec$W; m <- ncol(W) - p1; indexj <- rec$indexj

  #construct the metrics
  N <- rep(1/n, n); M1 <- M2 <- NULL; P1 <- P2 <- NULL
  if (p1!=0) {M1 <- rep(1,p1); P1 <- rep(1,p1)}
  if (p2!=0){ns <- apply(rec$G, 2, sum); M2 <- n/ns; P2 <- rep(1,m)}
  M <- c(M1,M2); P <- c(P1,P2); M.col <- P*M; names(M.col) <- colnames(W)
  e <- .gsvd(W, N, M.col); V.total.dim <- e$V; U.total.dim <- e$U; d.total.dim <- e$vs

  q <- qr(W)$rank
  ncomp <- min(ncomp, q)
  values <- e$vs[1:q]^2
  prop.exp <- values/sum(values, na.rm = T)
  cum.exp <- cumsum(values)/sum(values, na.rm = T)
  d <- e$vs[1:ncomp]

  #scores
  U <- e$U[, 1:ncomp,drop=FALSE]
  rownames(U) <- rownames(W)
  colnames(U) <- paste0("PC", 1:ncomp)
  scores <- sweep(U,2,d,"*")
  F.total.dim <- sweep(U.total.dim,2,d.total.dim,"*")
  contrib.ind<-scores^2/n
  cos2.ind <- sweep(scores^2, 1, apply(F.total.dim, 1, function(v) {return(sum(v^2))}), "/")
  result.ind <- list(coord = scores, contrib = contrib.ind, cos2 = cos2.ind)

  #loadings and contributions
  A1 <- A2 <- NULL
  C1 <- C2 <- NULL
  contrib.x <- contrib.cat <- NULL
  V <- e$V[, 1:ncomp,drop=FALSE]
  rownames(V) <- colnames(W)
  colnames(V) <- paste0("PC", 1:ncomp)

  if(p1 >0){
    V1 <- V[1:p1, ,drop=FALSE]
    V1.total.dim <- V.total.dim[1:p1, ,drop=FALSE]
    A1 <- sweep(V1,2,d,"*")
    A1.total.dim <- sweep(V1.total.dim,2,d.total.dim,"*")
    contrib.x <- sweep(A1^2, 1, M1*P1, "*")
    contrib.x.pct <- sweep(contrib.x, 2, d^2, "/")
    cos2.x <- sweep(A1^2, 1, apply(A1.total.dim,1,function(v){sum(v^2)}), "/")
  }
  if(p2 >0){
    V2 <- V[(p1 + 1):(p1 + m), ,drop=FALSE]
    V2.total.dim <- V.total.dim[(p1 + 1):(p1+m), ,drop=FALSE]
    A2 <- sweep(V2,2,d,"*")
    A2 <- sweep(A2,1,M2,"*")
    A2.total.dim <- sweep(V2.total.dim,2,d.total.dim,"*")
    A2.total.dim <- sweep(A2.total.dim,1,M2,"*")
    contrib.cat.measure <- sweep(A2^2, 1, ns/n, "*")
    cos2.cat <- sweep(A2^2, 1, apply(A2.total.dim, 1, function(v) {return(sum(v^2))}), "/")
    contrib.cat <-matrix(NA,p2,ncomp)
    rownames(contrib.cat) <- colnames(cat)
    colnames(contrib.cat) <- paste("PC", 1:ncomp, sep = " ")
    for (j in 1:p2){
      contrib.cat[j,] <- apply(contrib.cat.measure[which(indexj == (j+p1))-p1, ,drop=FALSE], 2, sum)
    }
    contrib.cat.pct<-sweep(contrib.cat, 2, d^2, "/")
  }

  sqload <- rbind(contrib.x, contrib.cat) #correlation ratio
  cat.eta2 <- NULL
  if (p2 > 0) cat.eta2 <- sqload[(p1+1):(p1+p2),,drop=FALSE]

  #organization of the results
  result.x <- result.levels <- result.cat <- NULL
  if (p1!=0){
    result.x <- list(coord = A1, contrib= contrib.x, cos2 = cos2.x)
  }
  if (p2!=0) {
    result.levels <- list(coord = A2, contrib=contrib.cat, cos2 = cos2.cat)
    result.cat <- list(contrib = contrib.cat)
  }

  coef <- list()
  for (i in 1:ncomp){
    b <- V[,i]*M.col
    if (p1 > 0) b[1:p1] <-  b[1:p1]/rec$s[1:p1]
    b0 <- -sum(b*rec$g)
    coef[[i]] <- as.matrix(c(b0,b))
  }
  coef <- do.call(cbind, coef)[-1,]
  coefnames <- rownames(coef)
  coef <- gram.schmidt(coef)$Q
  A2rot <- NULL
  if (p2 >0) A2rot <- sweep(A2,1,sqrt(ns/n) ,"*")
  A <- rbind(A1,A2rot)
  colnames(A) <- colnames(coef) <- paste("PC",1:ncomp, sep = "")
  rownames(A) <- rownames(coef) <- coefnames
  Z <- rec$Z
  values <- apply(scores, 2, var)
  out <- list(call = cl,
              values = values,
              loadings = A,
              vectors = coef,
              scores = scores,
              x.loadings = A1,
              c.loadings = A2,
              ncomp = ncomp,
              cum.exp = cumsum(values)/sum(values),
              orthdist = .orthdist(W, coef),
              scoredist = .scoredist(scores, values)
  )

  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Mixed PCA"))
}





#' Kernel Principal Components Analysis
#'
#' @param x a data frame or matrix of numeric variables. can be NULL
#' if a kernel matrix is supplied directly to argument 'kern'.
#' @param ncomp specify the maximum number of components to be returned.
#' if there are fewer components with non-zero eigenvalues than the number
#' specified, then all components with non-zero eigenvalues will be returned.
#' at least two components will be returned regardless of what is specified here.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param kern either a kernlab compatible kernel function, i.e., 'laplacedot(1)',
#' or a function that generates square, symmetric, kernel matrix. a kernel matrix
#' can also be directly supplied if one has been created. centering is done internally so
#' it is not necessary to pre-center the kernel matrix. The default is the
#' radial basis function kernel with the scale parameter automatically determined.
#' Unless the default gives poor results, the rbf kernel with the automatic scale
#' parameter is safe to leave as-is.
#' @param ... additional arguments to supply if the argument 'kern' is given a
#' function that generates a matrix (ie, not a kernlab function). anything supplied
#' here will be passed to the function, so the argument must match one of the parameters
#' in the kernel.
#' @return a PrincipalComp object
#' @export
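#'
#' @examples
#' # Hedged sketch (illustrative only; assumes the package's default RbfKernel
#' # generator and internal kpcaCPP routine are available once installed):
#' set.seed(11)
#' x <- matrix(rnorm(300), 75, 4)
#' fit <- pcaKern(x, ncomp = 2)            # default rbf kernel
#' head(fit$scores)
#' # a precomputed symmetric kernel matrix can be supplied instead:
#' K <- tcrossprod(scale(x)) / ncol(x)     # simple linear kernel
#' fit2 <- pcaKern(x, ncomp = 2, kern = K)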
#'
pcaKern <- function(x = NULL, ncomp = NULL, scale = T, kern = RbfKernel, ...){

  # rbfdot(1/mean(L1median(as.matrix(dist(x)))$center))
  if (scale) x <- scale(x)
  if (is.function(kern)){
    if (!is.null(attr(kern,"package"))){
      if (attr(kern,"package")=="kernlab"){
        Kx <- kernlab::kernelMatrix(kern, x);
      }
    }
    else{
      Kx <- kern(x, ...)
    }
  }
  else if (is.matrix(kern) && isSymmetric(kern)){
    Kx <- kern;
  }
  else{
    return(cat(crayon::red("Please supply a kernlab compatible kernel function or a square, symmetric, kernel matrix")))
  }

  out <- cvreg:::kpcaCPP(Kx);
  out$values <- zapsmall(out$values, 8);
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1; out$scores <- out$scores * -1}
  if (is.null(ncomp)){
    ncomp <- max(2, ncol(x))
  }
  else{
    ncomp <- max(2, min(ncomp, sum(out$values > 0)))
  }
  pcs <- 1:ncomp
  out$values <- out$values[pcs];
  out$scores <- out$scores[,pcs];
  out$vectors <- out$vectors[,pcs];
  out$cum.exp <- NULL;
  out$loadings <- out$vectors %*% diag(out$values);
  if (!is.null(rownames(x))){rowNames<-rownames(x)}else{rowNames<-paste0("Row",1:nrow(x))}
  rownames(out$vectors) <- rownames(out$loadings) <- rowNames
  colnames(out$loadings) <- colnames(out$vectors) <- colnames(out$scores) <- paste0("KC",pcs)
  structure(out, class = "PrincipalComp", model = "Kernel PCA", rotation = "none")
}



#' Empirical Locality Preserving Projection
#'
#' @description This function implements a variant of the locality preserving projection method.
#' Ordinary locality preserving projections require user-defined parameters to construct a graph
#' for characterizing the local structure of the data. Here an empirical (data dependent) approach
#' is used to construct the graph. The only tuning parameter that requires input from the user is
#' the kernel smoothing parameter h (the kernel precision is tau = 1/h).
#'
#' @param x a data frame or matrix of numeric covariates
#' @param ncomp number of desired components
#' @param h kernel smoothing parameter. defaults to 1.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#'
#' @return a 'PrincipalComp' object.
#' @export
#'
#' @references
#' Yang, B. and Chen, S. (2010). Sample-dependent graph construction with application to dimensionality reduction. Neurocomputing, 74(1-3), pp. 301–314.
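#'
#' @examples
#' # Hedged sketch (illustrative only; simulated data, internal helpers such as
#' # L1median and minkowski_dist assumed available from the package):
#' set.seed(12)
#' x <- matrix(rnorm(300), 75, 4)
#' colnames(x) <- paste0("v", 1:4)
#' fit <- empLPP(x, ncomp = 2, h = 1)
#' head(fit$scores)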
#'
empLPP <- function(x, ncomp=min(nrow(x)-1,ncol(x)), h = 1, scale = T){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }

  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  columnNames <- colnames(x); tau = 1/h
  x = as.matrix(x); n = nrow(x); p = ncol(x)
  center <- L1median(x)$center; x <- sweep(x,2,center)
  if (scale) x <- sweep(x,2,apply(x,2,function(i) sqrt(mean(i^2))),"/")
  D = cvreg:::minkowski_dist(x, 2)
  for (i in 1:n){sumvecD = sum(D[i,]); D[i,] = D[i,]/sumvecD}
  K = exp(-D/(2*(tau^2))); W = array(0,c(n,n))

  rowCenters = try(L1median(t(K))$center, silent = T)
  if (inherits(rowCenters, "try-error"))
    rowCenters = rowMeans(K)

  for (i in 1:n){
    for (j in 1:n){
      if (K[i,j] > (rowCenters[i])){
        W[i,j] = K[i,j]
      }
    }
  }

  Omega = 2*symm(W)
  Ds = diag(rowSums(W))+diag(colSums(W))
  L = crossprod(x,Ds-Omega)%*%x; R = crossprod(x,Ds)%*%x
  vectors = cvreg:::.adjLoadings(eigen(solve(R,L))$vectors)
  scores = x%*%vectors
  values = apply(scores, 2, var)
  loadings = vectors%*%diag(sqrt(values))
  ord = order(values, decreasing = TRUE)
  values = values[ord]; vectors = vectors[,ord];
  if (sum(sign(vectors[,1])) <= 0){vectors <- vectors * -1; loadings <- loadings * -1; scores <- scores * -1}


  loadings = loadings[,ord]; scores = scores[,ord]
  values = values[1:ncomp]; vectors = vectors[,1:ncomp]; loadings = loadings[,1:ncomp]; scores = scores[,1:ncomp]
  colnames(vectors) <- colnames(loadings) <- paste0("DIM", 1:ncomp); rownames(vectors) <- rownames(loadings) <- columnNames
  out <- list(values=values,vectors=vectors,loadings=loadings,scores=scores)
  return(structure(out, class = "PrincipalComp", model = "Sample Dependent LPP", rotation = "none"))
}




#' Factor analysis via Expectation Maximization
#'
#' @description This function fits an exploratory factor analysis model by expectation maximization, using the
#' method of Bai & Li (2012) to estimate the maximum likelihood solution.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-10.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#' @references
#' Bai, J. and Li, K. (2012). Statistical analysis of factor models of high dimension. The Annals of Statistics 40, 436-465.
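#'
#' @examples
#' # Hedged sketch: six indicators generated from two latent factors
#' # (illustrative only; assumes GPArotation is installed for the default rotation):
#' set.seed(13)
#' f <- matrix(rnorm(200), 100, 2)
#' lam <- matrix(c(0.9, 0.8, 0.7, 0, 0, 0,
#'                 0, 0, 0, 0.9, 0.8, 0.7), 6, 2)
#' Y <- tcrossprod(f, lam) + matrix(rnorm(600, sd = 0.4), 100, 6)
#' colnames(Y) <- paste0("y", 1:6)
#' fit <- facanal(Y, nfac = 2)
#' fit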
#' @export
#'
facanal <- function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = GPArotation::Varimax, scale = T, max.iter = 4000, tol = 1e-10) {

  if (nfac < 1){
    cat(crayon::red(("You need at least one factor... One factor will be returned.")))
    nfac <- 1
  }

  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!")
    )))
  }

  Y <- as.matrix(Y)
  e.values <- eigen(cor(Y))$values
  if (is.null(nfac)){nfac <- sum(e.values > 0)}
  nfac <- min(nfac, .lederman(Y, e.values))
  p <- ncol(Y); n <- nrow(Y); if (scale) {Y <- scale(Y)} else {Y <- sweep(Y,2,colMeans(Y))}

  ## Use the singular value decomposition to create an initial fit.
  svd_init <- wsvd(Y, tol = 1e-6)
  if (sum(sign(svd_init$v[,1]))<=0){svd_init$v <- svd_init$v * -1; svd_init$u <- svd_init$u * -1}
  Gamma <- (svd_init$v %*% diag(svd_init$d, p, p) / sqrt(nrow(Y)))[, 1:nfac]
  scores <- as.matrix(sqrt(nrow(Y)) * svd_init$u[, 1:nfac])
  Sigma <- as.matrix(Y - tcrossprod(scores, Gamma)); Sigma <- apply(Sigma, 2, function(x) mean(x^2))
  Tau <- 1/Sigma; YVar <- apply(Y, 2, function(i) mean((i-mean(i))^2))
  GammaTilde <- sqrt(Tau) * Gamma
  M <- diag(nfac) + crossprod(GammaTilde , GammaTilde)
  eigM <- eigen(M, symmetric = TRUE)
  if (sum(sign(eigM$vectors[,1]))<=0){eigM$vectors <- eigM$vectors * -1}
  YSG <- Y %*% (Tau * Gamma)
  logdetY <- -sum(log(Tau)) + sum(log(eigM$values))
  B <- 1/sqrt(eigM$values) * tcrossprod(t(eigM$vectors), YSG)
  logtrY <- sum(Tau * YVar) - sum(B^2)/n
  logLik <- -logdetY - logtrY

  ## Initialize loop:
  is.converged <- FALSE; iter <- 0
  while (!is.converged && iter < max.iter) {
    ## Expectation step:
    varZ <- eigM$vectors %*% (1/eigM$values * t(eigM$vectors))
    EZ <- YSG %*% varZ
    EZZ <- n * varZ + crossprod(EZ)
    ## Maximization step:
    eigEZZ <- eigen(EZZ, symmetric = TRUE)
    if (sum(sign(eigEZZ$vectors[,1]))<=0){eigEZZ$vectors <- eigEZZ$vectors * -1}
    YEZ <- crossprod(Y, EZ)
    G <- sqrt(eigEZZ$values) * t(eigEZZ$vectors)
    Tau <- 1/(YVar - 2/n * rowSums(YEZ * Gamma) + 1/n * rowSums(tcrossprod(Gamma,G)^2))
    Gamma <- YEZ %*% eigEZZ$vectors %*% (1/eigEZZ$values * t(eigEZZ$vectors))
    GammaTilde <- sqrt(Tau) * Gamma
    M <- diag(nfac) + crossprod(GammaTilde, GammaTilde)
    eigM <- eigen(M, T)
    if (sum(sign(eigM$vectors[,1]))<=0){eigM$vectors <- eigM$vectors * -1}
    YSG <- Y %*% (Tau * Gamma)
    previous.logLik <- logLik
    logdetY <- -sum(log(Tau)) + sum(log(eigM$values))
    B <- 1/sqrt(eigM$values) * (t(eigM$vectors) %*% t(YSG))
    logtrY <- sum(Tau * YVar) - sum(B^2)/n
    logLik <- -logdetY - logtrY
    iter <- iter + 1
    if (abs(logLik - previous.logLik) < tol * abs(logLik)) {
      is.converged <- TRUE
    }
  }
  Gamma <- as.matrix(Gamma)
  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  uniquenesses <- 1/Tau
  communalities <- rowSums(Gamma^2)
  if (!is.null(rotate) && nfac > 1){
    if (!is.function(rotate)) {
      stop("please supply a function from the GPArotation package to the argument rotate")
    } else {
      rotated <- rotate(Gamma)
      rotation <- rotated$method
      Gamma <- rotated$loadings
      values <- diag(t(Gamma) %*% Gamma)
      ord <- order(values, decreasing = TRUE)
      Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
      values <- values[ord]
      if (rotated$orthogonal){
        communalities <- rowSums(Gamma^2)
        uniquenesses <- 1 - communalities
        Tau <- 1/uniquenesses
        Phi <- NULL
      } else if (!rotated$orthogonal) {
        Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
      }
      svd.H <- wsvd(crossprod(Gamma, (Tau * Gamma)))
      scores <- Y %*% (Tau * Gamma) %*% (svd.H$u %*% (1/svd.H$d * t(svd.H$v)))
    }
  } else{
    rotation <- "none"; Gamma <- as.matrix(Gamma); if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
    values <- diag(t(Gamma) %*% Gamma); svd.H <- wsvd(crossprod(Gamma, (Tau * Gamma))); Phi <- NULL
    scores <- as.matrix(Y %*% (Tau * Gamma) %*% (svd.H$u %*% (1/svd.H$d * t(svd.H$v))))
  }
  if (nfac > 1)
    vectors = Gamma %*% diag(1/sqrt(values))
  else
    vectors = as.matrix(Gamma * sqrt(1/values))

  colnames(Gamma) <- paste0("FA", 1:ncol(Gamma))
  rownames(Gamma) <- colnames(Y)
  colnames(scores) <- colnames(Gamma)
  names(communalities) <- names(uniquenesses) <- colnames(Y)
  total.var <- sum(communalities + uniquenesses)
  cum.exp <- cumsum(colSums(Gamma^2))/total.var
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  orthdist <- .orthdist(Y, Gamma)
  scoredist <- .scoredist(scores, values)
  out <- list(loadings = Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity, cum.exp = cum.exp, e.values = e.values, uniquenesses = uniquenesses, communalities = communalities, complexity = complexity, Phi = Phi , scores = scores, factors = nfac, orthdist = orthdist, scoredist = scoredist, rotation = rotation, n.obs = nrow(Y), niter = iter, converged = is.converged)
  if (exists("structure") && exists("pattern")){
    colnames(structure) <- colnames(pattern) <- colnames(Gamma)
    rownames(structure) <- rownames(pattern) <- rownames(Gamma)
    out$structure <- structure; out$pattern <- pattern
  }
  out$stats <- .FAfitstats(Gamma, Phi, cor(Y), Y)
  out <- structure(out, class = "facanal", model = "Factor Analysis (EM-Algorithm)")
  return(out)
}

#' Robust Factor Analysis
#'
#' @description This is similar to the factor model fitting method in the \code{\link{facanal}} function, but the variant of the EM
#' algorithm by Rubin & Thayer (1982) is implemented instead of the Bai & Li (2012) EM algorithm used in \code{\link{facanal}}. This
#' choice more easily accommodates a change in objective function; here the median absolute deviation of the errors is used as the
#' criterion to be minimized. \cr
#' \cr
#' The algorithm is also initialized with an eigendecomposition of a robust correlation/covariance matrix. The minimum regularized
#' covariance determinant (\code{\link{cov.mrcd}}) will be used in the case of n < p. Otherwise, if p <= 10 the smoothed-hard-rejection
#' estimator (\code{\link{cov.shr}}) will be used, and if p > 10 Rocke's S-estimator will be used. \cr
#' \cr
#' In the case of Likert scale type data, \emph{univariate outliers}, in the sense of a data point far from its mean, are rarely an issue save
#' for data entry errors, because Likert scales take on values from a small set of integers. If the data
#' being analyzed are continuous measurements without hard bounds on the possible values, the robust estimator helps ensure the
#' results are not compromised. \strong{\emph{Multivariate outliers}}, which are unusual combinations of values across the rows of a
#' data matrix, can also obscure the latent factor structure. If the data are suspected to be 'dirty', or one wishes to
#' validate the results of a standard factor fitting method, the robust factor model can be useful.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-12.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#' @references
#' Rubin, D. B., and Thayer, D. T. (1982), EM Algorithms for ML Factor Analysis, Psychometrika, 47 (1), 69-76. \cr \cr
#' Bai, J. and Li, K. (2012). Statistical analysis of factor models of high dimension. The Annals of Statistics 40, 436-465.
#' @export
#'
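#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## a robust two-factor fit to the built-in attitude data.
#' \dontrun{
#' fit <- robfa(attitude, nfac = 2)
#' print(fit)
#' }
#'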
robfa <-function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = GPArotation::Varimax, scale = T, max.iter = 4000, tol = 1e-12) {

  if (nfac==0){
    cat(crayon::red(("You need at least one factor... One factor will be returned.")))
    nfac <- 1
  }


  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!"))))
  }

  Y <- as.matrix(Y); m = ncol(Y); n = nrow(Y)

  if (n < m){cov <- cov.mrcd(Y); rhoY <- cov2cor(cov$cov)}
  else{
    if (m > 10){cov <- cov.rocke(Y)}else{cov <- cov.shr(Y)}
    rhoY <- cov2cor(cov$cov)
  }

  Ymu <- cov$center
  YVar <- diag(cov$cov)
  Y <- sweep(Y, 2, Ymu); if (scale){Y <- sweep(Y, 2, sqrt(YVar), "/"); YVar <- apply(Y,2,function(i) mean(i^2))}
  svdY = eigen(rhoY); svdY$v <- svdY$vectors
  if (is.null(nfac)){nfac <- m-1}; nfac <- min(nfac, .lederman(Y,svdY$values));
  if (sum(sign(svdY$v[,1]))<=0){svdY$v <- svdY$v * -1}
  values = sqrt(svdY$values[1:nfac])
  evectors = as.matrix(svdY$v[, 1:nfac, drop = FALSE])
  Gamma = evectors %*% diag(values,nrow=nfac,ncol=nfac); eps=1e-8
  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  Psi = pmax(eps,as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[, 1]))
  crit <- Inf; iter <- 0; is.converged <- F; obj <- c()
  while (!is.converged && iter < max.iter) {
    beta = Gamma/(sqrt(Psi)%*%t(rep(1,nfac)))
    svdBeta = wsvd(beta)
    theta = as.matrix(svdBeta$u)*(rep(1,m)%*%t(svdBeta$d/sqrt(1+svdBeta$d^2)))
    phi = 1/sqrt(Psi)
    phitheta = theta*(phi%*%t(rep(1,ncol(theta))))
    iS = diag(phi^2)-tcrossprod(phitheta)
    thetabeta = crossprod(theta,beta)
    theta2beta = tcrossprod(theta,t(thetabeta))
    aux = beta-theta2beta
    iSB = (phi%*%t(rep(1,ncol(theta))))*aux
    xiSB = Y %*% iSB
    Yz = crossprod(Y, xiSB/(n - 1))
    Czz = crossprod(iSB, Yz) + diag(nfac) - crossprod(Gamma, iSB)
    Gammanew=Yz %*% pseudoinverse(Czz)
    Psinew=as.vector(YVar - (Gammanew^2 %*% rep(1, nfac))[,1])
    # check for convergence: compare successive values of the objective
    # (squared MAD of the change in the uniquenesses)
    obj <- c(obj, mad(Psi-Psinew)^2)
    if (length(obj) > 1){
      crit <- obj[length(obj)-1] - obj[length(obj)]
      is.converged <- crit <= tol
    }
    iter = iter+ 1
    Gamma = Gammanew
    Psi = Psinew
  }

  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  if (!is.null(rotate) && nfac > 1){
    if (!is.function(rotate)) {
      stop("please supply a function from the GPArotation package to the argument rotate")
    } else {
      rotated <- rotate(Gamma)
      rotation <- rotated$method
      GammaOld <- Gamma
      Gamma <- rotated$loadings
      values <- diag(crossprod(Gamma))
      ord <- order(values, decreasing = TRUE)
      Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
      values <- values[ord]
      if (rotated$orthogonal){
        Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1]); Phi <- NULL
      } else if (!rotated$orthogonal){
        Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
      }
      scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(mpd(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/")))))
    }
  } else{
    values <- diag(crossprod(Gamma))
    rotation <- "none"; Phi <- NULL
    scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(mpd(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/")))))
  }
  if (nfac > 1)
    vectors = Gamma %*% diag(1/sqrt(values))
  else
    vectors = Gamma * sqrt(1/values)

  colnames(Gamma) <- paste0("FA", 1:ncol(Gamma))
  rownames(Gamma) <- colnames(Y)
  colnames(scores) <- colnames(Gamma)
  uniquenesses <- as.vector(Psi)
  communalities <- 1-uniquenesses
  names(communalities) <- names(uniquenesses) <- colnames(Y)
  total.var <- sum(communalities + uniquenesses)
  cum.exp <- cumsum(colSums(Gamma^2))/total.var
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  orthdist <- .orthdist(Y, Gamma)
  scoredist <- .scoredist(scores, values)
  out = list(loadings = Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity, cum.exp = cum.exp, e.values = eigen(cov(Y))$values,
             uniquenesses = uniquenesses, communalities = communalities, complexity = complexity, scores = scores, Phi = Phi, orthdist = orthdist, scoredist = scoredist, factors = nfac, rotation = rotation,
             n.obs = nrow(Y), niter = iter, converged = is.converged)
  if (exists("structure") && exists("pattern")){
    colnames(structure) <- colnames(pattern) <- colnames(Gamma)
    rownames(structure) <- rownames(pattern) <- rownames(Gamma)
    out$structure <- structure; out$pattern <- pattern
  }
  out$stats <- .FAfitstats(Gamma, Phi, rhoY, Y)
  return(structure(out, class = "facanal", model = "Robust Factor Analysis (EM-Algorithm)"))
}


#' Alpha Factor Analysis
#'
#' @description Alpha factor analysis (AFA) is a method proposed by Kaiser and Caffrey (1965)
#' which treats as a source of error the fact that only a portion of the possibly
#' relevant variables have been collected and submitted to the analysis. As such, the
#' objective function minimizes the L1 norm of the communalities, rather than the L2
#' norm of the uniquenesses. Furthermore, the eigenvalues of the latent variables are taken
#' as measures of the generalizability of the latent variables. \cr
#' \cr
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to attempt to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param corr one of "pearson", "robust", or "spearman".
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param screen Kaiser and Caffrey suggested that only latent variables with eigenvalues greater
#' than 1 be retained. If TRUE and the initial fit with the user-chosen number of factors fails
#' to satisfy this criterion, the model is iteratively refit with a smaller number of factors until
#' all factor eigenvalues are greater than 1. However, such criteria have fallen out of favor,
#' so the default is FALSE.
#' @param tol a tolerance value for convergence. defaults to 1e-24.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#' @references
#' Kaiser, H. F., & Caffrey, J. (1965). Alpha factor analysis. Psychometrika, 30(1), 1–14. doi:10.1007/bf02289743 \cr \cr
#' Kaiser, H. F., & Derflinger, G. (1990). Some Contrasts Between Maximum Likelihood Factor Analysis and Alpha Factor Analysis. Applied Psychological Measurement, 14(1), 29–32. doi:10.1177/014662169001400103
#' @export
#'
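#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## alpha factor analysis with two factors and a Spearman correlation matrix.
#' \dontrun{
#' fit <- alfa(attitude, nfac = 2, corr = "spearman")
#' print(fit)
#' }
#'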
alfa <- function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = Varimax, scale = T, screen = F, corr = c("pearson","robust","spearman"), max.iter = 4000, tol = 1e-24){

  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!"))))
  }
  corr <- match.arg(corr)
  columnNames <- colnames(Y)
  Y <- as.matrix(Y); n <- nrow(Y); p <- ncol(Y)

  if (corr=="robust"){
    if (n < p){
      cov <- cov.mrcd(Y); ycor <- rhomat <- cov2cor(cov$cov)
    }
    else{
      if (p > 10){cov <- cov.rocke(Y)} else{cov <- cov.shr(Y)}
      ycor <- rhomat <- cov2cor(mpd(cov$cov))
    }
    YVar <- diag(cov$cov); Ymu <- cov$center; Y <- sweep(Y, 2, Ymu)
    if (scale){Y <- sweep(Y, 2, sqrt(YVar), "/"); YVar <- apply(Y,2,var)}
  }
  else if (corr=="spearman"){
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(apply(Y,2,rank), target = "adaptive"))
    }
    else{
      ycor <- rhomat <- mpd(cor(Y,method="s"))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
    if (scale){Y <- scale(Y); YVar <- rep(1, p)}else{Y <- sweep(Y, 2, Ymu)}
  }
  else {
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(Y, target = "adaptive"))
    }
    else{
      ycor <- rhomat <- cov2cor(mpd(cov(Y)))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
    if (scale){Y <- scale(Y); YVar <- rep(1, p)}else{Y <- sweep(Y, 2, Ymu)}
  }

  e.values <- eigen(rhomat, T, T)$values
  nfac <- min(nfac, .lederman(Y))

  if (n < p)  nfac <- max(3, nfac - 2)
  .alphafit <- function(Y, rhomat, nfac, rotate, e.values, tol, max.iter, columnNames){
    rho <- rhomat
    H2 <- diag(rhomat)
    iter <- 0; err <- Inf
    while (err > tol && iter < max.iter) {
      rhomat <- cov2cor(rhomat)
      eig <- eigen(rhomat, T, F)
      if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
      if (nfac > 1){
        GammaTilde <- as.matrix(as.matrix(eig$vectors[,1:nfac]) %*% diag(sqrt(eig$values[1:nfac])))
      }
      else {
        GammaTilde <- as.matrix(eig$vectors[,1] * sqrt(eig$values[1]))
      }
      M <- tcrossprod(GammaTilde)
      newH2 <- H2*diag(M)
      err <- sum(abs(H2-newH2))
      rhomat <- rho
      diag(rhomat) <- newH2
      H2 <- newH2
      iter <- iter + 1
    }
    is.converged <- err <= tol
    if (sum(sign(GammaTilde[,1]))<=0){GammaTilde <- GammaTilde * -1}
    Gamma <- sqrt(H2) * GammaTilde
    if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
    eig <- sqrt(H2) * eig$values
    communalities <- rowSums(Gamma^2)
    uniquenesses <- Psi <- as.vector(YVar-(Gamma^2 %*% rep(1, nfac))[,1])
    values <- diag(crossprod(Gamma))
    if (nfac > 1){
      vectors <- Gamma %*% diag(1/sqrt(values))
    }
    else{
      vectors <- as.matrix(Gamma * sqrt(1/values[1]))
    }

    if (!is.null(rotate) && nfac > 1){
      if (!is.function(rotate)) {
        stop("please supply a function from the GPArotation package to the argument rotate")
      } else {
        rotated <- rotate(Gamma)
        rotation <- rotated$method
        GammaOld <- Gamma
        Gamma <- rotated$loadings
        values <- diag(crossprod(Gamma))
        ord <- order(values, decreasing = TRUE)
        Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
        values <- values[ord]
        if (rotated$orthogonal){
          communalities <- rowSums(Gamma^2); Phi <- NULL
          uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
        } else if (!rotated$orthogonal){
          Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
        }
        Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
        scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
      }
    }
    else{
      rotation <- "none"; Phi <- NULL
      scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/"), solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
    }
    complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
    fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
    colnames(Gamma) <- colnames(scores) <- paste0("FA", 1:ncol(Gamma))
    rownames(Gamma) <- names(uniquenesses) <- names(communalities) <- columnNames
    out <- list(loadings=Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity,
                uniquenesses = uniquenesses, communalities = communalities, complexity = complexity,
                Phi = Phi, scores = scores, factors = nfac, rotation = rotation, n.obs = nrow(Y), niter = iter,
                converged = is.converged)
    if (exists("structure") && exists("pattern")){
      colnames(structure) <- colnames(pattern) <- colnames(Gamma)
      rownames(structure) <- rownames(pattern) <- rownames(Gamma)
      out$structure <- structure; out$pattern <- pattern
    }
    return(out)
  }

  out <- suppressWarnings(.alphafit(Y, ycor, nfac, rotate, e.values, tol, max.iter, columnNames))
  if (screen && any(out$values < 1)){
    all1ormore <- FALSE
    while(!all1ormore && nfac != 1){
      nfac <- max(1, sum(out$values >= 1))
      out <- suppressWarnings(.alphafit(Y, ycor, nfac, rotate, e.values, tol, max.iter, columnNames))
      all1ormore <- all(out$values >= 1)
    }
  }

  out$orthdist <- .orthdist(Y, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$total.var <- sum(out$communalities + out$uniquenesses)
  out$cum.exp <- cumsum(colSums(out$loadings^2))/out$total.var
  out$stats <- .FAfitstats(out$loadings, out$Phi, rhomat, Y)
  return(structure(out, class = "facanal", model = "Alpha Factor Analysis"))
}



#' Principal Factors Analysis (Principal Axis Factoring)
#'
#' @description This implements the original factor analysis method, principal axis
#' factoring. The only augmentation, besides rotation of the loadings, is that shrinkage
#' (the adaptive non-linear shrinkage method from \code{\link{covShrink}}) can be used to achieve a more
#' numerically stable fit, particularly in high dimensional settings. When n < p it is switched on automatically.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to attempt to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param corr one of "pearson", "robust", or "spearman".
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-12.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#'
#' @return an object of class "facanal"
#' @export
#'
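#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## principal axis factoring with two factors on the built-in attitude data.
#' \dontrun{
#' fit <- pfa(attitude, nfac = 2)
#' print(fit)
#' }
#'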
pfa <- function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = Varimax, scale = T, corr = c("pearson","robust","spearman"), max.iter = 4000, tol = 1e-12){

  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!"))))
  }
  corr <- match.arg(corr)
  columnNames <- colnames(Y)
  Y <- as.matrix(Y)
  n <- nrow(Y); p <- ncol(Y)

  if (corr=="robust"){
    if (n < p){
      cov <- cov.mrcd(Y); ycor <- rhomat <- cov2cor(cov$cov)
    }
    else{
      if (p > 10){cov <- cov.rocke(Y)} else{cov <- cov.shr(Y)}
      ycor <- rhomat <- cov2cor(mpd(cov$cov))
    }
    Ymu <- cov$center; YVar <- diag(cov$cov)
  }
  else if (corr=="spearman"){
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(apply(Y,2,rank), target = "adaptive"))
    }
    else{
      ycor <- rhomat <- mpd(cor(Y,method="s"))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
  }
  else if (corr=="pearson"){
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(Y, target = "adaptive"))
    }
    else{
      ycor <- rhomat <- cov2cor(mpd(cov(Y)))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
  }
  Y <- sweep(Y, 2, Ymu)
  if (scale){Y <- sweep(Y, 2, sqrt(YVar), "/"); Yvar <- apply(Y, 2, var)}
  e.values <- eigen(rhomat, T, T)$values
  nfac <- min(nfac, .lederman(Y))
  rsse <- Inf; iter <- 0; h.old <- 1e100
  while (rsse > tol && iter <= max.iter) {
    eig <- eigen(rhomat, T)
    if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
    if (nfac > 1) {
      Gamma <- eig$vectors[,1:nfac] %*% diag(sqrt(eig$values[1:nfac]))
    }
    else {
      Gamma <- as.matrix(eig$vectors[,1] * sqrt(eig$values[1]))
    }
    M <- tcrossprod(Gamma)
    h.new <- diag(M)
    diag(rhomat) <- h.new
    rsse <- sqrt(sum((h.old - h.new)^2))
    h.old <- h.new
    iter <- iter + 1
  }
  values <- eig$values[1:nfac]
  is.converged <- rsse <= tol
  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  communalities <- rowSums(Gamma^2)
  uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
  if (nfac > 1){
    vectors <- Gamma %*% diag(1/sqrt(values))
  }
  else{
    vectors <- as.matrix(Gamma * 1/sqrt(values))
  }

  if (!is.null(rotate) && nfac > 1){
    if (!is.function(rotate)) {
      stop("please supply a function from the GPArotation package to the argument rotate")
    } else {
      rotated <- rotate(Gamma)
      rotation <- rotated$method
      GammaOld <- Gamma
      Gamma <- rotated$loadings
      values <- diag(crossprod(Gamma))
      ord <- order(values, decreasing = TRUE)
      Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
      values <- values[ord]
      if (rotated$orthogonal){
        communalities <- rowSums(Gamma^2); Phi <- NULL
        uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
      } else if (!rotated$orthogonal){
        Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
        communalities <- rowSums(Gamma^2); uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
      }
      Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
      scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
    }
  }
  else{
    rotation <- "none"; Phi <- NULL; Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
    scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
  }
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  colnames(Gamma) <- colnames(scores) <- paste0("FA", 1:ncol(Gamma))
  rownames(Gamma) <- names(uniquenesses) <- names(communalities) <- columnNames
  out <- list(loadings=Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity, uniquenesses = uniquenesses, communalities = communalities,
              complexity = complexity, Phi = Phi, scores = scores, factors = nfac, rotation = rotation, n.obs = nrow(Y), niter = iter,
              converged = is.converged)

  out$orthdist <- .orthdist(Y, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$total.var <- sum(out$communalities + out$uniquenesses)
  out$cum.exp <- cumsum(colSums(out$loadings^2))/out$total.var
  if (exists("structure") && exists("pattern")){
    colnames(structure) <- colnames(pattern) <- colnames(Gamma)
    rownames(structure) <- rownames(pattern) <- rownames(Gamma)
    out$structure <- structure; out$pattern <- pattern
  }
  out$stats <- .FAfitstats(Gamma, Phi, ycor, Y)
  return(structure(out, class = "facanal", model = "Principal Axis Factoring"))
}

#' Independent Factors Analysis
#'
#' @description Independent Factors Analysis (IFA) is an extension and synthesis of the factor analysis
#' and independent components analysis methods. Note that rotation is not implemented in this model, as
#' the IFA solution is not invariant under orthogonal rotations. Any orthogonal rotation of independent
#' variables will result in correlated ones, breaking the independence of the latent variables extracted
#' by an independent components or independent factors analysis. Uncorrelatedness is a neccessary, but not
#' sufficient condition for independence. Independence is a stronger condition: a set of random variables
#' has the property of independence when the joint probability density function of the set is the product
#' of each individual variable's probability density function, i.e. \emph{\eqn{f(x_1, x_2, ..., x_p) =
#' \sum f(x_p)}}. However, the property of being uncorrelated only means that the linear relationship between
#' a set of variables is null, which obviously leaves open other types of relationships. Likewise orthogonality
#' only implies linear independence. While independent components/factors are orthogonal to one another,
#' the basis vectors are \emph{not} neccessarily orthogonal. This is unlike PCA, where the basis vectors
#' are orthogonal to each other. Therefore, orthogonal rotation of the basis vectors of ICA/IFA will not
#' be guaranteed to preserve orthogonality of the components, as the orthogonal rotation only guarantees
#' \emph{linear} independence.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to extract.
#' @param ic an integer vector of length \emph{nfac} detailing how many independent components comprise each factor,
#' for example, c(4,3,3,1). The default is NULL, which yields two independent components for the first factor and
#' one for each remaining factor.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-4.
#' @param max.iter maximum number of iterations. defaults to 1000.
#'
#' @return an object of class "facanal"
#' @export
#' @references
#' Attias, H. (1999). Independent factor analysis, Neural Comput. 11(4), 803–851. \cr \cr
#' Montanari, A. & Viroli, C. (2010). The independent factor analysis approach to latent variable modelling, Statistics, 44:4, 397-416, doi:10.1080/02331880903189125
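#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## two independent factors, each built from the default number of components.
#' \dontrun{
#' fit <- ifa(attitude, nfac = 2)
#' print(fit)
#' }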

ifa <-function(Y, nfac = min(nrow(Y)-2, ncol(Y)), ic = NULL, scale=TRUE, max.iter = 1000, tol=1e-4){

  columnNames <- colnames(Y)
  if (is.null(nfac)){nfac <- .lederman(Y)}

  if (is.null(ic)){
    ic <- c(2, rep(1, nfac-1))
  }
  else if (length(ic) < nfac){
    ic <- c(ic, rep(1, nfac-length(ic)))[1:nfac]
  }
  else if (length(ic) > nfac){
    ic <- ic[1:nfac]
  }
  else {
    ic <- ic
  }
  n<-nrow(Y); p<-ncol(Y); ymu<- colMeans(Y)
  if (scale) Y<-scale(Y) else Y <- sweep(Y,2,ymu)
  ic<-as.matrix(ic)
  lik <- -100000
  totalICs<-prod(ic)
  maxIC<-max(ic)
  nfac <- min(nfac, .lederman(Y));
  output.init<-.ifa_init(Y,nfac,scale)
  Psi<-output.init$Psi
  Psi<-diag(diag(Psi))
  Gamma<-output.init$Gamma
  w<-matrix(0,nfac,maxIC)
  ic.means<-matrix(0,nfac,maxIC)
  ic.var<-matrix(0,nfac,maxIC)

  for (i in 1:nfac){
    for (j in 1:ic[i]){
      w[i,j] <- 1
      ic.means[i,j] <- if(j==1){0}else if(j==2){-1}else{ (0.25 + abs(ic.means[i,j-1])) * -1 * sign(ic.means[i,j-1])}
      ic.var[i,j]  <- 1
    }
  }
  w<-w/rowSums(w)


  out<-.ifa_fit(Y,n,nfac,p,ic,totalICs,maxIC,max.iter,Gamma,w,ic.means,ic.var,tol,Psi,lik)

  likelihood<-out$likelihood
  sigma<-out$sigma
  pqy<-out$pqy
  Gamma<-out$Gamma
  values <- colSums(Gamma^2)
  vectors <- Gamma %*% diag(1/sqrt(values))
  w<-out$w
  ic.means<-out$ic.means
  ic.var<-out$ic.var
  Psi<-out$Psi
  uniquenesses <- Psi<-diag(Psi)
  communalities <- 1 - uniquenesses
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  likelihood<-matrix(likelihood[!likelihood==0])
  scores <- Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , pseudoinverse(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/")))); scores <- scores %*% matpower(cov(scores), -0.5)
  orthdist <- .orthdist(Y, Gamma)
  scoredist <- .scoredist(scores, values)
  total.var <- sum(communalities + uniquenesses)
  cum.exp <- cumsum(colSums(Gamma^2))/total.var
  stats <- .FAfitstats(Gamma, NULL, cor(Y), Y)
  rotation <- "none"; Phi <- NULL
  colnames(Gamma) <- colnames(vectors) <- colnames(scores) <- paste0("FA", 1:ncol(Gamma))
  rownames(vectors) <- rownames(Gamma) <- names(uniquenesses) <- names(communalities) <- columnNames

  ic.info <- list(totalICs = totalICs, ic = ic, sigma = sigma, pqy = pqy, w = w, ic.means=ic.means,ic.var=ic.var)

  output<-list(loadings=Gamma,vectors=vectors,values=values, scores=scores, orthdist = orthdist, scoredist = scoredist,
               uniquenesses=uniquenesses,communalities=communalities,complexity=complexity, fac.complexity = fac.complexity,
               rotation = rotation, Phi = Phi, factors=nfac, n.obs=n, ic.info = ic.info, stats = stats)

  return(structure(output, class = "facanal", model = "Independent Factor Analysis"))
}


#' print method for PrincipalComp objects
#'
#' @param fit model fit
#' @param tol absolute loadings smaller than this value will be omitted from the display. defaults to 1e-4.
#' @export
#' @method print PrincipalComp
#' @keywords internal
print.PrincipalComp <- function(fit, tol = 1e-4){

  loadings <- zapsmall(fit$loadings, 5)
  rownames(loadings) <- abbreviate(rownames(loadings), minlength = 7, method = "both.sides", named = F)
  loadings[which(abs(loadings) < tol)] <- 0
  loadings <- round(loadings, 3)
  model <- attr(fit, "model")

  if (model=="PCA" || model=="L1 PCA" || model=="R1 PCA" || model == "Robust PCA" || model == "Spherical PCA" || model == "Probablistic PCA" || model == "Mixed PCA"){
    cat(.coral("\nEigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.hotpink("Cumulative Variance Explained : \n\n"), round(fit$cum.exp, 4), "\n\n")
    cat(.rosered("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.coral2("Fitting Method: "), model, "\n")
  }
  else if (model=="Sparse PCA" || model == "Robust Sparse PCA"){
    cat(.newblack("\nEigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.bluez("Cumulative Variance Explained : \n\n"), round(fit$cum.exp, 3), "\n\n")
    cat(.bluez2("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.nurple("Fitting Method: "), model, "\n")
  }
  else if (model=="Local PCA"){
    cat(.lightgreen("\nEigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.nurple("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.seamist("Fitting Method: "), model, "\n")
  }
  else if (model=="Curvilinear PCA"){
    cat(.lightgreen("\nApprox. Eigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.bluez("Approx. Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.nurple("Fitting Method: "), model, "\n")
  }
  else if (model=="Kernel PCA"){
    cat(.nurple("\n Eigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.purple("Fitting Method: "), model, "\n")
  }
  else if (model=="Sample Dependent LPP"){
    cat(.nurple("\n Eigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.purple("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.pipple("Fitting Method: "), model, "\n")
  }
}


#' print method for facanal objects
#'
#' @param fit model fit
#' @param tol absolute loadings smaller than this value will be omitted from the display. defaults to 0.075.
#' @export
#' @method print facanal
#' @keywords internal
print.facanal <- function(fit, tol = 0.075){
  model <- attr(fit, "model")
  values <- fit$values
  e.values <- fit$e.values
  communalities <- round(unname(fit$communalities), 3)
  uniquenesses <-  1 - communalities
  complexity <- round(fit$complexity, 2)
  fac.complexity <- fit$fac.complexity
  names(fac.complexity) <- names(values) <- paste0("FA", 1:length(values))
  names(complexity) <- names(communalities) <-  names(uniquenesses) <- abbreviate(rownames(fit$loadings),
                                    minlength = 7, method = "both.sides", named = F)
  cat(.coral("\nEigenvalues of Factor Solution: \n"))
  print(noquote(format(values, digits = 2, nsmall = 2)))
  cat(.coral2("\nFactor Complexities: \n"))
  print(noquote(format(fac.complexity, digits = 2, nsmall = 2)))
  cat(.hotpink("\nLoadings, Uniquenesses, & Communalities: \n"))
  loadings <- zapsmall(fit$loadings, 4)
  rownames(loadings) <- abbreviate(rownames(loadings), minlength = 7, method = "both.sides", named = F)
  loadings[which(abs(loadings) < tol)] <- 0
  loadings <- round(loadings, 3)
  newcolnames <- c(colnames(loadings), "", "c2", "u2", "k")
  loadings <- cbind(loadings, rep(0, length(complexity)),  communalities, uniquenesses, complexity)
  colnames(loadings) <- newcolnames
  Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
  cat("\n")
  if (any(communalities > 1)){
    cat(crayon::bold(.crimson("An ultra Heywood case was detected. Factor solution is invalid. \n")))
  }
  if (fit$stats$KMO <= 0.50){
    cat(crayon::bold(.red2("The Kaiser-Meyer-Olkin index indicates data lacks sufficient structure for legitimate factors to be recovered.  \n")))
  }
  cat(.nurple2("Fitting Method: "), model, .nurple(" Rotation Method: "), fit$rotation, "\n")
  cat(.pipple2("Bentler-Bonett Normed Fit Index: "), fit$stats$NFI, .pipple2("KMO Sampling Adequacy Index: "), round(fit$stats$KMO,3), "\n")
  cat(.pipple(paste0("Chi-Squared (", fit$stats$dof, " df) : ")), fit$stats$chisq, .pipple(" BIC:"), fit$stats$BIC, .pipple(" Evidence Ratio:"), Rmpfr::formatDec(Rmpfr::mpfr(1/Rmpfr::mpfr(exp(-0.5*(fit$stats$null.BIC-fit$stats$BIC)),500),500),precBits=15,digits=5), "\n")
}



#' Promax Factor Rotation (GPArotation format)
#'
#' @description This implements the promax rotation with output consistent with the naming
#' conventions of the GPArotation package. Promax rotation is a factor rotation method which
#' seeks a solution that is as orthogonal as possible, but allows obliqueness rather than
#' forcing an orthogonal solution.
#'
#' @param L A factor loading matrix
#' @param m The power used for promax. Values of 2 to 6 are recommended. Defaults to 4.
#'
#' @return A list (which includes elements used by facanal) with: \cr
#' \describe{
#' \item{loadings}{The new loadings obtained by L \%*\% Th.}
#' \item{Th}{The rotation matrix.}
#' \item{method}{A string indicating the rotation objective function.}
#' \item{orthogonal}{A logical indicating if the rotation is orthogonal. Is FALSE for Promax.}
#' \item{Phi}{t(Th) \%*\% Th. The covariance matrix of the rotated factors.}
#' }
#' @export
#' @references
#' Hendrickson, A. E. and White, P. O. (1964). Promax: a quick method for rotation to oblique simple structure. British Journal of Statistical Psychology, 17, 65–70. doi: 10.1111/j.2044-8317.1964.tb00244.x.
#'
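#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## obtain unrotated loadings with pfa() and then apply the Promax rotation.
#' \dontrun{
#' fit <- pfa(attitude, nfac = 2, rotate = NULL)
#' rot <- Promax(fit$loadings, m = 4)
#' rot$loadings
#' rot$Phi
#' }
#'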
Promax <- function (L, m = 4){
  if (missing(m))
    m <- 4
  if (!is.matrix(L) && !is.data.frame(L)) {
    if (!is.null(L$loadings))
      L <- as.matrix(L$loadings)
  }
  if (ncol(L) < 2)
    return(L)
  dn <- dimnames(L)
  xx <- GPArotation::Varimax(L, normalize = F, eps = 1e-4, maxit = 5000)
  L <- xx$loadings
  Q <- L * abs(L)^(m - 1)
  U <- lm.fit(L, Q)$coefficients
  d <- try(diag(pseudoinverse(crossprod(U))), silent = TRUE)
  if (inherits(d, "try-error"))
    d <- try(diag(cvreg:::.ridgeinv(crossprod(U))), silent = TRUE)
  U <- U %*% diag(sqrt(d))
  dimnames(U) <- NULL
  z <- L %*% U
  U <- xx$Th %*% U
  ui <- pseudoinverse(U)
  Phi <- tcrossprod(ui)
  dimnames(z) <- dn
  class(z) <- "loadings"
  result <- structure(list(loadings = z, Th = U, Phi = Phi, orthogonal = FALSE, method = "Promax"),class="GPArotation")
  return(result)
}



#' Kaiser Meyer Olkin Sampling Adequacy Index
#'
#' @param x a data frame or matrix of numeric variables
#' @param rho a custom correlation or covariance matrix if one is so desired
#' (ie one of the robust estimators). if left as NULL the pearson correlation
#' matrix is calculated.
#'
#' @return a list containing the measure of sampling adequacy for each variable, the KMO
#' index for overall adequacy, and the Lederman bound which defines the maximum number of
#' latent variables that can be extracted.
#' @export
#'
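#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## per-variable MSA, overall KMO, and the Lederman bound for the attitude data.
#' \dontrun{
#' kmo(attitude)
#' }
#'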
kmo <- function(x, rho=NULL){
  if (is.null(rho))
    cormat  <- cor(x)
  else
    cormat <- cov2cor(rho)
  pcormat <- cov2pcor(mpd(cormat))
  n <- nrow(x); p <- ncol(x)
  seq_p <- seq_len(p)
  MSA <- unlist(lapply(seq_p, function(i){sum((cormat[i,-i])^2) / ( sum((cormat[i,-i])^2) + sum((pcormat[i,-i])^2))}))
  names(MSA) <- colnames(x)
  KMO <- sum((cormat[!diag(p)])^2) / ( sum((cormat[!diag(p)])^2) + sum((pcormat[!diag(p)])^2) )
  eig <- eigen(cormat)$values
  res <- list("MSA" = MSA, "KMO" = KMO, "LedermanBound" = floor(.lederman(x,eig)))
  return(res)
}


#' Estimate the number of latent factors for exploratory factor analysis
#'
#' @description This implements the estimator of the number of latent variables for exploratory factor analysis
#' derived by Onatski (2010) from random matrix theory. The number of factors is bounded from above by the Lederman
#' boundary, which is the maximum number of factors it is possible to derive from a given data set.
#'
#' @param Y a matrix or data frame of numeric covariates
#' @param max.fac maximum number of factors (if larger than the Lederman boundary this will be ignored)
#' @param max.iter maximum number of iterations. defaults to 10.
#' @param scale should the data be scaled first? defaults to TRUE.
#'
#' @return a list containing a numeric value and a vector of the eigenvalue differences.
#' @export
#'
#' @references
#' Onatski, A. (2010). Determining the Number of Factors from Empirical Distribution of Eigenvalues. Review of Economics and Statistics, 92(4), 1004–1016. doi:10.1162/rest_a_00043
#'
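#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## estimate the number of latent factors for the built-in attitude data.
#' \dontrun{
#' nfac.est(attitude)
#' }
#'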
nfac.est <- function(Y, max.fac = NULL, max.iter = 10, scale = TRUE) {

  Y <- as.matrix(Y)
  n <- nrow(Y); p <- ncol(Y)
  if (scale)
    Y <- scale(Y)
  else
    Y <- sweep(Y, 2, colMeans(Y))

  ev <- wsvd(Y)$d^2 / (n-1)
  lederman.bound <- .lederman(Y, ev)
  if (is.null(max.fac))
    max.fac <- lederman.bound
  else
    max.fac <- min(c(lederman.bound, max.fac))

  j <- max.fac + 1
  diffs <- ev - c(ev[-1], 0)

  for (i in 1:max.iter) {
    y <- ev[j:(j+4)]
    x <- ((j-1):(j+3))^(0.666)
    lm.coef <- lm(y ~ x)
    delta <- 2 * abs(lm.coef$coef[2])
    idx <- which(diffs[1:max.fac] > delta)
    if (length(idx) == 0)
      hatr <- 0
    else hatr <- max(idx)

    newj = hatr + 1
    if (newj == j) break
    j = newj
  }
  return(list(est.facs = hatr, diffs = diffs))
}




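## Build a symmetric nearest-neighbour graph from Minkowski (Lp) distances
## (descriptive comment added for clarity): each observation is linked to roughly
## p*n of its nearest neighbours, the mask is made mutual (i and j must select
## each other), and non-neighbour entries of the returned distance matrix are set
## to -Inf. Presumably used by the graph/locality-based PCA routines.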
.distgraph <- function(data, p = 0.25, Lp=1.4){
  D = cvreg:::minkowski_dist(as.matrix(data), Lp)
  ndata = nrow(D)
  nnpar = max(p*ndata, 1)
  nnpar = as.integer(min((ceiling(nnpar)),ndata))
  outMask = array(0,c(ndata,ndata))
  for (i in 1:ndata){
    tgt = D[i,]
    idx = as.vector(which(tgt<=max((sort(tgt,decreasing=FALSE))[1:min((nnpar+1),ndata)]),arr.ind=T))
    outMask[i,idx] = 1
  }
  for (i in 1:ndata){
    outMask[i,i] = 0
  }
  outMask = matrix(as.logical(as.integer(outMask)),nrow=ndata,ncol=ndata)
  outMask = matrix(as.logical(outMask * t(outMask)),nrow=ndata,ncol=ndata)
  outD1 = outMask * D
  outD2 = (!outMask) * array(-Inf,c(ndata,ndata))
  outD2[which(is.na(outD2))] = 0
  outD = outD1 + outD2
  for (i in 1:ndata){
    outD[i,i] = 0
  }
  result = list()
  result$mask = outMask
  result$dist = outD
  return(result)
}

## For adjusting Laplacian eigenvalues
.adjEigLaplace <- function(eigL){
  if(min(eigL$values)<=0){
    minpos <- min(eigL$values[-which(eigL$values<=0)])
    new <- minpos * rev(seq(0.98, 0.99, len = sum(eigL$values<=0)))
    eigL$values[which(eigL$values<=0)]<-new;
    eigL$values <- eigL$values + 1e-16
    eigL$values<-sort(eigL$values,decreasing=T)
    return(eigL)
  }
  else{
    return(eigL)
  }
}


.adjLoadings <- function(P){
  n = nrow(P); p = ncol(P)
  output = array(0,c(n,p))
  for (i in 1:p){
    cvec = as.vector(P[,i])
    output[,i] = cvec/sqrt(sum(cvec*cvec))
  }
  return(output)
}


.biScalePCA <- function (u, delta = 0.5, c = 1.547645, maxit = 100, tol = 1e-04){
  s0 <- median(abs(u))
  if (s0 == 0){s0 <- mean(abs(u))/0.7978846}
  else{s0 <- s0/0.6744898}
  err <- tol + 1
  it <- 0
  while ((err > tol) && (it < maxit)) {
    it <- it + 1
    s1 <- sqrt(s0^2 * mean(rho.Bisquare(u/s0, c = c))/delta)
    err <- abs(s1 - s0)/s0
    s0 <- s1
  }
  return(s0)
}


.gsvd = function (X, row.w = NULL, col.w = NULL,ncp=Inf) {
  tryCatch.W.E <- function(expr){
    W <- NULL
    w.handler <- function(w){
      W <<- w
      invokeRestart("muffleWarning")
    }
    list(value = withCallingHandlers(tryCatch(expr, error = function(e) e),
                                     warning = w.handler),
         warning = W)
  }


  if (is.null(row.w)) row.w <- rep(1/nrow(X), nrow(X))
  if (is.null(col.w)) col.w <- rep(1, ncol(X))
  ncp <- min(ncp,nrow(X)-1,ncol(X))
  row.w <- row.w / sum(row.w)
  X <- t(t(X)*sqrt(col.w))*sqrt(row.w)
  if (ncol(X)<nrow(X)){
    svd.usl <- tryCatch.W.E(svd(X,nu=ncp,nv=ncp))$val
    if (names(svd.usl)[[1]]=="message"){
      svd.usl <- tryCatch.W.E(svd(t(X),nu=ncp,nv=ncp))$val
      if (names(svd.usl)[[1]]=="d"){
        aux <- svd.usl$u
        svd.usl$u <- svd.usl$v
        svd.usl$v <- aux
      } else{
        bb <- eigen(crossprod(X),symmetric=TRUE)
        svd.usl <- vector(mode = "list", length = 3)
        svd.usl$d[svd.usl$d<0]=0
        svd.usl$d <- sqrt(svd.usl$d)
        svd.usl$v <- bb$vec[,1:ncp]
        svd.usl$u <- t(t(crossprod(t(X),svd.usl$v))/svd.usl$d[1:ncp])
      }
    }
    U <- svd.usl$u
    V <- svd.usl$v
    if (ncp >1){
      mult <- sign(as.vector(crossprod(rep(1,nrow(V)),as.matrix(V))))
      mult[mult==0] <- 1
      U <- t(t(U)*mult)
      V <- t(t(V)*mult)
    }
    U <- U/sqrt(row.w)
    V <- V/sqrt(col.w)
  }
  else{
    svd.usl <- tryCatch.W.E(svd(t(X),nu=ncp,nv=ncp))$val
    if (names(svd.usl)[[1]]=="message"){
      svd.usl <- tryCatch.W.E(svd(X,nu=ncp,nv=ncp))$val
      if (names(svd.usl)[[1]]=="d"){
        aux <- svd.usl$u
        svd.usl$u <- svd.usl$v
        svd.usl$v <- aux
      } else{
        bb <- eigen(crossprod(t(X),t(X)),symmetric=TRUE)
        svd.usl <- vector(mode = "list", length = 3)
        svd.usl$d[svd.usl$d<0]=0
        svd.usl$d <- sqrt(svd.usl$d)
        svd.usl$v <- bb$vec[,1:ncp]
        svd.usl$u <- t(t(crossprod(X,svd.usl$v))/svd.usl$d[1:ncp])
      }
    }
    U <-  svd.usl$v
    V <- svd.usl$u
    mult <- sign(as.vector(crossprod(rep(1,nrow(V)),as.matrix(V))))
    mult[mult==0] <- 1
    V <- t(t(V)*mult)/sqrt(col.w)
    U <- t(t(U)*mult)/sqrt(row.w)
  }
  vs <- svd.usl$d[1:min(ncol(X),nrow(X)-1)]
  num <- which(vs[1:ncp]<1e-15)
  if (length(num)==1){
    U[,num] <- U[,num,drop=FALSE]*vs[num]
    V[,num] <- V[,num,drop=FALSE]*vs[num]
  }
  if (length(num)>1){
    U[,num] <- t(t(U[,num])*vs[num])
    V[,num] <- t(t(V[,num])*vs[num])
  }
  res <- list(vs = vs, U = U, V = V)
  return(res)
}

.recodeData <- function(x,cat,rename.level=FALSE){
  G <- NULL
  Gcod <- NULL
  nbcat <- NULL
  if (!is.null(x)){
    if (is.factor(x))
      stop("All variables in x must be numeric",call. = FALSE)
    if (is.numeric(x))
      x <- data.frame(x)
    for (v in 1:ncol(x)) {
      if (!is.numeric(x[, v]))
        stop("All variables in x must be numeric",call. = FALSE)
    }
    n1 <- nrow(x)
    p1 <- ncol(x)

    recodqt <- .recodecat(x)
    Z1 <- recodqt$Z
    g1 <- recodqt$g
    s1 <- recodqt$s
    Y1 <- recodqt$Xcod
  }
  if (!is.null(cat)){
    if (is.numeric(cat))
      stop("All variables in cat must be categorical",call.=FALSE)
    if (is.factor(cat))
      cat <- data.frame(cat)
    for (v in 1:ncol(cat)) {
      if (is.numeric(cat[, v]))
        stop("All variables in cat must be categorical",call.=FALSE) }
    vect.all.levels<-as.character(unlist(apply(cat,2,unique)))
    vect.all.levels.unique<-unique(vect.all.levels)
    test.name.categ<-length(vect.all.levels)==length(vect.all.levels.unique)
    if(test.name.categ==FALSE && rename.level==FALSE)
      stop("Some categorical variables have same names of categories,
             rename categories or use the option rename.level=TRUE to rename it automatically",call.=FALSE)
    for (v in 1:ncol(cat)) cat[,v] <- factor(as.character(cat[,v]))
    n2 <- nrow(cat)
    p2 <- ncol(cat)
    G <- .recodex(cat,rename.level)
    g2 <- apply(G,2,mean)
    ns <- apply(G,2,sum)
    s2 <- sqrt(ns/nrow(G))

    Gcod <- sweep(G,MARGIN=2,STATS=s2,FUN="/")
    moy<-apply(Gcod,2,mean)
    Z2 <- sweep(Gcod,MARGIN=2,STATS=moy,FUN="-")
    G.cent<-sweep(G,MARGIN=2,STATS=ns/nrow(G),FUN="-")
    nb.cat <- function(cat) {
      cat <- as.factor(cat)
      length(levels(cat))
    }
    nbcat <- apply(cat,2, nb.cat)
    indexj2<-NULL
    for (j in 1:ncol(cat)) {
      indexj2 <- c(indexj2,rep(j,nbcat[j]))}
  }
  if (!is.null(x)&& !is.null(cat))
    if (n1==n2) {
      n <- n1
      p <- p1+p2
      Z <- cbind(Z1,Z2)
      Y <- cbind(Y1,G)
      W <- cbind(Z1,G.cent)
      g <- c(g1,g2)
      s <- c(s1,s2)
      X <- cbind.data.frame(x,cat)
      indexj <- c(1:p1,indexj2+p1)
    } else
      stop("number of objects in x and cat must be the same",call.=FALSE)
  if (!is.null(x)&& is.null(cat)) {
    n <- n1
    p <- p1
    p2 <- 0
    Z <- Z1
    Y <- Y1
    W <- Z1
    g <- g1
    s <- s1
    X <- x
    indexj <- 1:p1
  }
  if (is.null(x)&& !is.null(cat)){
    n <- n2
    p <- p2
    p1 <- 0
    Z <- Z2
    Y <- G
    W <- G.cent
    g <- g2
    s <- s2
    X <- cat
    indexj <- indexj2
  }
  if (is.null(x)&& is.null(cat))
    stop("A data matrix must be given",call.=FALSE)

  if (is.null(colnames(X)))
    colnames(X) <- paste("V", 1:ncol(X), sep = "")
  for (j in 1:ncol(X)) if (colnames(X)[j] == "")
    colnames(X)[j] <- paste("V", j, sep = "")

  return(list(X=X,Y=Y,Z=Z,W=W,n=n,p=p,p1=p1,p2=p2,g=g,s=s,indexj=indexj,
              G=G,Gcod=Gcod,x=x,cat=cat,nbcat=nbcat))
}


.recodecat <- function(X){
    X <- as.matrix(X)
    missing.mean <-
      function(C1){
        moy <- mean(C1,na.rm=T)
        ind <- which(is.na(C1)==T)
        if(length(ind)>=1){C1[ind]<-moy
        }
        return(C1)
      }
    if (nrow(X)==1){
      Xcod <- X
      Z <- NULL
      mean.Xcod <- X
      sd.Xcod <- NULL
    }
    else{
      Xcod <- apply(X,2,missing.mean)
      red <- sqrt((nrow(X)-1)/nrow(X))
      sd.Xcod <- apply(Xcod,2,sd)*red
      mean.Xcod <- apply(Xcod,2,mean)
      Z<- scale(Xcod,scale=sd.Xcod)
      apply(Z,1,function(x) sum(is.na(x)))
      if (sum(is.na(Z))!= 0) stop("There are columns in 'x' where all the values are identical",call.=FALSE)
    }
    return(list(Z=Z,g=mean.Xcod,s=sd.Xcod,Xcod=Xcod))
  }

.recodex <-function(X,rename.level=FALSE){
  #X <- as.matrix(X)
  GNA <- .tabdisna(X,rename.level)
  G <- replace(GNA,is.na(GNA),0)
  n <- nrow(GNA)
  if (n > 1){
    ns <- apply(G,2,sum)
    nmiss <- apply((apply(GNA,2,is.na)),2,sum)
    if(sum((n-nmiss)==ns)!=0) stop("There are columns in 'cat' where all the categories are identical",call.=FALSE)
  }
  return(G)
}


.tabdisna <- function (tab,rename.level=FALSE) {
  tab <- as.data.frame(tab)
  .mdis <- function(i) {
    cat <- tab[, i]
    nom <- names(tab)[i]
    n <- length(cat)
    cat <- as.factor(cat)
    x <- matrix(0, n, length(levels(cat)))
    ind<-(1:n) + n * (unclass(cat) - 1)
    indNA<-which(is.na(ind))

    x[(1:n) + n * (unclass(cat) - 1)] <- 1
    x[indNA,]<-NA
    if (rename.level==TRUE){
      dimnames(x) <- list(row.names(tab), paste(nom, levels(cat), sep = "="))
    }    else{
      dimnames(x) <- list(row.names(tab), levels(cat))
    }
    return(x)
  }
  if (ncol(tab) == 1)
    res <- .mdis(1)
  else {
    res <- lapply(1:ncol(tab), .mdis)
    res <- as.matrix(data.frame(res, check.names = FALSE))
  }
  return(res)
}

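## Diagnostic distances used by the PCA/FA fitters (descriptive comments added
## for clarity): .orthdist returns each observation's orthogonal distance to the
## subspace spanned by the loadings, and .scoredist returns a Mahalanobis-type
## distance of the scores computed with the component/factor variances.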
.orthdist <- function(xc, loadings){
  orthDist<-xc-xc%*%loadings%*%t(loadings)
  return(sqrt(rowSums(orthDist*orthDist)))
}

.scoredist <- function(scores, values){
  scores <- as.matrix(scores)
  if (length(values) > 1)
    val <- try(sqrt(mahalanobis_dist(scores, colMeans(scores), diag(values))), silent =T)
  else if (length(values)==1)
    val <- try(sqrt(mahalanobis_dist(scores, mean(scores), values)), silent =T)
  if (!inherits(val, "try-error")) return(val) else return(rep(NA, NROW(scores)))
}


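## Lederman bound on the number of extractable factors for p observed variables:
## floor((2p + 1 - sqrt(8p + 1)) / 2), with p taken as the number of eigenvalues
## (or singular values) that are numerically greater than zero.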
.lederman <- function(x,evals=NULL){
  if (is.null(evals))
    p <- sum(svd(x)$d > 1e-4)
  else
    p <- sum(evals > 1e-4)

  floor( 0.5 * ((2*p) + (1 - sqrt((8*p)+1))))
}


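## Assemble fit statistics for a factor solution (descriptive comment added for
## clarity): the implied correlation matrix, residuals, degrees of freedom,
## chi-square, BIC/SABIC, TLI, NFI, and the KMO/MSA sampling adequacy indices,
## with the likelihood-based quantities delegated to .fafs_aux().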
.FAfitstats <- function(loadings, Phi = NULL, rho, Y) {

  if (is.null(rho))
    r <- cor(Y)
  else
    r <- rho
  results1 <- .fafs_aux(loadings, Phi, rho, Y)
  BIC <- results1$BIC
  SABIC <- results1$SABIC
  null.BIC <- results1$null.BIC
  null.SABIC <- results1$null.SABIC
  chisq <- results1$STATISTIC
  TLI <- results1$TLI
  NFI <- results1$NFI
  kmsstats <- kmo(Y, r)
  KMO <- kmsstats$KMO
  MSA <- kmsstats$MSA
  null.chisq <- results1$null.chisq
  n.obs <- nrow(Y)
  p <- ncol(Y)
  nfac <- ncol(loadings)
  if (is.null(Phi)) { M <- tcrossprod(loadings)} else {M <- loadings %*% tcrossprod(Phi, loadings)}
  M <- mpd(M)
  residual <- r - M
  r2 <- sum(r^2)
  rstar2 <- sum(residual * residual)
  result <- list(residual = residual, M = M)
  result$dof <- dof <- p * (p - 1)/2 - p * nfac + (nfac * (nfac - 1)/2)
  r2.off <- r2 - tr(r)
  diag(residual) <- 0
  rstar.off <- sum(residual^2)
  result$ENull <- r2.off * n.obs
  result$eChisq <- rstar.off * n.obs
  result$rms <- sqrt(rstar.off/(p * (p - 1)))
  result$nh <- n.obs
  if (result$dof > 0) {
    result$eBIC <- result$eChisq - result$dof * log(n.obs)
    result$eSABIC <- result$eChisq - result$dof * log((n.obs + 2)/24)
  }
  else {
    result$eBIC <- result$eSABIC <- NA
  }
  results <- list()
  results$model <- M; results$residuals <- result$residual; results$TLI <- TLI; results$NFI <- NFI;
  results$KMO <- KMO; results$MSA <- MSA; results$dof <- result$dof; results$chisq <- chisq;
  results$BIC <- BIC; results$SABIC <- SABIC; results$null.chisq <- null.chisq; results$null.BIC <- null.BIC;
  results$null.SABIC <- null.SABIC; results$echisq <- result$eChisq; results$eBIC <- result$eBIC;
  results$eSABIC <- result$eSABIC
  return(results)
}

.fafs_aux <- function(loadings, Phi = NULL, rho, Y) {
  conf.level <- 0.95
  if (is.null(rho))
    r <- mpd(cor(Y))
  else
    r <- mpd(rho)
  n.obs <- nrow(Y)
  p <- ncol(Y)
  nfac <- ncol(loadings)
  if (is.null(Phi)) { M <- tcrossprod(loadings)} else {M <- loadings %*% tcrossprod(Phi, loadings)}
  diag(M) <- diag(r)
  m.inv.r <- pseudoinverse(mpd(M)) %*% r
  result <- list()
  result$n.obs = n.obs
  result$dof <- p * (p - 1)/2 - p * nfac + (nfac * (nfac - 1)/2)
  result$objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) - p
  result$criteria <- c(objective = result$objective, NA, NA)
  result$STATISTIC <- chisq <- result$objective * ((n.obs - 1) - (2 * p + 5)/6 - (2 * nfac)/3)
  if (!is.nan(result$STATISTIC))
    if (result$STATISTIC < 0) {
      result$STATISTIC <- 0
    }
  if (result$dof > 0) {
    result$PVAL <- pchisq(result$STATISTIC, result$dof, lower.tail = FALSE)
  }
  else {
    result$PVAL <- NA
  }

  F0 <- sum(diag((r))) - log(det(r)) - p
  if (is.infinite(F0)) { F0 <- sum(r^2) }
  Fm <- result$objective
  Mm <- Fm/(p * (p - 1)/2 - p * nfac + (nfac * (nfac - 1)/2))
  M0 <- F0 * 2/(p * (p - 1))
  nm <- ((n.obs - 1) - (2 * p + 5)/6 - (2 * nfac)/3)
  result$null.model <- F0
  result$null.dof <- p * (p - 1)/2
  result$null.chisq <- F0 * ((n.obs - 1) - (2 * p + 5)/6)
  result$TLI <- (M0 - Mm)/(M0 - 1/nm)
  result$NFI <- (result$null.chisq-chisq)/result$null.chisq
  result$null.BIC <- result$null.chisq  - result$null.dof * log(n.obs)
  result$null.SABIC <- result$null.chisq  - result$null.dof * log((n.obs + 2)/24)
  result$BIC <- chisq - result$dof * log(n.obs)
  result$SABIC <- chisq - result$dof * log((n.obs + 2)/24)

  return(result)
}




.ifa_fit<-function(Y,n,nfac,p,ic,totalICs,maxIC,max.iter,Gamma,w,ic.means,ic.var,tol,Psi,lik){
  likelihood<-NULL
  iter<-0
  ratio<-1000

  while ((iter < max.iter) & (ratio > tol )) {
    iter<-iter+1
    wq<-matrix(1,1,totalICs)
    muq<-matrix(0,nfac,totalICs)
    vuq<-array(0,c(nfac,nfac,totalICs))
    pr<-totalICs
    cont<-1
    for (k in 1:nfac) {
      j<-1
      pr<-pr/ic[k]
      for (i in 1:totalICs){
        muq[k,i]<-ic.means[k,j]
        vuq[k,k,i]<-ic.var[k,j]
        wq[,i]<-wq[,i]*w[k,j]
        cont<-cont+1
        if (cont>pr) {
          j <- ifelse(j==ic[k], j, j+1)
          cont<-1}
      }
    }

    sigma<-array(1e-6,c(nfac,nfac,totalICs))
    psiInv<-pseudoinverse(Psi)
    roqy<-array(0,c(nfac,n,totalICs))

    sigma<-array(apply((array(crossprod(Gamma, psiInv %*% Gamma),c(nfac,nfac,totalICs))+array(apply(vuq,3,solve),c(nfac,nfac,totalICs))),3,solve),c(nfac,nfac,totalICs))


    for (i in 1:totalICs) {
      roqy[,,i]<-sigma[,,i]%*%((crossprod(Gamma, tcrossprod(psiInv,Y))) + matrix(pseudoinverse(vuq[,,i])%*%muq[,i],nfac,n))}

    exqy<-roqy
    exxqy<-array(diag(rep(1e-3, nfac)),c(nfac,nfac,n,totalICs))
    pyq<-matrix(0,n,totalICs)

    for (i in 1:n) {
      for (j in 1 :totalICs) {
        exxqy[,,i,j] <- exxqy[,,i,j] + sigma[,,j] + tcrossprod(roqy[,i,j], roqy[,i,j])
        pyq[i,j]<-dmvnorm(x=t(Y[i,]),t(Gamma%*%muq[,j]),(Gamma%*%vuq[,,j]%*%t(Gamma)+Psi), log  = F)
      }
    }

    pqy<-matrix(0,n,totalICs)
    den<- tcrossprod(wq, pyq)
    temp<-t(matrix(wq,totalICs,n))
    pqy<-pyq*temp/den[,]
    pqy<-ifelse(is.na(pqy),mean(pqy, na.rm=TRUE),pqy)
    pqiy<-array(0,c(n,nfac,maxIC))
    nummu<-array(0,c(n,nfac,maxIC))
    numvu<-array(0,c(n,nfac,maxIC))

    pr<-totalICs
    cont<-1
    for (k in 1:nfac) {
      j<-1
      pr<-pr/ic[k]
      for (i in 1:totalICs){
        nummu[,k,j]<-nummu[,k,j]+(pqy[,i]*exqy[k,,i])
        numvu[,k,j]<-numvu[,k,j]+(pqy[,i]*exxqy[k,k,,i])
        pqiy[,k,j]<-pqiy[,k,j]+pqy[,i]
        cont<-cont+1
        if (cont>pr){
          j <- ifelse(j==ic[k], j, j+1)
          cont<-1
        }
      }
    }

    exy<-matrix(1e-9, nfac ,n)
    exxy<-array(diag(rep(1e-4,nfac)), c(nfac,nfac,n))

    for (j in 1:totalICs){
      exy<-exy+(t(matrix(pqy[,j],n,nfac))*exqy[,,j])
      exxy<-exxy+(array(rep(pqy[,j],each=nfac*nfac),c(nfac,nfac,n))*exxqy[,,,j])
    }

    exy<-ifelse(is.na(exy),mean(exy, na.rm=TRUE),exy)
    exxy<-ifelse(is.na(exxy),mean(exxy, na.rm=TRUE),exxy)
    eexxy<-rowMeans(exxy, na.rm=TRUE, dims=2)
    eexxyInv<-pseudoinverse(eexxy)
    Gamma<-matrix(0,p,nfac)
    Psi<-matrix(0,p,p);
    for (i in 1:n){Gamma <- Gamma + (crossprod(t(Y[i,]), crossprod(exy[,i], eexxyInv)))}; Gamma<-Gamma/n
    for (i in 1:n) {Psi<- Psi+ (crossprod(t(Y[i,]), Y[i,])-(crossprod(t(Y[i,]), tcrossprod(exy[,i], Gamma))))}; Psi<-Psi/n

    Enummu<-matrix(0,nfac,maxIC)
    Enumvu<-matrix(0,nfac,maxIC)
    Eden<-matrix(0,nfac,maxIC)
    ic.means<-matrix(0,nfac,maxIC)
    ic.var<-matrix(0,nfac,maxIC)
    w<-matrix(0,nfac,maxIC)

    for (i in 1:nfac){
      for (j in 1:ic[i]){
        Enummu[i,j]<-sum(nummu[,i,j])
        Eden[i,j]<-sum(pqiy[,i,j])
        Enumvu[i,j]<-sum(numvu[,i,j])
      }
    }

    for (i in 1:nfac) {
      for (j in 1:ic[i]){
        ic.means[i,j]<-Enummu[i,j]/Eden[i,j]
        ic.var[i,j]<-Enumvu[i,j]/Eden[i,j]-(ic.means[i,j]*ic.means[i,j])
        w[i,j] <- mean(pqiy[,i,j])
      }
    }


    sigmascale<-matrix(0,nfac)

    for (i in 1:nfac){
      cont1<-0
      cont2<-0
      for (j in 1:ic[i]) {
        cont1<-cont1+ ( w[i,j]*(ic.means[i,j]*ic.means[i,j]+ic.var[i,j]) )
        cont2<-cont2+(w[i,j]*ic.means[i,j])    }

      sigmascale[i]<-cont1-(cont2*cont2)
    }


    for (i in 1:nfac) {
      for (j in 1:ic[i]) {
        ic.means[i,j]<-ic.means[i,j]/sqrt(sigmascale[i])
        ic.var[i,j]<-ic.var[i,j]/sigmascale[i]
      }
      for (j in 1:p)  Gamma[j,i]=Gamma[j,i]*sqrt(sigmascale[i])
    }

    temp<-sum(log(tcrossprod(pyq,wq)))/n
    likelihood<-c(likelihood,temp)
    ratio<-abs((temp-lik)/lik)
    if ((temp < lik) & (iter > 8)) ratio<-tol
    lik<-temp
  }
  out<-list(Gamma=Gamma,w=w,ic.means=ic.means,ic.var=ic.var,Psi=Psi,likelihood=likelihood,sigma=sigma,pqy=pqy)
  return(out)
}


.ifa_init <- function(x,nfac,scaling=TRUE){

  fit <- pca(x, nfac, scale = scaling)
  Gamma <- try(infomaxT(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
  if (inherits(Gamma, "try-error")){
    Gamma <- try(entropy(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
    if (inherits(Gamma, "try-error")){
      Gamma <- try(quartimax(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
      if (inherits(Gamma, "try-error")){
        Gamma <- try(Varimax(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
        if (inherits(Gamma, "try-error")){
          Gamma <- fit$loadings
        }
      }
    }
    ord <- order(colSums(Gamma^2), decreasing = T)
  }
  else{
    ord <- order(colSums(Gamma^2), decreasing = T)
  }
  Gamma <- Gamma[,ord]
  if (scaling)
    Psi <- cor(x) - tcrossprod(Gamma)
  else
    Psi <- cov(x) - tcrossprod(Gamma)

  diag(Psi) <- pmax(diag(Psi), 0.001)
  output<-list(Psi=Psi, Gamma=Gamma)
  return(output)
}





.newblack   <- function(x){crayon::make_style(rgb(0.900, 0.278, 0.106))(x)}
.coral   <- function(x){crayon::make_style(rgb(0.994,0.165,0.219))(x)}
.nurple2  <- function(x){crayon::make_style(rgb(0.808, 0.104, 0.709))(x)}
.nurple  <- function(x){crayon::make_style(rgb(0.72, 0.10, 0.78))(x)}
.seamist <- function(x){crayon::make_style(rgb(0.163, 0.778, 0.683))(x)}
.hotpink <- function(x){crayon::make_style(rgb(1.00, 0.098, 0.700))(x)}
.lightgreen <- function(x){crayon::make_style(rgb(0.72, 0.81, 0.16))(x)}
.bluez <- function(x){crayon::make_style(rgb(0.063, 0.230, 1.00))(x)}
.bluez2 <- function(x){crayon::make_style(rgb(0.00, 0.278, 0.563))(x)}
.red2  <- function(x){crayon::make_style(rgb(1.00, 0.078, 0.0263))(x)}
.crimson <- function(x){crayon::make_style(rgb(0.823,0.048,0.04))(x)}
.rosered <- function(x){crayon::make_style(rgb(1.00,0.00,0.506))(x)}
.coral2 <- function(x){crayon::make_style(rgb(0.999,0.112,0.619))(x)}
.green2 <- function(x){crayon::make_style(rgb(0.031, 0.353, 0.220))(x)}
.pipple  <- function(x){crayon::make_style(rgb(0.408, 0.074, 0.979))(x)}
.pipple2  <- function(x){crayon::make_style(rgb(0.631, 0.094, 0.839))(x)}
.purple  <- function(x){crayon::make_style(rgb(0.42, 0.00, 0.42))(x)}
.purple2 <- function(x){crayon::make_style(rgb(0.46, 0.01, 0.63))(x)}
.orange <- function(x){crayon::make_style(rgb(1,0.6125,0))(x)}