# R/PCAandFA.R


#' Ordinary Principal Components Analysis
#'
#' @description This is simply an alternative to the \code{principal} (psych) and \code{prcomp} (stats) functions.
#'
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param method the svd algorithm to use, one of "dc" and "std" corresponding to
#' the divide-and-conquer (the default here) and standard algorithm.
#' @return an object of class PrincipalComp
#' @export
#' @examples
#' pca(x, 3)
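#' # A fuller, hedged sketch on simulated data (illustrative only; assumes the
#' # package's internal helpers such as wsvd are available once installed):
#' set.seed(1)
#' x <- matrix(rnorm(200), 50, 4)
#' colnames(x) <- paste0("v", 1:4)
#' fit <- pca(x, ncomp = 2)
#' fit$values   # component variances (eigenvalues)
#' fit$cum.exp  # cumulative proportion of variance explained
#' head(fit$scores)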
#'
pca <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, method = c("dc","std")){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)

  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  method <- match.arg(method)
  columnNames <- colnames(x)
  x <- sweep(x, 2, colMeans(x), "-")
  if (scale) x <- scale(x, F, T)
  out <- wsvd(x, method = method); if (sum(sign(out$v[,1]))<=0){out$v <- out$v * -1}
  out$values <- as.vector(out$d^2 / nrow(x))
  out$vectors <- out$v; out$v <- NULL
  out$total.var <- sum(out$values)

  # filter out eigenvectors with eigenvalues <= 0
  ncomp <- min(ncomp, sum(zapsmall(out$values, 6) > 0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)[1:ncomp]
  if (ncomp > 1){
    out$loadings <- out$vectors[,1:ncomp] %*% diag(sqrt(out$values[1:ncomp]))
    out$scores <- out$u[,1:ncomp] %*% diag(out$d[1:ncomp])
  } else {
    out$loadings <- as.matrix(out$vectors[,1]) * sqrt(out$values[1])
    out$scores <- as.matrix(out$u[,1]) * out$d[1]
  }
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  out$values <- out$values[1:ncomp]
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "PCA"))
}


#' Probabilistic Principal Components Analysis via Expectation Maximization
#'
#' @description This is simply an alternative to the \code{principal} (psych) and \code{prcomp} (stats) functions that uses expectation
#' maximization to fit the PCA in the presence of missing values. If there are no missing values the output
#' should be virtually identical to the \code{\link{pca}} function, save for sign changes in the eigenvectors
#' and loadings, and small numerical differences. The 'sensible principal components analysis' EM
#' algorithm described by Roweis (1997) is implemented here. It is simply a variant of Tipping & Bishop's (1997)
#' EM algorithm.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param maxit maximum number of iterations for expectation maximization. defaults to 1000.
#' @param tol tolerance for convergence. defaults to 1e-4.
#' @return an object of class PrincipalComp
#' @export
#' @examples
#' ppca(x, 3)
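#' # Hedged sketch with missing cells (illustrative; simulated data, internal
#' # helpers such as qb and rpowexp are assumed available from the package):
#' set.seed(2)
#' xm <- matrix(rnorm(300), 60, 5)
#' colnames(xm) <- paste0("v", 1:5)
#' xm[sample(length(xm), 15)] <- NA  # punch a few holes in the data
#' fit <- ppca(xm, ncomp = 2)
#' fit$converged
#' fit$cum.exp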
#'
#' @references
#' Tipping, M. & Bishop, C. Probabilistic principal component analysis. Technical
#' Report NCRG/97/010, Neural Computing Research Group, Aston University, September 1997. \cr
#' \cr
#' Roweis, S. (1997) EM algorithms for PCA and SPCA. NIPS'97: Proceedings of the 10th International
#' Conference on Neural Information Processing Systems
#'
ppca <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, maxit = 1000, tol=1e-4) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  if (!exists(".Random.seed", mode="numeric", envir=globalenv()))
    sample(NA);
  oldSeed <- get(".Random.seed", mode="numeric", envir=globalenv());
  set.seed(666)
  N <- nrow(x)
  D <- ncol(x)
  Obs <- !is.na(x)
  hidden <- which(is.na(x))
  naobs <- length(hidden)
  if(naobs) { x[hidden] <- 0 }
  columnNames <- colnames(x)
  x <- sweep(x, 2, colMeans(x), "-")
  if (scale) x <- sweep(x, 2, apply(x,2,function(i) sqrt(mean((i-mean(i))^2))), "/")
  ## Initialization
  r <- sample(N)
  C <- t(x[r[1:ncomp], ,drop = FALSE])
  ## Random matrix with the same dim names as x
  C <- matrix(cvreg::rpowexp(prod(dim(C)),0,1,4), nrow(C), ncol(C), dimnames = labels(C))
  assign(".Random.seed", oldSeed, envir=globalenv());
  CtC <- crossprod(C)
  ## inv(C'C) C' S is the solution to the EM problem
  S  <- x %*% C %*% pseudoinverse(CtC)
  recon <- tcrossprod(S,C)
  recon[hidden] <- 0
  ss <- sum(sum((recon - x)^2)) / (N * D - naobs)
  count <- 1
  old <- 1e100

  ##  EM iterations
  while (count > 0) {
    ## Expectation Step
    Sx <- pseudoinverse(diag(ncomp) + CtC/ss)
    ss_old <- ss
    if(naobs) {
      proj <- tcrossprod(S, C)
      x[hidden] <- proj[hidden]
    }
    S <- x %*% C %*% Sx / ss
    ## Maximization Step
    SumXtX <- crossprod(S)
    C <- crossprod(x, S) %*% solve(SumXtX + N * Sx)
    CtC <- crossprod(C)
    ss <- (sum(sum((tcrossprod(C,S)-t(x))^2)) + N*sum(sum(CtC %*% Sx)) + naobs*ss_old ) / (N * D)
    ## Evaluate Objective Function
    objective <- N * (D * log(ss) + sum(diag(Sx)) - log(det(Sx))) + sum(diag(SumXtX)) - naobs * log(ss_old)
    ## Evaluate convergence
    rel_ch <- abs( 1 - objective / old )
    old <- objective
    count <- count + 1
    if (is.na(rel_ch)){
      count <- 0
      converged <- FALSE
    }
    else if( !is.na(rel_ch) && rel_ch < tol & count > 5 ) {
      count <- 0
      converged <- TRUE
    }
    else if (count > maxit) {
      count <- 0
      converged <- FALSE
    }
  }

  C <- qb(C,rand=F)$Q
  S <- x %*% C
  eig <- eigen(crossprod(sweep(S,2,colMeans(S)))/nrow(S))
  if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
  vals <- eig$values
  vecs <- eig$vectors
  C <- C %*% vecs
  S <- x %*% C
  R2cum <- rep(NA, ncomp)
  SSE <- sum(x^2, na.rm=TRUE)

  for (i in 1:ncol(C)) {
    difference <- x - (S[,1:i, drop=FALSE] %*% t(C[,1:i, drop=FALSE]))
    R2cum[i] <- 1 - (sum(difference^2, na.rm=TRUE) / SSE)
  }

  out <- list()
  out$scores <- S
  out$values <- apply(out$scores, 2, var)
  out$loadings <- C %*% diag(sqrt(out$values))
  out$vectors <- C
  out$total.var <- sum(out$values)
  out$cum.exp<- R2cum
  out$converged <- converged
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$vectors <- out$vectors[,1:ncomp]
  return(structure(out,class="PrincipalComp", rotation = "none", model = "Probabilistic PCA"))
}



#' Robust Principal Components Analysis (Filzmoser-Maronna-Werner method)
#'
#' @description This applies the outlier detection method of Filzmoser, Maronna, and Werner (2008) to
#' obtain weights, which are used to construct a weighted covariance matrix which is in turn used
#' for principal components analysis.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param control a list of control options for the outlier identification step. usually these will not need to be changed.
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @references
#' Filzmoser, P., Maronna, R., & Werner, M. (2008) Outlier identification in high dimensions, Computational Statistics and Data Analysis, 52, 1694-1711.
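#'
#' @examples
#' # Hedged, illustrative sketch: Gaussian data with a handful of shifted rows
#' # (assumes the package's internal helpers, e.g. L1median and hdmedian, are available).
#' set.seed(3)
#' x <- matrix(rnorm(400), 100, 4)
#' x[1:5, ] <- x[1:5, ] + 6  # contaminate five observations
#' fit <- pcaRobust(x, ncomp = 2)
#' fit$outliers              # rows down-weighted to zero by the weighting scheme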
#'
pcaRobust <- function (x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, control = list(explvar = 0.96, crit.M1 = 0.33, crit.c1 = 2.5, crit.M2 = 0.25, crit.c2 = 0.975, cs = 0.25, outbound = 0.25)) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)

  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  columnNames <- colnames(x)
  if (scale) x <- scale(x)
  explvar <- control$explvar
  crit.M1 <- control$crit.M1
  crit.c1 <- control$crit.c1
  crit.M2 <- control$crit.M2
  crit.c2 <- control$crit.c2
  cs <- control$cs
  outbound <- control$outbound
  p <- ncol(x); n <- nrow(x)
  L1med <- L1median(x)$center

  scalefun <- function(x, mu0){
    u <- x - mu0
    s0 <- mean(abs(u)) * sqrt(pi/2)
    err <- 1000
    it <- 0
    while ((err > 1e-4) && (it < 50)) {
      it <- it + 1
      s1 <- sqrt(s0^2 * mean(rho.Bisquare(u/s0, 1.547645)/0.50))
      err <- abs(s1 - s0)/s0
      s0 <- s1
    }
    return(s0)
  }

  x.mad = sapply(1:ncol(x), function(i) scalefun(x[,i], L1med[i]))
  x.sc <- sweep(sweep(x,2,L1med),2,x.mad,"/")
  x.svd <- wsvd(scale(x.sc, TRUE, FALSE))
  a <- x.svd$d^2/(n - 1)
  if (explvar==1){p1<-p}else{p1 <- (1:p)[(cumsum(a)/sum(a) > explvar)][1]}
  p1 <- ncomp
  x.pc <- x.sc %*% as.matrix(x.svd$v[, 1:p1])
  L1med.pc <- L1median(x.pc)$center
  xpc.sc <- scale(x.pc, L1med.pc, sapply(1:ncol(x.pc), function(i) scalefun(x.pc[,i], L1med.pc[i])))
  wp <- abs(apply(xpc.sc^4, 2, mean) - 3)
  xpcw.sc <- xpc.sc %*% diag(wp/sum(wp))

  xpc.norm <- sqrt(apply(xpcw.sc^2, 1, sum))
  x.dist1 <- xpc.norm * sqrt(qchisq(0.5, p1))/hdmedian(xpc.norm)
  M1 <- hdquantile(x.dist1, crit.M1)
  const1 <- hdmedian(x.dist1) + crit.c1 * scalefun(x.dist1, hdmedian(x.dist1))
  w1 <- (1 - ((x.dist1 - M1)/(const1 - M1))^2)^2
  w1[x.dist1 < M1] <- 1
  w1[x.dist1 > const1] <- 0
  xpc.norm <- sqrt(apply(xpc.sc^2, 1, sum))
  x.dist2 <- xpc.norm * sqrt(qchisq(0.5, p1))/hdmedian(xpc.norm)
  M2 <- sqrt(qchisq(crit.M2, p1))
  const2 <- sqrt(qchisq(crit.c2, p1))
  w2 <- (1 - ((x.dist2 - M2)/(const2 - M2))^2)^2
  w2[x.dist2 < M2] <- 1
  w2[x.dist2 > const2] <- 0
  wfinal <- (w1 + cs) * (w2 + cs)/((1 + cs)^2)
  wfinal01 <- round(wfinal + 0.5 - outbound)
  wcov <- cov.wt(x, wt = wfinal, method = "ML")
  dist <- mahalanobis_dist(x, wcov$center, wcov$cov)
  const3 <- hdmedian(dist)/(qchisq(0.5, p))
  wcov$cov <- wcov$cov * const3
  eig <- eigen(wcov$cov, symmetric = TRUE)
  if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
  out <- list()

  out$values <- eig$values
  out$total.var <- sum(out$values)
  ncomp <- min(ncomp, sum(zapsmall(out$values, 6)>0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 8)[1:ncomp]
  out$values <- out$values[1:ncomp]; out$vectors <- as.matrix(eig$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- out$vectors %*% diag(sqrt(out$values))
  else
    out$loadings <- out$vectors * sqrt(out$values)

  out$scores <-  x %*% out$vectors
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$center <- wcov$center
  out$cov <- wcov$cov
  out$dist <- dist
  out$weights <- wfinal
  out$outliers <- which(wfinal01==0)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$vectors <- out$vectors[,1:ncomp]
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Robust PCA"))
}

#' Spherical Principal Components Analysis
#'
#' @description This function implements the spherical principal components analysis method of Locantore et al. (1999)
#' to conduct PCA while downweighting large outliers. It is one of the earliest attempts at producing a robust PCA method,
#' but despite the development of more sophisticated methods, this works quite well (Maronna, 2005). The method makes use
#' of the multivariate spatial median, also known as the multivariate L1-median, or just multivariate median. The spatial
#' median minimizes the sum of Euclidean distances, and hence gives the coordinates for the center of the hypersphere (hence
#' the origin of the name spherical principal components analysis): \cr
#'
#' \deqn{\hat{M} = \arg\min_{M} \sum_{i} \|x_i - M\|}{M_hat = arg min_M sum_i ||x_i - M||}
#' \cr
#'
#' It differs from the vector of univariate medians, known as the marginal multivariate median or componentwise median.
#' The componentwise median is not equivariant under orthogonal transformations of the data, whereas the spatial median is:
#' the spatial median of a transformed data set, back-transformed, recovers the spatial median of the original data. The
#' spatial median is unique when the data are not perfectly collinear; when the data are collinear, multiple (equally valid)
#' solutions may exist (Milasevic & Ducharme, 1987). The spatial median has a bounded influence function and a breakdown point of 50\%. \cr
#'
#' With the spatial median defined, the motivation behind spherical PCA becomes clear.
#' Each observation's deviation from the spatial median, scaled to unit length, is its spatial sign; the spatial signs are
#' thus a transformation/standardization of the data onto the unit hypersphere. Due to the robustness of the spatial median,
#' multivariate outliers are easily identifiable and do not negatively influence the estimate of the principal components for
#' the "clean" data. While the eigenvectors enjoy this benefit and are consistent estimates of the population eigenvectors
#' of a data set, the eigenvalues are not consistent estimators. However, this is easily remedied by using a robust scale estimator
#' to calculate the standard deviations of the PCA scores, then squaring these to obtain the variances (which are the eigenvalues).
#' The method is very fast as well, even for larger data sets.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param evest the scale estimator used to provide consistent estimates of the
#' population eigenvalues. one of "tau" (the default), "bisq" (bisquare), or "pb"
#' (percentage bend).
#' @return a pcaSphere object containing eigenvalues, eigenvectors, component loadings, component scores, and spatial median.
#' @export
#'
#' @references
#' Locantore, N., Marron, J., Simpson, D, Tripoli, N., Zhang, J., Cohen, K. (1999) Robust principal component analysis for functional data. Sociedad de Estadistica e Investigacion Operativa Test. 8: 1. https://doi.org/10.1007/BF02595862 \cr \cr
#' Maronna, R.A. (2005) Principal components and orthogonal regression based on robust scales, Technometrics, 47, 264–273 \cr \cr
#' Milasevic, P. & Ducharme, G. R. (1987) Uniqueness of the Spatial Median. Ann. Statist. 15(3) 1332-1333. doi:10.1214/aos/1176350511. \cr \cr
#' @examples
#' pcaSphere(x, 3)
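#' # Hedged sketch comparing spherical and ordinary PCA on contaminated data
#' # (illustrative only; simulated data):
#' set.seed(4)
#' x <- matrix(rnorm(300), 75, 4)
#' x[1:4, 1] <- x[1:4, 1] + 10  # a few gross outliers in one variable
#' rob <- pcaSphere(x, ncomp = 2)
#' ord <- pca(x, ncomp = 2)
#' rbind(spherical = rob$values, ordinary = ord$values)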
#'
pcaSphere <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = TRUE, evest = c("tau", "bisq", "pb")){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  evest <- match.arg(evest)
  evest <-  which(c("tau", "bisq", "pb") == evest)
  columnNames <- colnames(x)
  if (scale) x <- scale(as.matrix(as.data.frame(x)), T, T) else x <- scale(as.matrix(as.data.frame(x)), T, F)
  out <- cvreg:::SPC(x, scale = evest)
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  out$values <- as.vector(out$values)
  out$weights <- as.vector(out$weights)
  out$total.var <- sum(out$values)

  # filter out eigenvectors with eigenvalues <= 0
  ncomp <- min(ncomp, sum(zapsmall(out$values, 6)>0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)[1:ncomp]
  out$values <- out$values[1:ncomp]
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- out$vectors %*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(out$vectors * sqrt(out$values))

  out$scores <- as.matrix(x %*% out$vectors)
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)

  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Spherical PCA"))
}



#' L1 Principal Components Analysis
#'
#' @description Uses the L1 singular value decomposition in the function \code{\link{svdL1}} to construct an L1-norm
#' set of principal components. L1 PCA can be more resistant to outliers and often yields intrinsic sparsity
#' in the loadings matrix. The raw L1 eigenvectors are orthonormalized prior to computation
#' of the principal components because the L1 singular value decomposition does not yield
#' orthonormal vectors. However, the non-orthogonalized L1-PCA results can be returned by setting the argument
#' orth = FALSE. See the  \code{\link{pcaR1}} function for an intrinsically orthogonal L1-norm
#' based method.
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param interp should values be interpolated during the calculation of L1 weighted svd? Since
#' the calculations entail calculating weighted medians, and weighted medians are not always unique,
#' this will smooth the values as to obtain a unique solution. this can come at the cost of accuracy,
#' however, as well as substantially slow down both the rate of convergence
#' and rate of calculations at each iteration, so the default is FALSE. However,
#' if convergence problems occur with this option set to FALSE, it may be worth trying
#' interp=TRUE.
#' @param orth should the L1-eigenvectors be orthogonalized using a QR decomposition? the default
#' is TRUE. setting to FALSE returns loadings and scores based on the raw L1 eigenvectors.
#' @param maxit the maximum number of iterations for the L1-SVD algorithm. defaults to 500.
#' @param tol convergence tolerance. defaults to 1e-6
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @references Hawkins, D.M.; Liu, L.; Young, S.S. (2001) Robust Singular Value Decomposition, National Institute of Statistical Sciences, Technical Report Number
#'
#' @examples
#' pcaL1(x, 3)
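#' # Hedged sketch (illustrative; simulated data with a few inflated rows,
#' # internal helpers such as svdL1 and L1median assumed available):
#' set.seed(5)
#' x <- matrix(rnorm(240), 60, 4)
#' x[1:3, ] <- x[1:3, ] * 5
#' fit <- pcaL1(x, ncomp = 2)
#' round(fit$loadings, 3)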
#'
pcaL1 <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = T, interp = F, orth = T, maxit = 500, tol = 1e-6){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  columnNames <- colnames(x)
  center <- L1median(x)$center
  x <- sweep(x, 2,center, "-")
  if (scale){
    scales <- sapply(1:ncol(x),function(i) mean(abs(x[,i]))*1.2533) # x is already centered at the L1-median
    x <- sweep(x,2,scales,"/")
  }
  out <- svdL1(x, maxit = maxit, tol = tol, interp = interp)
  out$values <- (out$d/sqrt(nrow(x)-1))^2;
  out$vectors <- out$v; out$v <- NULL; out$u <- NULL
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  if (orth){
    out$vectors <- try(qb(out$vectors,rand=F)$Q,silent=T)
    if (inherits(out$vectors, "try-error")){
      out$vectors <- try(graham.schmidt(out$vectors)$Q,silent=T)
      if (inherits(out$vectors, "try-error")){
        out$vectors <- try(qr.Q(qr(out$vectors)),silent=T)
        if (inherits(out$vectors, "try-error")){
          out$vectors <- svd(out$vectors)$u
          }
        }
    }
    if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  }
  out$total.var <- sum(out$values)

  # filter out eigenvectors with eigenvalues <= 0
  ncomp <- min(ncomp, sum(zapsmall(out$d, 6) > 0))
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)[1:ncomp]
  out$values <- out$values[1:ncomp]
  out$vectors  <- as.matrix(out$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- out$vectors %*% diag(sqrt(out$values))
  else
    out$loadings <- out$vectors * sqrt(out$values)

  out$scores <- as.matrix(x %*% out$vectors); out$d <- NULL
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "L1 PCA"))
}


#' Rotation Invariant L1-Principal Components Analysis
#'
#' @param x a matrix or data frame containing only numeric variables
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param maxit the maximum number of iterations for the L1-SVD algorithm. defaults to 500.
#' @param tol convergence tolerance. defaults to 1e-8
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @references Hawkins, D.M.; Liu, L.; Young, S.S. (2001) Robust Singular Value Decomposition, National Institute of Statistical Sciences, Technical Report Number
#'
#' @examples
#' pcaR1(x, 3)
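#' # Hedged sketch (illustrative; simulated data):
#' set.seed(6)
#' x <- matrix(rnorm(240), 60, 4)
#' colnames(x) <- paste0("v", 1:4)
#' fit <- pcaR1(x, ncomp = 2)
#' fit$cum.exp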
#'
pcaR1 <- function(x, ncomp = min(nrow(x)-1, ncol(x)), scale = T, maxit = 500, tol = 1e-8) {

  wtfun <- function(x, V, c){
    if(norm(x-tcrossprod(V)%*%x, type="2") <= c) {return(1)}
    else { return(c/norm(x-tcrossprod(V)%*%x, type="2"))}
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  n <- nrow(x)
  p <- ncol(x)
  center <- L1median(x)$center
  #center <- apply(x, 2, hdmedian)
  xc <- sweep(x, 2, center)
  if (scale){
    scales <- sapply(1:ncol(xc), function(i) sqrt(mean(xc[,i]^2)))
    xc <- sweep(xc,2,scales,"/")
  }
  I <- diag(p)
  V <- wsvd(crossprod(xc)/n)$v; if (sum(sign(V[,1]))<=0){V <- V * -1}
  s <- sapply(1:n, function(i) sqrt(max(0, (xc[i,] %*% ((I-tcrossprod(V,V))%*%xc[i,]))[1])))
  c <- hdmedian(s)
  iter <- 0; normdiff <- Inf; Vold <- V
  while (iter < maxit && normdiff > tol) {
    q <- sapply(1:n,function(i) wtfun(xc[i,],Vold,c))
    W <- crossprod(xc, (xc * q))
    Vnew <- qb(W %*% Vold,rand=F)$Q
    if (sum(sign(Vnew[,1]))<=0){Vnew <- Vnew * -1}
    iter <- iter + 1
    normdiff <- norm(Vnew - Vold)
    Vold <- Vnew
  }

  values <- apply(xc%*%Vold, 2, var)
  ord <- order(values, decreasing = TRUE)
  values <- values[ord]; Vold <- Vold[,ord]
  total.var <- sum(values)
  loadings <- Vold %*% diag(sqrt(values)); loadings <- as.matrix(loadings[,1:ncomp])
  vectors <- as.matrix(Vold[,1:ncomp])
  values <- values[1:ncomp]
  colnames(vectors) <- colnames(loadings) <- paste0("PC", 1:ncomp)
  rownames(vectors) <- rownames(loadings) <- columnNames
  cum.exp <- cumsum(values)/total.var
  scores <- as.matrix(xc %*% vectors); colnames(scores) <- paste0("PC", 1:ncomp)
  orthdist <- .orthdist(xc, loadings)
  scoredist <- .scoredist(scores, values)
  return(structure(list(values=values, loadings=loadings, vectors=vectors, scores=scores,
                        cum.exp=cum.exp, orthdist=orthdist, scoredist=scoredist),
            class = "PrincipalComp", rotation = "none", model = "R1 PCA"))
}



#' Curvilinear Principal Components Analysis
#'
#' @description This performs a curvilinear component analysis, which is a method of PCA that is capable of
#' capturing non-linear trends in the data. The algorithm takes an initial set of scores as a starting point,
#' along with a distance matrix, to unfold the multidimensional manifold. Then a neural network is used to minimize
#' a cost function to find a projection of the input data which function as principal component scores. In this
#' function the eigenvalues and loadings matrix are approximated by matrix-multiplying the transposed data x by the
#' transposed pseudoinverse of the scores. The original data set is then reconstructed from this approximated loadings
#' matrix and set of scores and the eigenvalues of this reconstruction are returned as the approximate eigenvalues. This
#' is simply to facilitate plotting and ease of interpretation, but do keep in mind the eigenvalues and loadings matrix
#' returned here are approximations.
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of desired principal components.
#' @param init linear component scores to initialize the nonlinear estimator. can be one of "mds", "pca",  or "pcaSphere".
#' "mds" uses the cmdscale function to compute the classical multidimensional scaling. alternatively, a matrix of component scores
#' with the number of columns equal to 'ncomp' can be supplied.
#' @param lambda the initial influence radius. if left as NULL, it defaults to twice the maximum of the vector of
#' absolute average deviations for each component.
#' @param alpha initial learning rate. defaults to 0.5.
#' @param Lp a norm for the Minkowski distance metric. defaults to 2.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param maxit maximum number of iterations. defaults to 1000.
#' @param tol convergence tolerance. defaults to 1e-12.
#' @return an object of class PrincipalComp
#' @export
#'
#' @examples
#' pcaCurve(x)
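#' # Hedged sketch on a noisy arc, where a nonlinear method can help
#' # (illustrative only; simulated data):
#' set.seed(7)
#' th <- runif(100, 0, pi)
#' x <- cbind(cos(th) + rnorm(100, sd = 0.1),
#'            sin(th) + rnorm(100, sd = 0.1),
#'            rnorm(100, sd = 0.1))
#' fit <- pcaCurve(x, ncomp = 2)
#' head(fit$scores)  # curvilinear component scores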
#'
#' @references
#' Demartines, P.; Herault, J. (1997) Curvilinear component analysis: a self-organizing neural network for nonlinear mapping of data sets. IEEE Transactions on Neural Networks archive. 8, 1, 148-154.
#'
pcaCurve = function(x, ncomp = min(nrow(x)-2, ncol(x)-1), init = c("mds", "pca", "pcaSphere"),
                    lambda = NULL, alpha = 0.5, Lp = 2, scale = T, maxit = 1000, tol = 1e-12){

  columnNames <- colnames(x)
  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  if (!exists(".Random.seed", mode="numeric", envir=globalenv()))
    sample(NA);
  oldSeed <- get(".Random.seed", mode="numeric", envir=globalenv());
  set.seed(666)
  vss <- sample(seq(0, nrow(x)-1, len = nrow(x)), maxit, replace = T)
  assign(".Random.seed", oldSeed, envir=globalenv());
  minkowski.dist <- cvreg:::minkowski_dist(x, Lp)


  if (is.character(init) || is.vector(init)){
    init <- match.arg(init)
    if (init == "mds"){
      init <- cmdscale(minkowski.dist, k = ncomp)
      ord <- order(apply(init, 2, var), decreasing = T)
      init <- as.matrix(init[,ord])
    }
    else if (init == "pca"){
      init <- as.matrix(pca(x, ncomp, scale = scale)$scores)
      total.var <- sum(apply(init,2,var));
    }
    else{
      init <- as.matrix(pcaSphere(x, ncomp, scale = scale)$scores)
      total.var <- sum(apply(init,2,var))
    }
  }
  else if (is.matrix(init) || is.data.frame(init)){
     init <- as.matrix(init)
  }

  ncomp <- ncol(init)
  if (is.null(lambda))
    lambda <- 2*max(apply(init, 2, aad))

  out = list()
  out$scores = cvreg:::CPCAcpp(minkowski.dist,
                               Yinit = init,
                               lambda = lambda,
                               alpha = alpha,
                               p = Lp,
                               maxiter = maxit,
                               tolerance = tol,
                               vs = vss)

  out$scores <- as.matrix(out$scores)
  out$values <- apply(out$scores,2,var)
  out$vectors <- as.matrix(crossprod(scale(x), pseudoinverse(t(scale(out$scores)))))
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1; out$scores <- out$scores * -1}
  if (ncomp > 1)
    out$loadings <- as.matrix(out$vectors[,1:ncomp])%*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(out$vectors) * sqrt(out$values)

  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- colnames(out$loadings) <- colnames(out$vectors) <- paste0("CC", 1:ncol(out$loadings))
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Curvilinear PCA"))
}

#' Local Principal Components Analysis
#'
#' @description Local principal components analysis is a variant of PCA that is useful for data sets with clustered
#' observations (Yang, Zhang, & Yang, 2006). LPCA begins with individual observations and finds the structure of the data
#' by considering the data points within a certain distance δ. In this implementation the Minkowski distance is used,
#' a generalized distance measure that includes the Manhattan (L1) and Euclidean (L2) distances as special cases and
#' extends to higher-order norms, making it a flexible choice of metric.
#' This distance based nearest neighbors strategy is used to build an adjacency matrix, which is converted to the Laplacian
#' Graph representation. This is then  subjected to eigendecomposition, and the eigenvectors P are multiplied by the data x
#' to obtain the matrix XP. Next, the crossproduct of XP is used to calculate the rotation matrix R, ie, R = XP'XP, and the
#' data are projected onto a new basis. A more complete description of the algorithm can be found in the cited paper.
#' Note that the new basis can have more columns than it does variables, and that the eigenvalues
#' are not ordered due to the local nature of the components.
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of components to extract.
#' @param pct this determines the percentage of the distance matrix whose
#' edges will be non-null. the default is 0.15.
#' @param Lp a norm for the Minkowski distance metric. defaults to 2.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @return an object of class PrincipalComp
#' @export
#'
#' @examples
#' pcaLocal(x, 3)
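#' # Hedged sketch with two loose clusters (illustrative only; simulated data):
#' set.seed(8)
#' x <- rbind(matrix(rnorm(150), 50, 3), matrix(rnorm(150, mean = 4), 50, 3))
#' colnames(x) <- paste0("v", 1:3)
#' fit <- pcaLocal(x, ncomp = 2)
#' fit$values  # note: not necessarily in decreasing order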
#'
#' @references
#' Yang, J., Zhang, D., & Yang, J. (2006). Locally principal component learning for face representation and recognition. Neurocomputing, 69(13-15), 1697–1701. doi:10.1016/j.neucom.2006.01.009
#'
pcaLocal = function(x, ncomp = min(nrow(x)-1, ncol(x)), pct = 0.15, Lp = 2, scale = T){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)) ncomp = sum(eigen(crossprod(x)/nrow(x))$values > 0)
  graph.mat = .distgraph(x, pct, Lp)
  H <- symm(1*(graph.mat$mask));
  L = diag(rowSums(H))-H
  eigL = .adjEigLaplace(eigen(L))
  Lval = eigL$values^0.50
  Pl   = (eigL$vectors)%*%diag(Lval)
  R  = crossprod(Pl,x)%*%crossprod(x,Pl)
  S = crossprod(x,Pl)%*%crossprod(Pl,x)
  topEigen <- function(x, n){
    e = eigen(x)
    n = sum(e$values > 0)
    list(values = e$values[1:n], vectors = e$vectors[,1:n], ncomp = n)
  }
  topR = topEigen(R, ncomp)
  topR$vectors <- as.matrix(topR$vectors)
  out = list()
  out$vectors <- as.matrix(.adjLoadings(crossprod(x, Pl)%*%(topR$vectors%*%diag(1/sqrt(topR$values)))))
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1}
  out$scores <- x%*%out$vectors
  out$values <- apply(out$scores,2,aad)^2
  out$total.var <- sum(out$values)
  ord <- order(out$values, decreasing = TRUE)
  ncomp <-min(ncomp, topR$ncomp)
  out$values <- out$values[1:ncomp]
  out$vectors <- as.matrix(out$vectors[,1:ncomp])
  if (ncomp > 1)
    out$loadings <- as.matrix(out$vectors)%*% diag(sqrt(out$values))
  else
    out$loadings <- out$vectors * sqrt(out$values)
  out$scores <- as.matrix(out$scores[,1:ncomp])
  colnames(out$scores) <-colnames(out$vectors) <- colnames(out$loadings) <- paste0("LC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  out$cum.exp <- round(cumsum(out$values)/out$total.var, 6)
  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Local PCA"))
}



#' Sparse Principal Components Analysis
#'
#' @description This function is based on the original paper by Zou, Hastie, and Tibshirani (2006) where an elastic net
#' formulation of principal components analysis was demonstrated.
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of components to extract.
#' @param alpha the elastic net mixing parameter, which can take values of \eqn{0 \le \alpha \le 1}.
#' @param lambda the shrinkage parameter. as in the elastic net, the L1 shrinkage penalty is \eqn{\lambda_1 = \alpha * \lambda},
#' and the L2 shrinkage penalty is \eqn{\lambda_2 = (1-\alpha) * \lambda}.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param max.iter maximum number of iterations
#' @param tol tolerance for convergence
#'
#' @return an object of class PrincipalComp
#' @export
#'
#' @examples
#' pcaSparse(x, 3, 0.5, 0.12)
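#' # Hedged sketch showing how lambda induces sparsity in the loadings
#' # (illustrative only; simulated data):
#' set.seed(9)
#' x <- matrix(rnorm(500), 100, 5)
#' colnames(x) <- paste0("v", 1:5)
#' fit <- pcaSparse(x, ncomp = 2, alpha = 0.5, lambda = 0.05)
#' round(fit$loadings, 3)  # larger lambda zeroes out more entries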
#'
#' @references
#' Zou, H.; Hastie, T.; Tibshirani, R. (2006). Sparse principal component analysis. Journal of Computational and Graphical Statistics. 15 (2): 262–286. doi:10.1198/106186006x113430.
#'

pcaSparse <- function(x, ncomp = min(nrow(x)-1, ncol(x)), alpha = 0.75, lambda = 1e-4, scale = T, max.iter = 1000, tol=1e-16) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }
  if (alpha > 1){
    alpha <- 1
    purple <- crayon::make_style(rgb(0.431,0.055,0.616))
    cat(purple("Alpha cannot be greater than 1. Automatically correcting to alpha = 1."))
  }

  if (alpha < 0){
    alpha <- 0
    violet <- crayon::make_style(rgb(0.62,0.06,0.48))
    cat(violet("Alpha cannot be negative. Automatically correcting to alpha = 0."))
  }

  x <- as.matrix(x)
  if (any(is.na(x))) {
    warning("Missing values are omitted: na.omit(x).")
    x <- stats::na.omit(x)
  }
  n <- nrow(x)
  p <- ncol(x)

  if(is.null(ncomp)) ncomp <- min(n,p)
  if(ncomp > min(n,p)) ncomp <- min(n,p)
  if(ncomp<1) stop("Target rank is not valid!")
  svd_init <- cvreg::wsvd(x, tol = 1e-100)
  if (sum(sign(svd_init$v[,1]))<=0){svd_init$v <- svd_init$v * -1}
  Dmax <- svd_init$d[1]
  A <- as.matrix(svd_init$v[,1:ncomp])
  B <- as.matrix(svd_init$v[,1:ncomp])
  V <- as.matrix(svd_init$v)
  VD = sweep(V, MARGIN = 2, STATS = svd_init$d, FUN = "*", check.margin = TRUE)
  VD2 = sweep(V, MARGIN = 2, STATS = svd_init$d^2, FUN = "*", check.margin = TRUE)

  lambdaL1 <- alpha * lambda
  lambdaL2 <- (1-alpha) * lambda
  lambdaL1 <- lambdaL1 *  Dmax^2
  lambdaL2 <- lambdaL2 * Dmax^2
  nu <- 1.0 / (Dmax^2 + lambdaL2)
  kappa <- nu * lambdaL1
  obj <- c();change <- Inf;iter<- 1
  while (iter<= max.iter && change > tol) {
    Z <- VD2 %*% crossprod(V, B)
    svd_update <- wsvd(Z);
    if (sum(sign(svd_update$v[,1]))<=0){
      svd_update$v <- svd_update$v * -1
      svd_update$u <- svd_update$u * -1
    }
    A <- tcrossprod(svd_update$u, svd_update$v)
    grad <- VD2 %*% crossprod(V, (A-B)) - lambdaL2 * B
    B_temp <- B + nu * grad
    # lasso penalty
    idxH <- which(B_temp > kappa)
    idxL <- which(B_temp <= -kappa)
    B = matrix(0, nrow = nrow(B_temp), ncol = ncol(B_temp))
    B[idxH] <- B_temp[idxH] - kappa
    B[idxL] <- B_temp[idxL] + kappa
    # compute errors
    R <- t(VD) - tcrossprod(crossprod(VD, B), A)
    # evaluate elastic net objective function
    obj <- c(obj, 0.5 * sum(R^2) + lambdaL1 * sum(abs(B)) + 0.5 * lambdaL2 * sum(B^2))
    # check for convergence
    if(iter> 1){change <- (obj[iter-1] - obj[iter]) / obj[iter]}
    iter<- iter+ 1
  }

  out = list()
  if (sum(sign(B[,1]))<=0){B <- B * -1; A <- A * -1}
  out$values <- svd_update$d[1:ncomp]/(nrow(x)-1)
  out$scores <- as.matrix(x %*% B)
  if (ncomp > 1)
    out$loadings <- as.matrix(B) %*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(B) * sqrt(out$values)

  out$vectors <- as.matrix(B)
  out$objective <- obj

  colnames(out$scores) <- paste0("PC", 1:ncomp)
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames

  out$total.var <- sum(apply(Re(x), 2, stats::var))
  out$cum.exp <- cumsum(apply(out$scores, 2, var)/ out$total.var)
  class(out) <- "PrincipalComp"
  attr(out, "rotation") <- "none"
  attr(out, "model") <- "Sparse PCA"
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(out)
}


#' Sparse Robust Principal Components Analysis
#'
#' @description This function is based on the original paper by Zou, Hastie, and Tibshirani (2006) where an elastic net
#' formulation of principal components analysis was demonstrated. The algorithm can be initialized using one of two methods.
#' The first option is the robust principal components method in the \code{\link{pcaRobust}} function which utilizes the method
#' of Filzmoser, Maronna, and Werner (2008).  The second option is spherical principal components analysis (Locantore et al., 1999)
#' as suggested by Maronna (2005). After the initial principal components analysis, the elastic net penalization is applied to this
#' initial fit. The elastic net objective function is modified such that the sum of squared errors is weighted utilizing a bisquare
#' function, leading to the objective function \eqn{0.5 * \sum_i{\epsilon_i * w_i} + \sum(|B|) * \lambda_1 + 0.5 * \lambda_2 * \sum(B^2)}.
#'
#'
#' @param x a data frame or matrix of numeric variables
#' @param ncomp the number of components to extract.
#' @param alpha the elastic net mixing parameter, which can take values of \eqn{0 \le \alpha \le 1}.
#' @param lambda the shrinkage parameter. as in the elastic net, the L1 shrinkage penalty is \eqn{\lambda_1 = \alpha * \lambda},
#' and the L2 shrinkage penalty is \eqn{\lambda_2 = (1-\alpha) * \lambda}.
#' @param delta the desired breakdown point. defaults to 0.50, which is the maximum.
#' @param init the method used to initialize the fit, one of "robust" (the default, uses \code{\link{pcaRobust}}) or "sphere" (uses \code{\link{pcaSphere}}).
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param max.iter maximum number of iterations
#' @param tol tolerance for convergence
#'
#' @return a PrincipalComp object
#' @export
#'
#' @examples
#' pcaRobSparse(x, 3, 0.5, 0.12)
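#' # Hedged sketch (illustrative; simulated data with a few gross outliers):
#' set.seed(10)
#' x <- matrix(rnorm(500), 100, 5)
#' x[1:5, ] <- x[1:5, ] + 8
#' fit <- pcaRobSparse(x, ncomp = 2, alpha = 0.5, lambda = 0.05)
#' round(fit$loadings, 3)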
#'
#' @references
#' Hawkins, D.M., Liu, L., & Young, S.S. (2001) Robust Singular Value Decomposition, National Institute of Statistical Sciences, Technical Report Number \cr \cr
#' Locantore, N., Marron, J., Simpson, D, Tripoli, N., Zhang, J., & Cohen, K. Robust principal component analysis for functional data. (1999) Sociedad de Estadistica e Investigacion Operativa Test. 8: 1. https://doi.org/10.1007/BF02595862 \cr \cr
#' Zou, H., Hastie, T., & Tibshirani, R. (2006). Sparse principal component analysis. Journal of Computational and Graphical Statistics. 15 (2): 262–286. doi:10.1198/106186006x113430. \cr \cr
#' Maronna, R.A. (2005) Principal components and orthogonal regression based on robust scales, Technometrics, 47, 264–273 \cr \cr
#' Filzmoser, P., Maronna, R., & Werner, M. (2008) Outlier identification in high dimensions, Computational Statistics and Data Analysis, 52, 1694-1711.
#'
pcaRobSparse = function(x, ncomp = min(nrow(x)-1, ncol(x)), alpha = 0.75, lambda = 1e-4, delta = 0.50, init = c("robust", "sphere"), scale = T, max.iter = 200, tol=1e-16) {

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }
  columnNames <- colnames(x)
  x <- as.matrix(x)
  if (scale) x <- scale(x)
  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  init <- match.arg(init)
  wts <- function(r){ return((1-r)^2*(r<=1))}
  original.means <- colMeans(x)
  x <- sweep(x,2,original.means)
  if (alpha > 1){
    alpha <- 1
    purple <- crayon::make_style(rgb(0.431,0.055,0.616))
    cat(purple("Alpha cannot be greater than 1. Automatically correcting to alpha = 1."))
  }

  if (alpha < 0){
    alpha <- 0
    violet <- crayon::make_style(rgb(0.62,0.06,0.48))
    cat(violet("Alpha cannot be negative. Automatically correcting to alpha = 0."))
  }

  x <- as.matrix(x)
  if (any(is.na(x))) {
    warning("Missing values are omitted: na.omit(x).")
    x <- stats::na.omit(x)
  }
  n <- nrow(x)
  p <- ncol(x)

  if(is.null(ncomp)) ncomp <- min(n,p)
  if(ncomp > min(n,p)) ncomp <- min(n,p)
  if(ncomp<1) stop("Target rank is not valid!")
  if (init == "sphere"){
    svd_init <- pcaSphere(x, ncomp = min(n-1,p), evest = "bisq")
  }
  else{
    svd_init <- pcaRobust(x, ncomp = min(n-1,p))
  }
  svd_init$d <- sqrt(svd_init$values) * sqrt(nrow(x))
  mu0 <- svd_init$center
  fit <- scale(x%*%svd_init$loadings%*%t(svd_init$loadings), center = -mu0, FALSE)
  R <- colSums(t(x-fit)^2)
  sig0 <- .biScalePCA(sqrt(R), delta = delta, c = 1)^2
  ww <- wts(R/sig0)
  Dmax <- svd_init$d[1]
  A <- as.matrix(svd_init$vectors[,1:ncomp])
  B <- as.matrix(svd_init$vectors[,1:ncomp])
  V <- as.matrix(svd_init$vectors)

  VD = sweep(V, MARGIN = 2, STATS = svd_init$d, FUN = "*", check.margin = TRUE)
  VD2 = sweep(V, MARGIN = 2, STATS = svd_init$d^2, FUN = "*", check.margin = TRUE)

  lambdaL1 <- alpha * lambda
  lambdaL2 <- (1-alpha) * lambda
  lambdaL1 <- lambdaL1 *  Dmax^2
  lambdaL2 <- lambdaL2 * Dmax^2
  nu <- 1.0 / (Dmax^2 + lambdaL2)
  kappa <- nu * lambdaL1
  obj <- c();change <- Inf;iter<- 1
  repre <- x%*%B
  Xcen <- x

  while (iter<= max.iter && change > tol) {

    # Update mu and data
    mu=colSums(Xcen*ww)/sum(ww)
    Xcen=scale(Xcen, center=mu, scale=FALSE)
    Z <- VD2 %*% crossprod(V, B)
    svd_update <- wsvd(Z)
    A <- tcrossprod(svd_update$u, svd_update$v)
    grad <- VD2 %*% crossprod(V, (A - B)) - lambdaL2 * B
    B_temp <- B + nu * grad
    # lasso penalty
    idxH <- which(B_temp > kappa)
    idxL <- which(B_temp <= -kappa)
    B = matrix(0, nrow = nrow(B_temp), ncol = ncol(B_temp))
    B[idxH] <- B_temp[idxH] - kappa
    B[idxL] <- B_temp[idxL] + kappa
    # calculate fit
    fit=Xcen%*%B%*%t(B)
    # compute errors
    Rprime <- Xcen - fit
    R <- colSums(Rprime^2)
    sig2 <- .biScalePCA(sqrt(R), delta = delta, c = 1)^2
    # evaluate elastic net objective function
    obj <- c(obj, 0.5 * (length(R)*sig2) + lambdaL1 * sum(abs(B)) + 0.5 * lambdaL2 * sum(B^2))
    repre <- Xcen%*%B
    ww=wts(R/sig2)
    # check for convergence
    if(iter> 1){change <- (obj[iter-1] - obj[iter]) / obj[iter]}
    iter<- iter+ 1
  }

  out = list()
  if (sum(sign(B[,1]))<=0){B <- B * -1; A <- A * -1}
  out$values <- (svd_update$d[1:ncomp]/(max(1, nrow(x))))
  out$vectors <- as.matrix(B)
  if (ncomp > 1)
    out$loadings <- as.matrix(B)  %*% diag(sqrt(out$values))
  else
    out$loadings <- as.matrix(B) * sqrt(out$values)
  out$scores <- as.matrix(x %*% B)

  out$objective <- obj
  colnames(out$vectors) <- colnames(out$loadings) <- paste0("PC", 1:ncomp)
  rownames(out$vectors) <- rownames(out$loadings) <- columnNames
  colnames(out$scores) <- paste0("PC", 1:ncomp)
  robvar <- function(u){.biScalePCA(u, delta = delta, c = 1)^2}

  out$total.var <- sum(apply(Re(x), 2, var))
  out$cum.exp <- cumsum(apply(out$scores, 2, var)/ out$total.var)
  class(out) <- "PrincipalComp"
  out$center <- mu
  attr(out, "rotation") <- "none"
  attr(out, "model") <- "Robust Sparse PCA"
  out$orthdist <- .orthdist(x, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  return(out)
}


#' Mixed Principal Components Analysis
#'
#' @description This function allows you to integrate information about data structure contained
#' in categorical variables in a principal components analysis.
#' @param x a matrix or data frame containing only numeric variables
#' @param cat the categorical variables. must be coded as factors or characters, not numeric dummy variables.
#' @param ncomp the number of components to retain.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @return an object of class PrincipalComp
#' @export
#' @examples
#' x <- Alzheimers[,-c(1,3)] # Get rid of categorical variables for the PCA.
#' cats <- Alzheimers[,1:3] # Put categorical variables here for the PCA
#' pcaMix(x, cats) # Run pcaMix
#'
pcaMix <- function(x=NULL,cat=NULL,ncomp=min(nrow(x)-1,ncol(x)+ncol(cat)),scale=T){


  if (ncomp <= 1){
    ncomp <- 2
    cat(.crimson(("The number of extracted components must be at least 2. Automatically setting 'ncomp = 2'")))
  }

  if (scale) x <- scale(x)
  if (is.null(ncomp)) ncomp <- min(nrow(x)-1,ncol(x)+ncol(cat))
  cl <- match.call()
  rec <- .recodeData(x,cat,T)
  n <- rec$n; p <- rec$p; p1 <- rec$p1; p2 <- p - p1
  X <- rec$X; W <- rec$W; m <- ncol(W) - p1; indexj <- rec$indexj

  #construct the metrics
  N <- rep(1/n, n); M1 <- M2 <- NULL; P1 <- P2 <- NULL
  if (p1!=0) {M1 <- rep(1,p1); P1 <- rep(1,p1)}
  if (p2!=0){ns <- apply(rec$G, 2, sum); M2 <- n/ns; P2 <- rep(1,m)}
  M <- c(M1,M2); P <- c(P1,P2); M.col <- P*M; names(M.col) <- colnames(W)
  e <- .gsvd(W, N, M.col); V.total.dim <- e$V; U.total.dim <- e$U; d.total.dim <- e$vs

  q <- qr(W)$rank
  ncomp <- min(ncomp, q)
  values <- e$vs[1:q]^2
  prop.exp <- values/sum(values, na.rm = T)
  cum.exp <- cumsum(values)/sum(values, na.rm = T)
  d <- e$vs[1:ncomp]

  #scores
  U <- e$U[, 1:ncomp,drop=FALSE]
  rownames(U) <- rownames(W)
  colnames(U) <- paste0("PC", 1:ncomp)
  scores <- sweep(U,2,d,"*")
  F.total.dim <- sweep(U.total.dim,2,d.total.dim,"*")
  contrib.ind<-scores^2/n
  cos2.ind <- sweep(scores^2, 1, apply(F.total.dim, 1, function(v) {return(sum(v^2))}), "/")
  result.ind <- list(coord = scores, contrib = contrib.ind, cos2 = cos2.ind)

  #loadings and contributions
  A1 <- A2 <- NULL
  C1 <- C2 <- NULL
  contrib.x <- contrib.cat <- NULL
  V <- e$V[, 1:ncomp,drop=FALSE]
  rownames(V) <- colnames(W)
  colnames(V) <- paste0("PC", 1:ncomp)

  if(p1 >0){
    V1 <- V[1:p1, ,drop=FALSE]
    V1.total.dim <- V.total.dim[1:p1, ,drop=FALSE]
    A1 <- sweep(V1,2,d,"*")
    A1.total.dim <- sweep(V1.total.dim,2,d.total.dim,"*")
    contrib.x <- sweep(A1^2, 1, M1*P1, "*")
    contrib.x.pct <- sweep(contrib.x, 2, d^2, "/")
    cos2.x <- sweep(A1^2, 1, apply(A1.total.dim,1,function(v){sum(v^2)}), "/")
  }
  if(p2 >0){
    V2 <- V[(p1 + 1):(p1 + m), ,drop=FALSE]
    V2.total.dim <- V.total.dim[(p1 + 1):(p1+m), ,drop=FALSE]
    A2 <- sweep(V2,2,d,"*")
    A2 <- sweep(A2,1,M2,"*")
    A2.total.dim <- sweep(V2.total.dim,2,d.total.dim,"*")
    A2.total.dim <- sweep(A2.total.dim,1,M2,"*")
    contrib.cat.measure <- sweep(A2^2, 1, ns/n, "*")
    cos2.cat <- sweep(A2^2, 1, apply(A2.total.dim, 1, function(v) {return(sum(v^2))}), "/")
    contrib.cat <-matrix(NA,p2,ncomp)
    rownames(contrib.cat) <- colnames(cat)
    colnames(contrib.cat) <- paste("PC", 1:ncomp, sep = " ")
    for (j in 1:p2){
      contrib.cat[j,] <- apply(contrib.cat.measure[which(indexj == (j+p1))-p1, ,drop=FALSE], 2, sum)
    }
    contrib.cat.pct<-sweep(contrib.cat, 2, d^2, "/")
  }

  sqload <- rbind(contrib.x, contrib.cat) #correlation ratio
  cat.eta2 <- NULL
  if (p2 > 0) cat.eta2 <- sqload[(p1+1):(p1+p2),,drop=FALSE]

  #organization of the results
  result.x <- result.levels <- result.cat <- NULL
  if (p1!=0){
    result.x <- list(coord = A1, contrib= contrib.x, cos2 = cos2.x)
  }
  if (p2!=0) {
    result.levels <- list(coord = A2, contrib=contrib.cat, cos2 = cos2.cat)
    result.cat <- list(contrib = contrib.cat)
  }

  coef <- list()
  for (i in 1:ncomp){
    b <- V[,i]*M.col
    if (p1 > 0) b[1:p1] <-  b[1:p1]/rec$s[1:p1]
    b0 <- -sum(b*rec$g)
    coef[[i]] <- as.matrix(c(b0,b))
  }
  coef <- do.call(cbind, coef)[-1,]
  coefnames <- rownames(coef)
  coef <- gram.schmidt(coef)$Q
  A2rot <- NULL
  if (p2 >0) A2rot <- sweep(A2,1,sqrt(ns/n) ,"*")
  A <- rbind(A1,A2rot)
  colnames(A) <- colnames(coef) <- paste("PC",1:ncomp, sep = "")
  rownames(A) <- rownames(coef) <- coefnames
  Z <- rec$Z
  values <- apply(scores, 2, var)
  out <- list(call = cl,
              values = values,
              loadings = A,
              vectors = coef,
              scores = scores,
              x.loadings = A1,
              c.loadings = A2,
              ncomp = ncomp,
              cum.exp = cumsum(values)/sum(values),
              orthdist = .orthdist(W, coef),
              scoredist = .scoredist(scores, values)
  )

  return(structure(out, class = "PrincipalComp", rotation = "none", model = "Mixed PCA"))
}





#' Kernel Principal Components Analysis
#'
#' @param x a data frame or matrix of numeric variables. can be NULL
#' if a kernel matrix is supplied directly to argument 'kern'.
#' @param ncomp specify the maximum number of components to be returned.
#' if there are fewer components with non-zero eigenvalues than the number
#' specified, then all components with non-zero eigenvalues will be returned.
#' at least two components will be returned regardless of what is specified here.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param kern either a kernlab compatible kernel function, i.e., 'laplacedot(1)',
#' or a function that generates square, symmetric, kernel matrix. a kernel matrix
#' can also be directly supplied if one has been created. centering is done internally so
#' it is not necessary to pre-center the kernel matrix. The default is the
#' radial basis function kernel with the scale parameter automatically determined.
#' Unless the default gives poor results, the rbf kernel with the automatic scale
#' parameter is safe to leave as-is.
#' @param ... additional arguments to supply if the argument 'kern' is given a
#' function that generates a matrix (ie, not a kernlab function). anything supplied
#' here will be passed to the function, so the argument must match one of the parameters
#' in the kernel.
#' @return a PrincipalComp object
#' @export
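#'
#' @examples
#' # Hedged sketch (illustrative only; assumes the package's default RbfKernel
#' # generator and internal kpcaCPP routine are available once installed):
#' set.seed(11)
#' x <- matrix(rnorm(300), 75, 4)
#' fit <- pcaKern(x, ncomp = 2)            # default rbf kernel
#' head(fit$scores)
#' # a precomputed symmetric kernel matrix can be supplied instead:
#' K <- tcrossprod(scale(x)) / ncol(x)     # simple linear kernel
#' fit2 <- pcaKern(x, ncomp = 2, kern = K)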
#'
pcaKern <- function(x = NULL, ncomp = NULL, scale = T, kern = RbfKernel, ...){

  # rbfdot(1/mean(L1median(as.matrix(dist(x)))$center))
  if (scale) x <- scale(x)
  if (is.function(kern)){
    if (!is.null(attr(kern,"package"))){
      if (attr(kern,"package")=="kernlab"){
        Kx <- kernlab::kernelMatrix(kern, x);
      }
    }
    else{
      Kx <- kern(x, ...)
    }
  }
  else if (is.matrix(kern) && isSymmetric(kern)){
    Kx <- kern;
  }
  else{
    return(cat(crayon::red("Please supply a kernlab compatible kernel function or a square, symmetric, kernel matrix")))
  }

  out <- cvreg:::kpcaCPP(Kx);
  out$values <- zapsmall(out$values, 8);
  if (sum(sign(out$vectors[,1]))<=0){out$vectors <- out$vectors * -1; out$scores <- out$scores * -1}
  if (is.null(ncomp)){
    ncomp <- max(2, ncol(x))
  }
  else{
    ncomp <- max(2, min(ncomp, sum(out$values > 0)))
  }
  pcs <- 1:ncomp
  out$values <- out$values[pcs];
  out$scores <- out$scores[,pcs];
  out$vectors <- out$vectors[,pcs];
  out$cum.exp <- NULL;
  out$loadings <- out$vectors %*% diag(out$values);
  if (!is.null(rownames(x))){rowNames<-rownames(x)}else{rowNames<-paste0("Row",1:nrow(x))}
  rownames(out$vectors) <- rownames(out$loadings) <- rowNames
  colnames(out$loadings) <- colnames(out$vectors) <- colnames(out$scores) <- paste0("KC",pcs)
  structure(out, class = "PrincipalComp", model = "Kernel PCA", rotation = "none")
}



#' Empirical Locality Preserving Projection
#'
#' @description This function implements a variant of the locality preserving projection method.
#' Ordinary locality preserving projections require user-defined parameters to construct a graph
#' for characterizing the local structure of the data. Here an empirical (data dependent) approach
#' is used to construct the graph. The only tuning parameter that requires input from the user is
#' the kernel smoothing parameter h (the kernel precision is tau = 1/h).
#'
#' @param x a data frame or matrix of numeric covariates
#' @param ncomp number of desired components
#' @param h kernel smoothing parameter. defaults to 1.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#'
#' @return a 'PrincipalComp' object.
#' @export
#'
#' @references
#' Yang, B. and Chen, S. (2010). Sample-dependent graph construction with application to dimensionality reduction. Neurocomputing, 74(1-3), pp. 301–314.
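#'
#' @examples
#' # Hedged sketch (illustrative only; simulated data, internal helpers such as
#' # L1median and minkowski_dist assumed available from the package):
#' set.seed(12)
#' x <- matrix(rnorm(300), 75, 4)
#' colnames(x) <- paste0("v", 1:4)
#' fit <- empLPP(x, ncomp = 2, h = 1)
#' head(fit$scores)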
#'
empLPP <- function(x, ncomp=min(nrow(x)-1,ncol(x)), h = 1, scale = T){

  numcheck <- sapply(x, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'x' must not contain character or factor variables!")
    )))
  }

  if (is.null(ncomp)){
    ncomp = min(nrow(x)-1, ncol(x))
  }

  columnNames <- colnames(x); tau = 1/h
  x = as.matrix(x); n = nrow(x); p = ncol(x)
  center <- L1median(x)$center; x <- sweep(x,2,center)
  if (scale) x <- sweep(x,2,apply(x,2,function(i) sqrt(mean(i^2))),"/")
  D = cvreg:::minkowski_dist(x, 2)
  for (i in 1:n){sumvecD = sum(D[i,]); D[i,] = D[i,]/sumvecD}
  K = exp(-D/(2*(tau^2))); W = array(0,c(n,n))

  rowCenters = try(L1median(t(K))$center, silent = T)
  if (inherits(rowCenters, "try-error"))
    rowCenters = rowMeans(K)

  for (i in 1:n){
    for (j in 1:n){
      if (K[i,j] > (rowCenters[i])){
        W[i,j] = K[i,j]
      }
    }
  }

  Omega = 2*symm(W)
  Ds = diag(rowSums(W))+diag(colSums(W))
  L = crossprod(x,Ds-Omega)%*%x; R = crossprod(x,Ds)%*%x
  vectors = cvreg:::.adjLoadings(eigen(solve(R,L))$vectors)
  scores = x%*%vectors
  values = apply(scores, 2, var)
  loadings = vectors%*%diag(sqrt(values))
  ord = order(values, decreasing = TRUE)
  values = values[ord]; vectors = vectors[,ord];
  if (sum(sign(vectors[,1])) <= 0){vectors <- vectors * -1; loadings <- loadings * -1; scores <- scores * -1}


  loadings = loadings[,ord]; scores = scores[,ord]
  values = values[1:ncomp]; vectors = vectors[,1:ncomp]; loadings = loadings[,1:ncomp]; scores = scores[,1:ncomp]
  colnames(vectors) <- colnames(loadings) <- paste0("DIM", 1:ncomp); rownames(vectors) <- rownames(loadings) <- columnNames
  out <- list(values=values,vectors=vectors,loadings=loadings,scores=scores)
  return(structure(out, class = "PrincipalComp", model = "Sample Dependent LPP", rotation = "none"))
}




#' Factor analysis via Expectation Maximization
#'
#' @description This function fits an exploratory factor analysis model by expectation maximization, using the
#' method of Bai & Li (2012) to estimate the maximum likelihood solution.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-10.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#' @references
#' Bai, J. and Li, K. (2012). Statistical analysis of factor models of high dimension. The Annals of Statistics 40, 436-465.
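#'
#' @examples
#' # Hedged sketch: six indicators generated from two latent factors
#' # (illustrative only; assumes GPArotation is installed for the default rotation):
#' set.seed(13)
#' f <- matrix(rnorm(200), 100, 2)
#' lam <- matrix(c(0.9, 0.8, 0.7, 0, 0, 0,
#'                 0, 0, 0, 0.9, 0.8, 0.7), 6, 2)
#' Y <- tcrossprod(f, lam) + matrix(rnorm(600, sd = 0.4), 100, 6)
#' colnames(Y) <- paste0("y", 1:6)
#' fit <- facanal(Y, nfac = 2)
#' fit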
#' @export
#'
facanal <- function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = GPArotation::Varimax, scale = T, max.iter = 4000, tol = 1e-10) {

  if (nfac < 1){
    cat(crayon::red(("You need at least one factor... One factor will be returned.")))
    nfac <- 1
  }

  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!")
    )))
  }

  Y <- as.matrix(Y)
  e.values <- eigen(cor(Y))$values
  if (is.null(nfac)){nfac <- sum(e.values > 0)}
  nfac <- min(nfac, .lederman(Y, e.values))
  p <- ncol(Y); n <- nrow(Y); if (scale) {Y <- scale(Y)} else {Y <- sweep(Y,2,colMeans(Y))}

  ## Use the singular value decomposition to create an initial fit.
  svd_init <- wsvd(Y, tol = 1e-6)
  if (sum(sign(svd_init$v[,1]))<=0){svd_init$v <- svd_init$v * -1; svd_init$u <- svd_init$u * -1}
  Gamma <- (svd_init$v %*% diag(svd_init$d, p, p) / sqrt(nrow(Y)))[, 1:nfac]
  scores <- as.matrix(sqrt(nrow(Y)) * svd_init$u[, 1:nfac])
  Sigma <- as.matrix(Y - tcrossprod(scores, Gamma)); Sigma <- apply(Sigma, 2, function(x) mean(x^2))
  Tau <- 1/Sigma; YVar <- apply(Y, 2, function(i) mean((i-mean(i))^2))
  GammaTilde <- sqrt(Tau) * Gamma
  M <- diag(nfac) + crossprod(GammaTilde , GammaTilde)
  eigM <- eigen(M, symmetric = TRUE)
  if (sum(sign(eigM$vectors[,1]))<=0){eigM$vectors <- eigM$vectors * -1}
  YSG <- Y %*% (Tau * Gamma)
  logdetY <- -sum(log(Tau)) + sum(log(eigM$values))
  B <- 1/sqrt(eigM$values) * tcrossprod(t(eigM$vectors), YSG)
  logtrY <- sum(Tau * YVar) - sum(B^2)/n
  logLik <- -logdetY - logtrY

  ## Initialize loop:
  is.converged <- FALSE; iter <- 0
  while (!is.converged && iter < max.iter) {
    ## Expectation step:
    varZ <- eigM$vectors %*% (1/eigM$values * t(eigM$vectors))
    EZ <- YSG %*% varZ
    EZZ <- n * varZ + crossprod(EZ)
    ## Maximization step:
    eigEZZ <- eigen(EZZ, symmetric = TRUE)
    if (sum(sign(eigEZZ$vectors[,1]))<=0){eigEZZ$vectors <- eigEZZ$vectors * -1}
    YEZ <- crossprod(Y, EZ)
    G <- sqrt(eigEZZ$values) * t(eigEZZ$vectors)
    Tau <- 1/(YVar - 2/n * rowSums(YEZ * Gamma) + 1/n * rowSums(tcrossprod(Gamma,G)^2))
    Gamma <- YEZ %*% eigEZZ$vectors %*% (1/eigEZZ$values * t(eigEZZ$vectors))
    GammaTilde <- sqrt(Tau) * Gamma
    M <- diag(nfac) + crossprod(GammaTilde, GammaTilde)
    eigM <- eigen(M, T)
    if (sum(sign(eigM$vectors[,1]))<=0){eigM$vectors <- eigM$vectors * -1}
    YSG <- Y %*% (Tau * Gamma)
    previous.logLik <- logLik
    logdetY <- -sum(log(Tau)) + sum(log(eigM$values))
    B <- 1/sqrt(eigM$values) * (t(eigM$vectors) %*% t(YSG))
    logtrY <- sum(Tau * YVar) - sum(B^2)/n
    logLik <- -logdetY - logtrY
    iter <- iter + 1
    if (abs(logLik - previous.logLik) < tol * abs(logLik)) {
      is.converged <- TRUE
    }
  }
  Gamma <- as.matrix(Gamma)
  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  uniquenesses <- 1/Tau
  communalities <- rowSums(Gamma^2)
  if (!is.null(rotate) && nfac > 1){
    if (!is.function(rotate)) {
      stop("please supply a function from the GPArotation package to the argument rotate")
    } else {
      rotated <- rotate(Gamma)
      rotation <- rotated$method
      Gamma <- rotated$loadings
      values <- diag(t(Gamma) %*% Gamma)
      ord <- order(values, decreasing = TRUE)
      Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
      values <- values[ord]
      if (rotated$orthogonal){
        communalities <- rowSums(Gamma^2)
        uniquenesses <- 1 - communalities
        Tau <- 1/uniquenesses
        Phi <- NULL
      } else if (!rotated$orthogonal) {
        Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
      }
      svd.H <- wsvd(crossprod(Gamma, (Tau * Gamma)))
      scores <- Y %*% (Tau * Gamma) %*% (svd.H$u %*% (1/svd.H$d * t(svd.H$v)))
    }
  } else{
    rotation <- "none"; Gamma <- as.matrix(Gamma); if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
    values <- diag(t(Gamma) %*% Gamma); svd.H <- wsvd(crossprod(Gamma, (Tau * Gamma))); Phi <- NULL
    scores <- as.matrix(Y %*% (Tau * Gamma) %*% (svd.H$u %*% (1/svd.H$d * t(svd.H$v))))
  }
  if (nfac > 1)
    vectors = Gamma %*% diag(1/sqrt(values))
  else
    vectors = as.matrix(Gamma * sqrt(1/values))

  colnames(Gamma) <- paste0("FA", 1:ncol(Gamma))
  rownames(Gamma) <- colnames(Y)
  colnames(scores) <- colnames(Gamma)
  names(communalities) <- names(uniquenesses) <- colnames(Y)
  total.var <- sum(communalities + uniquenesses)
  cum.exp <- cumsum(colSums(Gamma^2))/total.var
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  orthdist <- .orthdist(Y, Gamma)
  scoredist <- .scoredist(scores, values)
  out <- list(loadings = Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity, cum.exp = cum.exp, e.values = e.values, uniquenesses = uniquenesses, communalities = communalities, complexity = complexity, Phi = Phi , scores = scores, factors = nfac, orthdist = orthdist, scoredist = scoredist, rotation = rotation, n.obs = nrow(Y), niter = iter, converged = is.converged)
  if (exists("structure") && exists("pattern")){
    colnames(structure) <- colnames(pattern) <- colnames(Gamma)
    rownames(structure) <- rownames(pattern) <- rownames(Gamma)
    out$structure <- structure; out$pattern <- pattern
  }
  out$stats <- .FAfitstats(Gamma, Phi, cor(Y), Y)
  out <- structure(out, class = "facanal", model = "Factor Analysis (EM-Algorithm)")
  return(out)
}

#' Robust Factor Analysis
#'
#' @description This is similar to the factor model fitting method in the \code{\link{facanal}} function, but the variant of the EM
#' algorithm by Rubin & Thayer (1982) is implemented instead of the Bai & Li (2012) EM algorithm used in \code{\link{facanal}}. This
#' choice more easily accommodates a change in objective function; here the median absolute deviation of the errors is used as the
#' criterion to be minimized. \cr
#' \cr
#' The algorithm is also initialized with an eigendecomposition of a robust correlation/covariance matrix. The minimum regularized
#' covariance determinant (\code{\link{cov.mrcd}}) will be used in the case of n < p. Otherwise, if p <= 10 the smoothed-hard-rejection
#' estimator (\code{\link{cov.shr}}) will be used, and if p > 10 Rocke's S-estimator will be used. \cr
#' \cr
#' In the case of Likert scale type data, \emph{univariate outliers}, in the sense of a data point far from its mean, are rarely an issue save
#' for data entry errors, because Likert scales take on values from a small set of integers. If the data
#' being analyzed are continuous measurements without hard bounds on the possible values, the robust estimator helps ensure the
#' results are not compromised. \strong{\emph{Multivariate outliers}}, which are unusual combinations of values across the rows of a
#' data matrix, can also obscure the latent factor structure. If the data are suspected to be 'dirty', or one wishes to
#' validate the results of a standard factor fitting method, the robust factor model can be useful.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-12.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#' @references
#' Rubin, D. B., and Thayer, D. T. (1982), EM Algorithms for ML Factor Analysis, Psychometrika, 47 (1), 69-76. \cr \cr
#' Bai, J. and Li, K. (2012). Statistical analysis of factor models of high dimension. The Annals of Statistics 40, 436-465.
#' @export
#'
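#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## a robust two-factor fit to the built-in attitude data.
#' \dontrun{
#' fit <- robfa(attitude, nfac = 2)
#' print(fit)
#' }
#'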
robfa <-function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = GPArotation::Varimax, scale = T, max.iter = 4000, tol = 1e-12) {

  if (nfac==0){
    cat(crayon::red(("You need at least one factor... One factor will be returned.")))
    nfac <- 1
  }


  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!"))))
  }

  Y <- as.matrix(Y); m = ncol(Y); n = nrow(Y)

  if (n < m){cov <- cov.mrcd(Y); rhoY <- cov2cor(cov$cov)}
  else{
    if (m > 10){cov <- cov.rocke(Y)}else{cov <- cov.shr(Y)}
    rhoY <- cov2cor(cov$cov)
  }

  Ymu <- cov$center
  YVar <- diag(cov$cov)
  Y <- sweep(Y, 2, Ymu); if (scale){Y <- sweep(Y, 2, sqrt(YVar), "/"); YVar <- apply(Y,2,function(i) mean(i^2))}
  svdY = eigen(rhoY); svdY$v <- svdY$vectors
  if (is.null(nfac)){nfac <- m-1}; nfac <- min(nfac, .lederman(Y,svdY$values));
  if (sum(sign(svdY$v[,1]))<=0){svdY$v <- svdY$v * -1}
  values = sqrt(svdY$values[1:nfac])
  evectors = as.matrix(svdY$v[, 1:nfac, drop = FALSE])
  Gamma = evectors %*% diag(values,nrow=nfac,ncol=nfac); eps=1e-8
  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  Psi = pmax(eps,as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[, 1]))
  crit <- Inf; iter <- 0; is.converged <- F; obj <- c()
  while (!is.converged && iter < max.iter) {
    beta = Gamma/(sqrt(Psi)%*%t(rep(1,nfac)))
    svdBeta = wsvd(beta)
    theta = as.matrix(svdBeta$u)*(rep(1,m)%*%t(svdBeta$d/sqrt(1+svdBeta$d^2)))
    phi = 1/sqrt(Psi)
    phitheta = theta*(phi%*%t(rep(1,ncol(theta))))
    iS = diag(phi^2)-tcrossprod(phitheta)
    thetabeta = crossprod(theta,beta)
    theta2beta = tcrossprod(theta,t(thetabeta))
    aux = beta-theta2beta
    iSB = (phi%*%t(rep(1,ncol(theta))))*aux
    xiSB = Y %*% iSB
    Yz = crossprod(Y, xiSB/(n - 1))
    Czz = crossprod(iSB, Yz) + diag(nfac) - crossprod(Gamma, iSB)
    Gammanew=Yz %*% pseudoinverse(Czz)
    Psinew=as.vector(YVar - (Gammanew^2 %*% rep(1, nfac))[,1])
    # check for convergence: compare successive values of the objective
    # (squared MAD of the change in the uniquenesses)
    obj <- c(obj, mad(Psi-Psinew)^2)
    if (length(obj) > 1){
      crit <- obj[length(obj)-1] - obj[length(obj)]
      is.converged <- crit <= tol
    }
    iter = iter+ 1
    Gamma = Gammanew
    Psi = Psinew
  }

  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  if (!is.null(rotate) && nfac > 1){
    if (!is.function(rotate)) {
      stop("please supply a function from the GPArotation package to the argument rotate")
    } else {
      rotated <- rotate(Gamma)
      rotation <- rotated$method
      GammaOld <- Gamma
      Gamma <- rotated$loadings
      values <- diag(crossprod(Gamma))
      ord <- order(values, decreasing = TRUE)
      Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
      values <- values[ord]
      if (rotated$orthogonal){
        Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1]); Phi <- NULL
      } else if (!rotated$orthogonal){
        Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
      }
      scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(mpd(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/")))))
    }
  } else{
    values <- diag(crossprod(Gamma))
    rotation <- "none"; Phi <- NULL
    scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(mpd(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/")))))
  }
  if (nfac > 1)
    vectors = Gamma %*% diag(1/sqrt(values))
  else
    vectors = Gamma * sqrt(1/values)

  colnames(Gamma) <- paste0("FA", 1:ncol(Gamma))
  rownames(Gamma) <- colnames(Y)
  colnames(scores) <- colnames(Gamma)
  uniquenesses <- as.vector(Psi)
  communalities <- 1-uniquenesses
  names(communalities) <- names(uniquenesses) <- colnames(Y)
  total.var <- sum(communalities + uniquenesses)
  cum.exp <- cumsum(colSums(Gamma^2))/total.var
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  orthdist <- .orthdist(Y, Gamma)
  scoredist <- .scoredist(scores, values)
  out = list(loadings = Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity, cum.exp = cum.exp, e.values = eigen(cov(Y))$values,
             uniquenesses = uniquenesses, communalities = communalities, complexity = complexity, scores = scores, Phi = Phi, orthdist = orthdist, scoredist = scoredist, factors = nfac, rotation = rotation,
             n.obs = nrow(Y), niter = iter, converged = is.converged)
  if (exists("structure") && exists("pattern")){
    colnames(structure) <- colnames(pattern) <- colnames(Gamma)
    rownames(structure) <- rownames(pattern) <- rownames(Gamma)
    out$structure <- structure; out$pattern <- pattern
  }
  out$stats <- .FAfitstats(Gamma, Phi, rhoY, Y)
  return(structure(out, class = "facanal", model = "Robust Factor Analysis (EM-Algorithm)"))
}


#' Alpha Factor Analysis
#'
#' @description Alpha factor analysis (AFA) is a method proposed by Kaiser and Caffrey (1965)
#' which treats as a source of error the fact that only a portion of the possibly
#' relevant variables have been collected and submitted to the analysis. As such, the
#' objective function minimizes the L1 norm of the communalities, rather than the L2
#' norm of the uniquenesses. Furthermore, the eigenvalues of the latent variables are taken
#' as measures of the generalizability of the latent variables. \cr
#' \cr
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to attempt to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param corr one of "pearson", "robust", or "spearman".
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param screen Kaiser and Caffrey suggested that only latent variables with eigenvalues greater
#' than 1 be retained. If TRUE and the initial fit with the user-chosen number of factors fails
#' to satisfy this criterion, the model is iteratively refit with a smaller number of factors until
#' all factor eigenvalues are greater than 1. However, such criteria have fallen out of favor,
#' so the default is FALSE.
#' @param tol a tolerance value for convergence. defaults to 1e-24.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#' @references
#' Kaiser, H. F., & Caffrey, J. (1965). Alpha factor analysis. Psychometrika, 30(1), 1–14. doi:10.1007/bf02289743 \cr \cr
#' Kaiser, H. F., & Derflinger, G. (1990). Some Contrasts Between Maximum Likelihood Factor Analysis and Alpha Factor Analysis. Applied Psychological Measurement, 14(1), 29–32. doi:10.1177/014662169001400103
#' @export
#'
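#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## alpha factor analysis with two factors and a Spearman correlation matrix.
#' \dontrun{
#' fit <- alfa(attitude, nfac = 2, corr = "spearman")
#' print(fit)
#' }
#'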
alfa <- function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = Varimax, scale = T, screen = F, corr = c("pearson","robust","spearman"), max.iter = 4000, tol = 1e-24){

  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!"))))
  }
  corr <- match.arg(corr)
  columnNames <- colnames(Y)
  Y <- as.matrix(Y); n <- nrow(Y); p <- ncol(Y)

  if (corr=="robust"){
    if (n < p){
      cov <- cov.mrcd(Y); ycor <- rhomat <- cov2cor(cov$cov)
    }
    else{
      if (p > 10){cov <- cov.rocke(Y)} else{cov <- cov.shr(Y)}
      ycor <- rhomat <- cov2cor(mpd(cov$cov))
    }
    YVar <- diag(cov$cov); Ymu <- cov$center; Y <- sweep(Y, 2, Ymu)
    if (scale){Y <- sweep(Y, 2, sqrt(YVar), "/"); YVar <- apply(Y,2,var)}
  }
  else if (corr=="spearman"){
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(apply(Y,2,rank), target = "adaptive"))
    }
    else{
      ycor <- rhomat <- mpd(cor(Y,method="s"))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
    if (scale){Y <- scale(Y); YVar <- rep(1, p)}else{Y <- sweep(Y, 2, Ymu)}
  }
  else {
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(Y, target = "adaptive"))
    }
    else{
      ycor <- rhomat <- cov2cor(mpd(cov(Y)))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
    if (scale){Y <- scale(Y); YVar <- rep(1, p)}else{Y <- sweep(Y, 2, Ymu)}
  }

  e.values <- eigen(rhomat, T, T)$values
  nfac <- min(nfac, .lederman(Y))

  if (n < p)  nfac <- max(3, nfac - 2)
  .alphafit <- function(Y, rhomat, nfac, rotate, e.values, tol, max.iter, columnNames){
    rho <- rhomat
    H2 <- diag(rhomat)
    iter <- 0; err <- Inf
    while (err > tol && iter < max.iter) {
      rhomat <- cov2cor(rhomat)
      eig <- eigen(rhomat, T, F)
      if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
      if (nfac > 1){
        GammaTilde <- as.matrix(as.matrix(eig$vectors[,1:nfac]) %*% diag(sqrt(eig$values[1:nfac])))
      }
      else {
        GammaTilde <- as.matrix(eig$vectors[,1] * sqrt(eig$values[1]))
      }
      M <- tcrossprod(GammaTilde)
      newH2 <- H2*diag(M)
      err <- sum(abs(H2-newH2))
      rhomat <- rho
      diag(rhomat) <- newH2
      H2 <- newH2
      iter <- iter + 1
    }
    is.converged <- err <= tol
    if (sum(sign(GammaTilde[,1]))<=0){GammaTilde <- GammaTilde * -1}
    Gamma <- sqrt(H2) * GammaTilde
    if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
    eig <- sqrt(H2) * eig$values
    communalities <- rowSums(Gamma^2)
    uniquenesses <- Psi <- as.vector(YVar-(Gamma^2 %*% rep(1, nfac))[,1])
    values <- diag(crossprod(Gamma))
    if (nfac > 1){
      vectors <- Gamma %*% diag(1/sqrt(values))
    }
    else{
      vectors <- as.matrix(Gamma * sqrt(1/values[1]))
    }

    if (!is.null(rotate) && nfac > 1){
      if (!is.function(rotate)) {
        stop("please supply a function from the GPArotation package to the argument rotate")
      } else {
        rotated <- rotate(Gamma)
        rotation <- rotated$method
        GammaOld <- Gamma
        Gamma <- rotated$loadings
        values <- diag(crossprod(Gamma))
        ord <- order(values, decreasing = TRUE)
        Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
        values <- values[ord]
        if (rotated$orthogonal){
          communalities <- rowSums(Gamma^2); Phi <- NULL
          uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
        } else if (!rotated$orthogonal){
          Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
        }
        Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
        scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
      }
    }
    else{
      rotation <- "none"; Phi <- NULL
      scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/"), solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
    }
    complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
    fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
    colnames(Gamma) <- colnames(scores) <- paste0("FA", 1:ncol(Gamma))
    rownames(Gamma) <- names(uniquenesses) <- names(communalities) <- columnNames
    out <- list(loadings=Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity,
                uniquenesses = uniquenesses, communalities = communalities, complexity = complexity,
                Phi = Phi, scores = scores, factors = nfac, rotation = rotation, n.obs = nrow(Y), niter = iter,
                converged = is.converged)
    if (exists("structure") && exists("pattern")){
      colnames(structure) <- colnames(pattern) <- colnames(Gamma)
      rownames(structure) <- rownames(pattern) <- rownames(Gamma)
      out$structure <- structure; out$pattern <- pattern
    }
    return(out)
  }

  out <- suppressWarnings(.alphafit(Y, ycor, nfac, rotate, e.values, tol, max.iter, columnNames))
  if (screen && any(out$values < 1)){
    all1ormore <- FALSE
    while(!all1ormore && nfac != 1){
      nfac <- max(1, sum(out$values >= 1))
      out <- suppressWarnings(.alphafit(Y, ycor, nfac, rotate, e.values, tol, max.iter, columnNames))
      all1ormore <- all(out$values >= 1)
    }
  }

  out$orthdist <- .orthdist(Y, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$total.var <- sum(out$communalities + out$uniquenesses)
  out$cum.exp <- cumsum(colSums(out$loadings^2))/out$total.var
  out$stats <- .FAfitstats(out$loadings, out$Phi, rhomat, Y)
  return(structure(out, class = "facanal", model = "Alpha Factor Analysis"))
}



#' Principal Factors Analysis (Principal Axis Factoring)
#'
#' @description This implements the original factor analysis method, principal axis
#' factoring. The only augmentation, besides rotation of the loadings, is that shrinkage
#' (the adaptive non-linear shrinkage method from \code{\link{covShrink}}) can be used to achieve a more
#' numerically stable fit, particularly in high dimensional settings. When n < p it is switched on automatically.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to attempt to extract.
#' @param rotate a rotation function from the GPArotation package. Defaults to Varimax.
#' @param corr one of "pearson", "robust", or "spearman".
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-12.
#' @param max.iter maximum number of iterations. defaults to 4000.
#'
#'
#' @return an object of class "facanal"
#' @export
#'
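#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## principal axis factoring with two factors on the built-in attitude data.
#' \dontrun{
#' fit <- pfa(attitude, nfac = 2)
#' print(fit)
#' }
#'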
pfa <- function(Y, nfac = min(nrow(Y)-3, ncol(Y)-2), rotate = Varimax, scale = T, corr = c("pearson","robust","spearman"), max.iter = 4000, tol = 1e-12){

  numcheck <- sapply(Y, is.numeric)
  if (any(!numcheck)) {
    return(cat(crayon::red(("'Y' must not contain character or factor variables!"))))
  }
  corr <- match.arg(corr)
  columnNames <- colnames(Y)
  Y <- as.matrix(Y)
  n <- nrow(Y); p <- ncol(Y)

  if (corr=="robust"){
    if (n < p){
      cov <- cov.mrcd(Y); ycor <- rhomat <- cov2cor(cov$cov)
    }
    else{
      if (p > 10){cov <- cov.rocke(Y)} else{cov <- cov.shr(Y)}
      ycor <- rhomat <- cov2cor(mpd(cov$cov))
    }
    Ymu <- cov$center; YVar <- diag(cov$cov)
  }
  else if (corr=="spearman"){
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(apply(Y,2,rank), target = "adaptive"))
    }
    else{
      ycor <- rhomat <- mpd(cor(Y,method="s"))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
  }
  else if (corr=="pearson"){
    if (n < p){
      ycor <- rhomat <- cov2cor(covShrink(Y, target = "adaptive"))
    }
    else{
      ycor <- rhomat <- cov2cor(mpd(cov(Y)))
    }
    Ymu <- colMeans(Y); YVar <- apply(Y, 2, var)
  }
  Y <- sweep(Y, 2, Ymu)
  if (scale){Y <- sweep(Y, 2, sqrt(YVar), "/"); Yvar <- apply(Y, 2, var)}
  e.values <- eigen(rhomat, T, T)$values
  nfac <- min(nfac, .lederman(Y))
  rsse <- Inf; iter <- 0; h.old <- 1e100
  while (rsse > tol && iter <= max.iter) {
    eig <- eigen(rhomat, T)
    if (sum(sign(eig$vectors[,1]))<=0){eig$vectors <- eig$vectors * -1}
    if (nfac > 1) {
      Gamma <- eig$vectors[,1:nfac] %*% diag(sqrt(eig$values[1:nfac]))
    }
    else {
      Gamma <- as.matrix(eig$vectors[,1] * sqrt(eig$values[1]))
    }
    M <- tcrossprod(Gamma)
    h.new <- diag(M)
    diag(rhomat) <- h.new
    rsse <- sqrt(sum((h.old - h.new)^2))
    h.old <- h.new
    iter <- iter + 1
  }
  values <- eig$values[1:nfac]
  is.converged <- rsse <= tol
  if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
  communalities <- rowSums(Gamma^2)
  uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
  if (nfac > 1){
    vectors <- Gamma %*% diag(1/sqrt(values))
  }
  else{
    vectors <- as.matrix(Gamma * 1/sqrt(values))
  }

  if (!is.null(rotate) && nfac > 1){
    if (!is.function(rotate)) {
      stop("please supply a function from the GPArotation package to the argument rotate")
    } else {
      rotated <- rotate(Gamma)
      rotation <- rotated$method
      GammaOld <- Gamma
      Gamma <- rotated$loadings
      values <- diag(crossprod(Gamma))
      ord <- order(values, decreasing = TRUE)
      Gamma <- Gamma[,ord]; if (sum(sign(Gamma[,1]))<=0){Gamma <- Gamma * -1}
      values <- values[ord]
      if (rotated$orthogonal){
        communalities <- rowSums(Gamma^2); Phi <- NULL
        uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
      } else if (!rotated$orthogonal){
        Phi <- rotated$Phi[ord,ord]; structure <- Gamma%*%Phi; pattern <- Gamma
        communalities <- rowSums(Gamma^2); uniquenesses <- Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
      }
      Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
      scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
    }
  }
  else{
    rotation <- "none"; Phi <- NULL; Psi <- as.vector(YVar - (Gamma^2 %*% rep(1, nfac))[,1])
    scores = Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , solve(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/"))))
  }
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  colnames(Gamma) <- colnames(scores) <- paste0("FA", 1:ncol(Gamma))
  rownames(Gamma) <- names(uniquenesses) <- names(communalities) <- columnNames
  out <- list(loadings=Gamma, vectors = vectors, values = values, fac.complexity = fac.complexity, uniquenesses = uniquenesses, communalities = communalities,
              complexity = complexity, Phi = Phi, scores = scores, factors = nfac, rotation = rotation, n.obs = nrow(Y), niter = iter,
              converged = is.converged)

  out$orthdist <- .orthdist(Y, out$loadings)
  out$scoredist <- .scoredist(out$scores, out$values)
  out$total.var <- sum(out$communalities + out$uniquenesses)
  out$cum.exp <- cumsum(colSums(out$loadings^2))/out$total.var
  if (exists("structure") && exists("pattern")){
    colnames(structure) <- colnames(pattern) <- colnames(Gamma)
    rownames(structure) <- rownames(pattern) <- rownames(Gamma)
    out$structure <- structure; out$pattern <- pattern
  }
  out$stats <- .FAfitstats(Gamma, Phi, ycor, Y)
  return(structure(out, class = "facanal", model = "Principal Axis Factoring"))
}

#' Independent Factors Analysis
#'
#' @description Independent Factors Analysis (IFA) is an extension and synthesis of the factor analysis
#' and independent components analysis methods. Note that rotation is not implemented in this model, as
#' the IFA solution is not invariant under orthogonal rotations. Any orthogonal rotation of independent
#' variables will result in correlated ones, breaking the independence of the latent variables extracted
#' by an independent components or independent factors analysis. Uncorrelatedness is a neccessary, but not
#' sufficient condition for independence. Independence is a stronger condition: a set of random variables
#' has the property of independence when the joint probability density function of the set is the product
#' of each individual variable's probability density function, i.e. \emph{\eqn{f(x_1, x_2, ..., x_p) =
#' \sum f(x_p)}}. However, the property of being uncorrelated only means that the linear relationship between
#' a set of variables is null, which obviously leaves open other types of relationships. Likewise orthogonality
#' only implies linear independence. While independent components/factors are orthogonal to one another,
#' the basis vectors are \emph{not} neccessarily orthogonal. This is unlike PCA, where the basis vectors
#' are orthogonal to each other. Therefore, orthogonal rotation of the basis vectors of ICA/IFA will not
#' be guaranteed to preserve orthogonality of the components, as the orthogonal rotation only guarantees
#' \emph{linear} independence.
#'
#' @param Y a numeric matrix or data frame of only numeric variables.
#' @param nfac the number of factors to extract.
#' @param ic an integer vector of length \emph{nfac} detailing how many independent components comprise each factor,
#' for example, c(4,3,3,1). The default is NULL, which yields two independent components for the first factor and
#' one for each remaining factor.
#' @param scale should the variables be scaled prior to analysis? Defaults to TRUE.
#' @param tol a tolerance value for convergence. defaults to 1e-4.
#' @param max.iter maximum number of iterations. defaults to 1000.
#'
#' @return an object of class "facanal"
#' @export
#' @references
#' Attias, H. (1999). Independent factor analysis, Neural Comput. 11(4), 803–851. \cr \cr
#' Montanari, A. & Viroli, C. (2010). The independent factor analysis approach to latent variable modelling, Statistics, 44:4, 397-416, doi:10.1080/02331880903189125
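#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## two independent factors, each built from the default number of components.
#' \dontrun{
#' fit <- ifa(attitude, nfac = 2)
#' print(fit)
#' }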

ifa <-function(Y, nfac = min(nrow(Y)-2, ncol(Y)), ic = NULL, scale=TRUE, max.iter = 1000, tol=1e-4){

  columnNames <- colnames(Y)
  if (is.null(nfac)){nfac <- .lederman(Y)}

  if (is.null(ic)){
    ic <- c(2, rep(1, nfac-1))
  }
  else if (length(ic) < nfac){
    ic <- c(ic, rep(1, nfac-length(ic)))[1:nfac]
  }
  else if (length(ic) > nfac){
    ic <- ic[1:nfac]
  }
  else {
    ic <- ic
  }
  n<-nrow(Y); p<-ncol(Y); ymu<- colMeans(Y)
  if (scale) Y<-scale(Y) else Y <- sweep(Y,2,ymu)
  ic<-as.matrix(ic)
  lik <- -100000
  totalICs<-prod(ic)
  maxIC<-max(ic)
  nfac <- min(nfac, .lederman(Y));
  output.init<-.ifa_init(Y,nfac,scale)
  Psi<-output.init$Psi
  Psi<-diag(diag(Psi))
  Gamma<-output.init$Gamma
  w<-matrix(0,nfac,maxIC)
  ic.means<-matrix(0,nfac,maxIC)
  ic.var<-matrix(0,nfac,maxIC)

  for (i in 1:nfac){
    for (j in 1:ic[i]){
      w[i,j] <- 1
      ic.means[i,j] <- if(j==1){0}else if(j==2){-1}else{ (0.25 + abs(ic.means[i,j-1])) * -1 * sign(ic.means[i,j-1])}
      ic.var[i,j]  <- 1
    }
  }
  w<-w/rowSums(w)


  out<-.ifa_fit(Y,n,nfac,p,ic,totalICs,maxIC,max.iter,Gamma,w,ic.means,ic.var,tol,Psi,lik)

  likelihood<-out$likelihood
  sigma<-out$sigma
  pqy<-out$pqy
  Gamma<-out$Gamma
  values <- colSums(Gamma^2)
  vectors <- Gamma %*% diag(1/sqrt(values))
  w<-out$w
  ic.means<-out$ic.means
  ic.var<-out$ic.var
  Psi<-out$Psi
  uniquenesses <- Psi<-diag(Psi)
  communalities <- 1 - uniquenesses
  complexity <- (rowSums(Gamma^2)^2)/rowSums(Gamma^4)
  fac.complexity <- (colSums(Gamma^2)^2)/colSums(Gamma^4)
  likelihood<-matrix(likelihood[!likelihood==0])
  scores <- Y %*% crossprod(sweep(t(Gamma),2,Psi,"/") , pseudoinverse(tcrossprod(sweep(t(Gamma),2,sqrt(Psi),"/")))); scores <- scores %*% matpower(cov(scores), -0.5)
  orthdist <- .orthdist(Y, Gamma)
  scoredist <- .scoredist(scores, values)
  total.var <- sum(communalities + uniquenesses)
  cum.exp <- cumsum(colSums(Gamma^2))/total.var
  stats <- .FAfitstats(Gamma, NULL, cor(Y), Y)
  rotation <- "none"; Phi <- NULL
  colnames(Gamma) <- colnames(vectors) <- colnames(scores) <- paste0("FA", 1:ncol(Gamma))
  rownames(vectors) <- rownames(Gamma) <- names(uniquenesses) <- names(communalities) <- columnNames

  ic.info <- list(totalICs = totalICs, ic = ic, sigma = sigma, pqy = pqy, w = w, ic.means=ic.means,ic.var=ic.var)

  output<-list(loadings=Gamma,vectors=vectors,values=values, scores=scores, orthdist = orthdist, scoredist = scoredist,
               uniquenesses=uniquenesses,communalities=communalities,complexity=complexity, fac.complexity = fac.complexity,
               rotation = rotation, Phi = Phi, factors=nfac, n.obs=n, ic.info = ic.info, stats = stats)

  return(structure(output, class = "facanal", model = "Independent Factor Analysis"))
}


#' print method for PrincipalComp objects
#'
#' @param fit model fit
#' @param tol absolute loadings smaller than this value will be omitted from the display. defaults to 1e-4.
#' @export
#' @method print PrincipalComp
#' @keywords internal
print.PrincipalComp <- function(fit, tol = 1e-4){

  loadings <- zapsmall(fit$loadings, 5)
  rownames(loadings) <- abbreviate(rownames(loadings), minlength = 7, method = "both.sides", named = F)
  loadings[which(abs(loadings) < tol)] <- 0
  loadings <- round(loadings, 3)
  model <- attr(fit, "model")

  if (model=="PCA" || model=="L1 PCA" || model=="R1 PCA" || model == "Robust PCA" || model == "Spherical PCA" || model == "Probablistic PCA" || model == "Mixed PCA"){
    cat(.coral("\nEigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.hotpink("Cumulative Variance Explained : \n\n"), round(fit$cum.exp, 4), "\n\n")
    cat(.rosered("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.coral2("Fitting Method: "), model, "\n")
  }
  else if (model=="Sparse PCA" || model == "Robust Sparse PCA"){
    cat(.newblack("\nEigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.bluez("Cumulative Variance Explained : \n\n"), round(fit$cum.exp, 3), "\n\n")
    cat(.bluez2("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.nurple("Fitting Method: "), model, "\n")
  }
  else if (model=="Local PCA"){
    cat(.lightgreen("\nEigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.nurple("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.seamist("Fitting Method: "), model, "\n")
  }
  else if (model=="Curvilinear PCA"){
    cat(.lightgreen("\nApprox. Eigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.bluez("Approx. Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.nurple("Fitting Method: "), model, "\n")
  }
  else if (model=="Kernel PCA"){
    cat(.nurple("\n Eigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.purple("Fitting Method: "), model, "\n")
  }
  else if (model=="Sample Dependent LPP"){
    cat(.nurple("\n Eigenvalues : \n\n"), round(fit$values, 3), "\n\n")
    cat(.purple("Loadings : \n\n"))
    Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
    cat("\n\n")
    cat(.pipple("Fitting Method: "), model, "\n")
  }
}


#' print method for facanal objects
#'
#' @param fit model fit
#' @param tol absolute loadings smaller than this value will be omitted from the display. defaults to 0.075.
#' @export
#' @method print facanal
#' @keywords internal
print.facanal <- function(fit, tol = 0.075){
  model <- attr(fit, "model")
  values <- fit$values
  e.values <- fit$e.values
  communalities <- round(unname(fit$communalities), 3)
  uniquenesses <-  1 - communalities
  complexity <- round(fit$complexity, 2)
  fac.complexity <- fit$fac.complexity
  names(fac.complexity) <- names(values) <- paste0("FA", 1:length(values))
  names(complexity) <- names(communalities) <-  names(uniquenesses) <- abbreviate(rownames(fit$loadings),
                                    minlength = 7, method = "both.sides", named = F)
  cat(.coral("\nEigenvalues of Factor Solution: \n"))
  print(noquote(format(values, digits = 2, nsmall = 2)))
  cat(.coral2("\nFactor Complexities: \n"))
  print(noquote(format(fac.complexity, digits = 2, nsmall = 2)))
  cat(.hotpink("\nLoadings, Uniquenesses, & Communalities: \n"))
  loadings <- zapsmall(fit$loadings, 4)
  rownames(loadings) <- abbreviate(rownames(loadings), minlength = 7, method = "both.sides", named = F)
  loadings[which(abs(loadings) < tol)] <- 0
  loadings <- round(loadings, 3)
  newcolnames <- c(colnames(loadings), "", "c2", "u2", "k")
  loadings <- cbind(loadings, rep(0, length(complexity)),  communalities, uniquenesses, complexity)
  colnames(loadings) <- newcolnames
  Matrix::printSpMatrix(as(loadings, "sparseMatrix"), col.names = TRUE, zero.print = " ")
  cat("\n")
  if (any(communalities > 1)){
    cat(crayon::bold(.crimson("An ultra Heywood case was detected. Factor solution is invalid. \n")))
  }
  if (fit$stats$KMO <= 0.50){
    cat(crayon::bold(.red2("The Kaiser-Meyer-Olkin index indicates data lacks sufficient structure for legitimate factors to be recovered.  \n")))
  }
  cat(.nurple2("Fitting Method: "), model, .nurple(" Rotation Method: "), fit$rotation, "\n")
  cat(.pipple2("Bentler-Bonett Normed Fit Index: "), fit$stats$NFI, .pipple2("KMO Sampling Adequacy Index: "), round(fit$stats$KMO,3), "\n")
  cat(.pipple(paste0("Chi-Squared (", fit$stats$dof, " df) : ")), fit$stats$chisq, .pipple(" BIC:"), fit$stats$BIC, .pipple(" Evidence Ratio:"), Rmpfr::formatDec(Rmpfr::mpfr(1/Rmpfr::mpfr(exp(-0.5*(fit$stats$null.BIC-fit$stats$BIC)),500),500),precBits=15,digits=5), "\n")
}



#' Promax Factor Rotation (GPArotation format)
#'
#' @description This implements the promax rotation with output consistent with the naming
#' conventions of the GPArotation package. Promax rotation is a factor rotation method which
#' seeks a solution that is as orthogonal as possible, but allows obliqueness rather than
#' forcing an orthogonal solution.
#'
#' @param L A factor loading matrix
#' @param m The power used for promax. Values of 2 to 6 are recommended. Defaults to 4.
#'
#' @return A list (which includes elements used by facanal) with: \cr
#' \describe{
#' \item{loadings}{The new loadings obtained by L \%*\% Th.}
#' \item{Th}{The rotation matrix.}
#' \item{method}{A string indicating the rotation objective function.}
#' \item{orthogonal}{A logical indicating if the rotation is orthogonal. Is FALSE for Promax.}
#' \item{Phi}{t(Th) \%*\% Th. The covariance matrix of the rotated factors.}
#' }
#' @export
#' @references
#' Hendrickson, A. E. and White, P. O. (1964). Promax: a quick method for rotation to oblique simple structure. British Journal of Statistical Psychology, 17, 65–70. doi: 10.1111/j.2044-8317.1964.tb00244.x.
#'
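#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## obtain unrotated loadings with pfa() and then apply the Promax rotation.
#' \dontrun{
#' fit <- pfa(attitude, nfac = 2, rotate = NULL)
#' rot <- Promax(fit$loadings, m = 4)
#' rot$loadings
#' rot$Phi
#' }
#'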
Promax <- function (L, m = 4){
  if (missing(m))
    m <- 4
  if (!is.matrix(L) && !is.data.frame(L)) {
    if (!is.null(L$loadings))
      L <- as.matrix(L$loadings)
  }
  if (ncol(L) < 2)
    return(L)
  dn <- dimnames(L)
  xx <- GPArotation::Varimax(L, normalize = F, eps = 1e-4, maxit = 5000)
  L <- xx$loadings
  Q <- L * abs(L)^(m - 1)
  U <- lm.fit(L, Q)$coefficients
  d <- try(diag(pseudoinverse(crossprod(U))), silent = TRUE)
  if (inherits(d, "try-error"))
    d <- try(diag(cvreg:::.ridgeinv(crossprod(U))), silent = TRUE)
  U <- U %*% diag(sqrt(d))
  dimnames(U) <- NULL
  z <- L %*% U
  U <- xx$Th %*% U
  ui <- pseudoinverse(U)
  Phi <- tcrossprod(ui)
  dimnames(z) <- dn
  class(z) <- "loadings"
  result <- structure(list(loadings = z, Th = U, Phi = Phi, orthogonal = FALSE, method = "Promax"),class="GPArotation")
  return(result)
}



#' Kaiser Meyer Olkin Sampling Adequacy Index
#'
#' @param x a data frame or matrix of numeric variables
#' @param rho a custom correlation or covariance matrix if one is so desired
#' (ie one of the robust estimators). if left as NULL the pearson correlation
#' matrix is calculated.
#'
#' @return a list containing the measure of sampling adequacy for each variable, the KMO
#' index for overall adequacy, and the Lederman bound which defines the maximum number of
#' latent variables that can be extracted.
#' @export
#'
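#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## per-variable MSA, overall KMO, and the Lederman bound for the attitude data.
#' \dontrun{
#' kmo(attitude)
#' }
#'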
kmo <- function(x, rho=NULL){
  if (is.null(rho))
    cormat  <- cor(x)
  else
    cormat <- cov2cor(rho)
  pcormat <- cov2pcor(mpd(cormat))
  n <- nrow(x); p <- ncol(x)
  seq_p <- seq_len(p)
  MSA <- unlist(lapply(seq_p, function(i){sum((cormat[i,-i])^2) / ( sum((cormat[i,-i])^2) + sum((pcormat[i,-i])^2))}))
  names(MSA) <- colnames(x)
  KMO <- sum((cormat[!diag(p)])^2) / ( sum((cormat[!diag(p)])^2) + sum((pcormat[!diag(p)])^2) )
  eig <- eigen(cormat)$values
  res <- list("MSA" = MSA, "KMO" = KMO, "LedermanBound" = floor(.lederman(x,eig)))
  return(res)
}


#' Estimate the number of latent factors for exploratory factor analysis
#'
#' @description This implements the estimator of the number of latent variables for exploratory factor analysis
#' derived by Onatski (2010) from random matrix theory. The number of factors is bounded from above by the Lederman
#' boundary, which is the maximum number of factors it is possible to derive from a given data set.
#'
#' @param Y a matrix or data frame of numeric covariates
#' @param max.fac maximum number of factors (if larger than the Lederman boundary this will be ignored)
#' @param max.iter maximum number of iterations. defaults to 10.
#' @param scale should the data be scaled first? defaults to TRUE.
#'
#' @return a list containing a numeric value and a vector of the eigenvalue differences.
#' @export
#'
#' @references
#' Onatski, A. (2010). Determining the Number of Factors from Empirical Distribution of Eigenvalues. Review of Economics and Statistics, 92(4), 1004–1016. doi:10.1162/rest_a_00043
#'
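#' @examples
#' ## Minimal illustrative sketch (not part of the original documentation):
#' ## estimate the number of latent factors for the built-in attitude data.
#' \dontrun{
#' nfac.est(attitude)
#' }
#'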
nfac.est <- function(Y, max.fac = NULL, max.iter = 10, scale = TRUE) {

  Y <- as.matrix(Y)
  n <- nrow(Y); p <- ncol(Y)
  if (scale)
    Y <- scale(Y)
  else
    Y <- sweep(Y, 2, colMeans(Y))

  ev <- wsvd(Y)$d^2 / (n-1)
  lederman.bound <- .lederman(Y, ev)
  if (is.null(max.fac))
    max.fac <- lederman.bound
  else
    max.fac <- min(c(lederman.bound, max.fac))

  j <- max.fac + 1
  diffs <- ev - c(ev[-1], 0)

  for (i in 1:max.iter) {
    y <- ev[j:(j+4)]
    x <- ((j-1):(j+3))^(0.666)
    lm.coef <- lm(y ~ x)
    delta <- 2 * abs(lm.coef$coef[2])
    idx <- which(diffs[1:max.fac] > delta)
    if (length(idx) == 0)
      hatr <- 0
    else hatr <- max(idx)

    newj = hatr + 1
    if (newj == j) break
    j = newj
  }
  return(list(est.facs = hatr, diffs = diffs))
}




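## Build a symmetric nearest-neighbour graph from Minkowski (Lp) distances
## (descriptive comment added for clarity): each observation is linked to roughly
## p*n of its nearest neighbours, the mask is made mutual (i and j must select
## each other), and non-neighbour entries of the returned distance matrix are set
## to -Inf. Presumably used by the graph/locality-based PCA routines.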
.distgraph <- function(data, p = 0.25, Lp=1.4){
  D = cvreg:::minkowski_dist(as.matrix(data), Lp)
  ndata = nrow(D)
  nnpar = max(p*ndata, 1)
  nnpar = as.integer(min((ceiling(nnpar)),ndata))
  outMask = array(0,c(ndata,ndata))
  for (i in 1:ndata){
    tgt = D[i,]
    idx = as.vector(which(tgt<=max((sort(tgt,decreasing=FALSE))[1:min((nnpar+1),ndata)]),arr.ind=T))
    outMask[i,idx] = 1
  }
  for (i in 1:ndata){
    outMask[i,i] = 0
  }
  outMask = matrix(as.logical(as.integer(outMask)),nrow=ndata,ncol=ndata)
  outMask = matrix(as.logical(outMask * t(outMask)),nrow=ndata,ncol=ndata)
  outD1 = outMask * D
  outD2 = (!outMask) * array(-Inf,c(ndata,ndata))
  outD2[which(is.na(outD2))] = 0
  outD = outD1 + outD2
  for (i in 1:ndata){
    outD[i,i] = 0
  }
  result = list()
  result$mask = outMask
  result$dist = outD
  return(result)
}

## For adjusting Laplacian eigenvalues
.adjEigLaplace <- function(eigL){
  if(min(eigL$values)<=0){
    minpos <- min(eigL$values[-which(eigL$values<=0)])
    new <- minpos * rev(seq(0.98, 0.99, len = sum(eigL$values<=0)))
    eigL$values[which(eigL$values<=0)]<-new;
    eigL$values <- eigL$values + 1e-16
    eigL$values<-sort(eigL$values,decreasing=T)
    return(eigL)
  }
  else{
    return(eigL)
  }
}


.adjLoadings <- function(P){
  n = nrow(P); p = ncol(P)
  output = array(0,c(n,p))
  for (i in 1:p){
    cvec = as.vector(P[,i])
    output[,i] = cvec/sqrt(sum(cvec*cvec))
  }
  return(output)
}


.biScalePCA <- function (u, delta = 0.5, c = 1.547645, maxit = 100, tol = 1e-04){
  s0 <- median(abs(u))
  if (s0 == 0){s0 <- mean(abs(u))/0.7978846}
  else{s0 <- s0/0.6744898}
  err <- tol + 1
  it <- 0
  while ((err > tol) && (it < maxit)) {
    it <- it + 1
    s1 <- sqrt(s0^2 * mean(rho.Bisquare(u/s0, c = c))/delta)
    err <- abs(s1 - s0)/s0
    s0 <- s1
  }
  return(s0)
}


.gsvd = function (X, row.w = NULL, col.w = NULL,ncp=Inf) {
  tryCatch.W.E <- function(expr){
    W <- NULL
    w.handler <- function(w){
      W <<- w
      invokeRestart("muffleWarning")
    }
    list(value = withCallingHandlers(tryCatch(expr, error = function(e) e),
                                     warning = w.handler),
         warning = W)
  }


  if (is.null(row.w)) row.w <- rep(1/nrow(X), nrow(X))
  if (is.null(col.w)) col.w <- rep(1, ncol(X))
  ncp <- min(ncp,nrow(X)-1,ncol(X))
  row.w <- row.w / sum(row.w)
  X <- t(t(X)*sqrt(col.w))*sqrt(row.w)
  if (ncol(X)<nrow(X)){
    svd.usl <- tryCatch.W.E(svd(X,nu=ncp,nv=ncp))$val
    if (names(svd.usl)[[1]]=="message"){
      svd.usl <- tryCatch.W.E(svd(t(X),nu=ncp,nv=ncp))$val
      if (names(svd.usl)[[1]]=="d"){
        aux <- svd.usl$u
        svd.usl$u <- svd.usl$v
        svd.usl$v <- aux
      } else{
        bb <- eigen(crossprod(X),symmetric=TRUE)
        svd.usl <- vector(mode = "list", length = 3)
        svd.usl$d[svd.usl$d<0]=0
        svd.usl$d <- sqrt(svd.usl$d)
        svd.usl$v <- bb$vec[,1:ncp]
        svd.usl$u <- t(t(crossprod(t(X),svd.usl$v))/svd.usl$d[1:ncp])
      }
    }
    U <- svd.usl$u
    V <- svd.usl$v
    if (ncp >1){
      mult <- sign(as.vector(crossprod(rep(1,nrow(V)),as.matrix(V))))
      mult[mult==0] <- 1
      U <- t(t(U)*mult)
      V <- t(t(V)*mult)
    }
    U <- U/sqrt(row.w)
    V <- V/sqrt(col.w)
  }
  else{
    svd.usl <- tryCatch.W.E(svd(t(X),nu=ncp,nv=ncp))$val
    if (names(svd.usl)[[1]]=="message"){
      svd.usl <- tryCatch.W.E(svd(X,nu=ncp,nv=ncp))$val
      if (names(svd.usl)[[1]]=="d"){
        aux <- svd.usl$u
        svd.usl$u <- svd.usl$v
        svd.usl$v <- aux
      } else{
        bb <- eigen(crossprod(t(X),t(X)),symmetric=TRUE)
        svd.usl <- vector(mode = "list", length = 3)
        svd.usl$d[svd.usl$d<0]=0
        svd.usl$d <- sqrt(svd.usl$d)
        svd.usl$v <- bb$vec[,1:ncp]
        svd.usl$u <- t(t(crossprod(X,svd.usl$v))/svd.usl$d[1:ncp])
      }
    }
    U <-  svd.usl$v
    V <- svd.usl$u
    mult <- sign(as.vector(crossprod(rep(1,nrow(V)),as.matrix(V))))
    mult[mult==0] <- 1
    V <- t(t(V)*mult)/sqrt(col.w)
    U <- t(t(U)*mult)/sqrt(row.w)
  }
  vs <- svd.usl$d[1:min(ncol(X),nrow(X)-1)]
  num <- which(vs[1:ncp]<1e-15)
  if (length(num)==1){
    U[,num] <- U[,num,drop=FALSE]*vs[num]
    V[,num] <- V[,num,drop=FALSE]*vs[num]
  }
  if (length(num)>1){
    U[,num] <- t(t(U[,num])*vs[num])
    V[,num] <- t(t(V[,num])*vs[num])
  }
  res <- list(vs = vs, U = U, V = V)
  return(res)
}

.recodeData <- function(x,cat,rename.level=FALSE){
  G <- NULL
  Gcod <- NULL
  nbcat <- NULL
  if (!is.null(x)){
    if (is.factor(x))
      stop("All variables in x must be numeric",call. = FALSE)
    if (is.numeric(x))
      x <- data.frame(x)
    for (v in 1:ncol(x)) {
      if (!is.numeric(x[, v]))
        stop("All variables in x must be numeric",call. = FALSE)
    }
    n1 <- nrow(x)
    p1 <- ncol(x)

    recodqt <- .recodecat(x)
    Z1 <- recodqt$Z
    g1 <- recodqt$g
    s1 <- recodqt$s
    Y1 <- recodqt$Xcod
  }
  if (!is.null(cat)){
    if (is.numeric(cat))
      stop("All variables in cat must be categorical",call.=FALSE)
    if (is.factor(cat))
      cat <- data.frame(cat)
    for (v in 1:ncol(cat)) {
      if (is.numeric(cat[, v]))
        stop("All variables in cat must be categorical",call.=FALSE) }
    vect.all.levels<-as.character(unlist(apply(cat,2,unique)))
    vect.all.levels.unique<-unique(vect.all.levels)
    test.name.categ<-length(vect.all.levels)==length(vect.all.levels.unique)
    if(test.name.categ==FALSE && rename.level==FALSE)
      stop("Some categorical variables have same names of categories,
             rename categories or use the option rename.level=TRUE to rename it automatically",call.=FALSE)
    for (v in 1:ncol(cat)) cat[,v] <- factor(as.character(cat[,v]))
    n2 <- nrow(cat)
    p2 <- ncol(cat)
    G <- .recodex(cat,rename.level)
    g2 <- apply(G,2,mean)
    ns <- apply(G,2,sum)
    s2 <- sqrt(ns/nrow(G))

    Gcod <- sweep(G,MARGIN=2,STATS=s2,FUN="/")
    moy<-apply(Gcod,2,mean)
    Z2 <- sweep(Gcod,MARGIN=2,STATS=moy,FUN="-")
    G.cent<-sweep(G,MARGIN=2,STATS=ns/nrow(G),FUN="-")
    nb.cat <- function(cat) {
      cat <- as.factor(cat)
      length(levels(cat))
    }
    nbcat <- apply(cat,2, nb.cat)
    indexj2<-NULL
    for (j in 1:ncol(cat)) {
      indexj2 <- c(indexj2,rep(j,nbcat[j]))}
  }
  if (!is.null(x)&& !is.null(cat))
    if (n1==n2) {
      n <- n1
      p <- p1+p2
      Z <- cbind(Z1,Z2)
      Y <- cbind(Y1,G)
      W <- cbind(Z1,G.cent)
      g <- c(g1,g2)
      s <- c(s1,s2)
      X <- cbind.data.frame(x,cat)
      indexj <- c(1:p1,indexj2+p1)
    } else
      stop("number of objects in x and cat must be the same",call.=FALSE)
  if (!is.null(x)&& is.null(cat)) {
    n <- n1
    p <- p1
    p2 <- 0
    Z <- Z1
    Y <- Y1
    W <- Z1
    g <- g1
    s <- s1
    X <- x
    indexj <- 1:p1
  }
  if (is.null(x)&& !is.null(cat)){
    n <- n2
    p <- p2
    p1 <- 0
    Z <- Z2
    Y <- G
    W <- G.cent
    g <- g2
    s <- s2
    X <- cat
    indexj <- indexj2
  }
  if (is.null(x)&& is.null(cat))
    stop("A data matrix must be given",call.=FALSE)

  if (is.null(colnames(X)))
    colnames(X) <- paste("V", 1:ncol(X), sep = "")
  for (j in 1:ncol(X)) if (colnames(X)[j] == "")
    colnames(X)[j] <- paste("V", j, sep = "")

  return(list(X=X,Y=Y,Z=Z,W=W,n=n,p=p,p1=p1,p2=p2,g=g,s=s,indexj=indexj,
              G=G,Gcod=Gcod,x=x,cat=cat,nbcat=nbcat))
}


.recodecat <- function(X){
    X <- as.matrix(X)
    missing.mean <-
      function(C1){
        moy <- mean(C1,na.rm=T)
        ind <- which(is.na(C1)==T)
        if(length(ind)>=1){C1[ind]<-moy
        }
        return(C1)
      }
    if (nrow(X)==1){
      Xcod <- X
      Z <- NULL
      mean.Xcod <- X
      sd.Xcod <- NULL
    }
    else{
      Xcod <- apply(X,2,missing.mean)
      red <- sqrt((nrow(X)-1)/nrow(X))
      sd.Xcod <- apply(Xcod,2,sd)*red
      mean.Xcod <- apply(Xcod,2,mean)
      Z<- scale(Xcod,scale=sd.Xcod)
      apply(Z,1,function(x) sum(is.na(x)))
      if (sum(is.na(Z))!= 0) stop("There are columns in 'x' where all the values are identical",call.=FALSE)
    }
    return(list(Z=Z,g=mean.Xcod,s=sd.Xcod,Xcod=Xcod))
  }

.recodex <-function(X,rename.level=FALSE){
  #X <- as.matrix(X)
  GNA <- .tabdisna(X,rename.level)
  G <- replace(GNA,is.na(GNA),0)
  n <- nrow(GNA)
  if (n > 1){
    ns <- apply(G,2,sum)
    nmiss <- apply((apply(GNA,2,is.na)),2,sum)
    if(sum((n-nmiss)==ns)!=0) stop("There are columns in 'cat' where all the categories are identical",call.=FALSE)
  }
  return(G)
}


.tabdisna <- function (tab,rename.level=FALSE) {
  tab <- as.data.frame(tab)
  .mdis <- function(i) {
    cat <- tab[, i]
    nom <- names(tab)[i]
    n <- length(cat)
    cat <- as.factor(cat)
    x <- matrix(0, n, length(levels(cat)))
    ind<-(1:n) + n * (unclass(cat) - 1)
    indNA<-which(is.na(ind))

    x[(1:n) + n * (unclass(cat) - 1)] <- 1
    x[indNA,]<-NA
    if (rename.level==TRUE){
      dimnames(x) <- list(row.names(tab), paste(nom, levels(cat), sep = "="))
    }    else{
      dimnames(x) <- list(row.names(tab), levels(cat))
    }
    return(x)
  }
  if (ncol(tab) == 1)
    res <- .mdis(1)
  else {
    res <- lapply(1:ncol(tab), .mdis)
    res <- as.matrix(data.frame(res, check.names = FALSE))
  }
  return(res)
}

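## Diagnostic distances used by the PCA/FA fitters (descriptive comments added
## for clarity): .orthdist returns each observation's orthogonal distance to the
## subspace spanned by the loadings, and .scoredist returns a Mahalanobis-type
## distance of the scores computed with the component/factor variances.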
.orthdist <- function(xc, loadings){
  orthDist<-xc-xc%*%loadings%*%t(loadings)
  return(sqrt(rowSums(orthDist*orthDist)))
}

.scoredist <- function(scores, values){
  scores <- as.matrix(scores)
  if (length(values) > 1)
    val <- try(sqrt(mahalanobis_dist(scores, colMeans(scores), diag(values))), silent =T)
  else if (length(values)==1)
    val <- try(sqrt(mahalanobis_dist(scores, mean(scores), values)), silent =T)
  if (!inherits(val, "try-error")) return(val) else return(rep(NA, NROW(scores)))
}


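## Lederman bound on the number of extractable factors for p observed variables:
## floor((2p + 1 - sqrt(8p + 1)) / 2), with p taken as the number of eigenvalues
## (or singular values) that are numerically greater than zero.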
.lederman <- function(x,evals=NULL){
  if (is.null(evals))
    p <- sum(svd(x)$d > 1e-4)
  else
    p <- sum(evals > 1e-4)

  floor( 0.5 * ((2*p) + (1 - sqrt((8*p)+1))))
}


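## Assemble fit statistics for a factor solution (descriptive comment added for
## clarity): the implied correlation matrix, residuals, degrees of freedom,
## chi-square, BIC/SABIC, TLI, NFI, and the KMO/MSA sampling adequacy indices,
## with the likelihood-based quantities delegated to .fafs_aux().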
.FAfitstats <- function(loadings, Phi = NULL, rho, Y) {

  if (is.null(rho))
    r <- cor(Y)
  else
    r <- rho
  results1 <- .fafs_aux(loadings, Phi, rho, Y)
  BIC <- results1$BIC
  SABIC <- results1$SABIC
  null.BIC <- results1$null.BIC
  null.SABIC <- results1$null.SABIC
  chisq <- results1$STATISTIC
  TLI <- results1$TLI
  NFI <- results1$NFI
  kmsstats <- kmo(Y, r)
  KMO <- kmsstats$KMO
  MSA <- kmsstats$MSA
  null.chisq <- results1$null.chisq
  n.obs <- nrow(Y)
  p <- ncol(Y)
  nfac <- ncol(loadings)
  if (is.null(Phi)) { M <- tcrossprod(loadings)} else {M <- loadings %*% tcrossprod(Phi, loadings)}
  M <- mpd(M)
  residual <- r - M
  r2 <- sum(r^2)
  rstar2 <- sum(residual * residual)
  result <- list(residual = residual, M = M)
  result$dof <- dof <- p * (p - 1)/2 - p * nfac + (nfac * (nfac - 1)/2)
  r2.off <- r2 - tr(r)
  diag(residual) <- 0
  rstar.off <- sum(residual^2)
  result$ENull <- r2.off * n.obs
  result$eChisq <- rstar.off * n.obs
  result$rms <- sqrt(rstar.off/(p * (p - 1)))
  result$nh <- n.obs
  if (result$dof > 0) {
    result$eBIC <- result$eChisq - result$dof * log(n.obs)
    result$eSABIC <- result$eChisq - result$dof * log((n.obs + 2)/24)
  }
  else {
    result$eBIC <- result$eSABIC <- NA
  }
  results <- list()
  results$model <- M; results$residuals <- result$residual; results$TLI <- TLI; results$NFI <- NFI;
  results$KMO <- KMO; results$MSA <- MSA; results$dof <- result$dof; results$chisq <- chisq;
  results$BIC <- BIC; results$SABIC <- SABIC; results$null.chisq <- null.chisq; results$null.BIC <- null.BIC;
  results$null.SABIC <- null.SABIC; results$echisq <- result$eChisq; results$eBIC <- result$eBIC;
  results$eSABIC <- result$eSABIC
  return(results)
}

.fafs_aux <- function(loadings, Phi = NULL, rho, Y) {
  conf.level <- 0.95
  if (is.null(rho))
    r <- mpd(cor(Y))
  else
    r <- mpd(rho)
  n.obs <- nrow(Y)
  p <- ncol(Y)
  nfac <- ncol(loadings)
  if (is.null(Phi)) { M <- tcrossprod(loadings)} else {M <- loadings %*% tcrossprod(Phi, loadings)}
  diag(M) <- diag(r)
  m.inv.r <- pseudoinverse(mpd(M)) %*% r
  result <- list()
  result$n.obs = n.obs
  result$dof <- p * (p - 1)/2 - p * nfac + (nfac * (nfac - 1)/2)
  result$objective <- sum(diag((m.inv.r))) - log(det(m.inv.r)) - p
  result$criteria <- c(objective = result$objective, NA, NA)
  result$STATISTIC <- chisq <- result$objective * ((n.obs - 1) - (2 * p + 5)/6 - (2 * nfac)/3)
  if (!is.nan(result$STATISTIC))
    if (result$STATISTIC < 0) {
      result$STATISTIC <- 0
    }
  if (result$dof > 0) {
    result$PVAL <- pchisq(result$STATISTIC, result$dof, lower.tail = FALSE)
  }
  else {
    result$PVAL <- NA
  }

  F0 <- sum(diag((r))) - log(det(r)) - p
  if (is.infinite(F0)) { F0 <- sum(r^2) }
  Fm <- result$objective
  Mm <- Fm/(p * (p - 1)/2 - p * nfac + (nfac * (nfac - 1)/2))
  M0 <- F0 * 2/(p * (p - 1))
  nm <- ((n.obs - 1) - (2 * p + 5)/6 - (2 * nfac)/3)
  result$null.model <- F0
  result$null.dof <- p * (p - 1)/2
  result$null.chisq <- F0 * ((n.obs - 1) - (2 * p + 5)/6)
  result$TLI <- (M0 - Mm)/(M0 - 1/nm)
  result$NFI <- (result$null.chisq-chisq)/result$null.chisq
  result$null.BIC <- result$null.chisq  - result$null.dof * log(n.obs)
  result$null.SABIC <- result$null.chisq  - result$null.dof * log((n.obs + 2)/24)
  result$BIC <- chisq - result$dof * log(n.obs)
  result$SABIC <- chisq - result$dof * log((n.obs + 2)/24)

  return(result)
}




.ifa_fit<-function(Y,n,nfac,p,ic,totalICs,maxIC,max.iter,Gamma,w,ic.means,ic.var,tol,Psi,lik){
  likelihood<-NULL
  iter<-0
  ratio<-1000

  while ((iter < max.iter) & (ratio > tol )) {
    iter<-iter+1
    wq<-matrix(1,1,totalICs)
    muq<-matrix(0,nfac,totalICs)
    vuq<-array(0,c(nfac,nfac,totalICs))
    pr<-totalICs
    cont<-1
    for (k in 1:nfac) {
      j<-1
      pr<-pr/ic[k]
      for (i in 1:totalICs){
        muq[k,i]<-ic.means[k,j]
        vuq[k,k,i]<-ic.var[k,j]
        wq[,i]<-wq[,i]*w[k,j]
        cont<-cont+1
        if (cont>pr) {
          j <- ifelse(j==ic[k], j, j+1)
          cont<-1}
      }
    }

    sigma<-array(1e-6,c(nfac,nfac,totalICs))
    psiInv<-pseudoinverse(Psi)
    roqy<-array(0,c(nfac,n,totalICs))

    sigma<-array(apply((array(crossprod(Gamma, psiInv %*% Gamma),c(nfac,nfac,totalICs))+array(apply(vuq,3,solve),c(nfac,nfac,totalICs))),3,solve),c(nfac,nfac,totalICs))


    for (i in 1:totalICs) {
      roqy[,,i]<-sigma[,,i]%*%((crossprod(Gamma, tcrossprod(psiInv,Y))) + matrix(pseudoinverse(vuq[,,i])%*%muq[,i],nfac,n))}

    exqy<-roqy
    exxqy<-array(diag(rep(1e-3, nfac)),c(nfac,nfac,n,totalICs))
    pyq<-matrix(0,n,totalICs)

    for (i in 1:n) {
      for (j in 1 :totalICs) {
        exxqy[,,i,j] <- exxqy[,,i,j] + sigma[,,j] + tcrossprod(roqy[,i,j], roqy[,i,j])
        pyq[i,j]<-dmvnorm(x=t(Y[i,]),t(Gamma%*%muq[,j]),(Gamma%*%vuq[,,j]%*%t(Gamma)+Psi), log  = F)
      }
    }

    pqy<-matrix(0,n,totalICs)
    den<- tcrossprod(wq, pyq)
    temp<-t(matrix(wq,totalICs,n))
    pqy<-pyq*temp/den[,]
    pqy<-ifelse(is.na(pqy),mean(pqy, na.rm=TRUE),pqy)
    pqiy<-array(0,c(n,nfac,maxIC))
    nummu<-array(0,c(n,nfac,maxIC))
    numvu<-array(0,c(n,nfac,maxIC))

    pr<-totalICs
    cont<-1
    for (k in 1:nfac) {
      j<-1
      pr<-pr/ic[k]
      for (i in 1:totalICs){
        nummu[,k,j]<-nummu[,k,j]+(pqy[,i]*exqy[k,,i])
        numvu[,k,j]<-numvu[,k,j]+(pqy[,i]*exxqy[k,k,,i])
        pqiy[,k,j]<-pqiy[,k,j]+pqy[,i]
        cont<-cont+1
        if (cont>pr){
          j <- ifelse(j==ic[k], j, j+1)
          cont<-1
        }
      }
    }

    exy<-matrix(1e-9, nfac ,n)
    exxy<-array(diag(rep(1e-4,nfac)), c(nfac,nfac,n))

    for (j in 1:totalICs){
      exy<-exy+(t(matrix(pqy[,j],n,nfac))*exqy[,,j])
      exxy<-exxy+(array(rep(pqy[,j],each=nfac*nfac),c(nfac,nfac,n))*exxqy[,,,j])
    }

    exy<-ifelse(is.na(exy),mean(exy, na.rm=TRUE),exy)
    exxy<-ifelse(is.na(exxy),mean(exxy, na.rm=TRUE),exxy)
    eexxy<-rowMeans(exxy, na.rm=TRUE, dims=2)
    eexxyInv<-pseudoinverse(eexxy)
    Gamma<-matrix(0,p,nfac)
    Psi<-matrix(0,p,p);
    for (i in 1:n){Gamma <- Gamma + (crossprod(t(Y[i,]), crossprod(exy[,i], eexxyInv)))}; Gamma<-Gamma/n
    for (i in 1:n) {Psi<- Psi+ (crossprod(t(Y[i,]), Y[i,])-(crossprod(t(Y[i,]), tcrossprod(exy[,i], Gamma))))}; Psi<-Psi/n

    Enummu<-matrix(0,nfac,maxIC)
    Enumvu<-matrix(0,nfac,maxIC)
    Eden<-matrix(0,nfac,maxIC)
    ic.means<-matrix(0,nfac,maxIC)
    ic.var<-matrix(0,nfac,maxIC)
    w<-matrix(0,nfac,maxIC)

    for (i in 1:nfac){
      for (j in 1:ic[i]){
        Enummu[i,j]<-sum(nummu[,i,j])
        Eden[i,j]<-sum(pqiy[,i,j])
        Enumvu[i,j]<-sum(numvu[,i,j])
      }
    }

    for (i in 1:nfac) {
      for (j in 1:ic[i]){
        ic.means[i,j]<-Enummu[i,j]/Eden[i,j]
        ic.var[i,j]<-Enumvu[i,j]/Eden[i,j]-(ic.means[i,j]*ic.means[i,j])
        w[i,j] <- mean(pqiy[,i,j])
      }
    }


    sigmascale<-matrix(0,nfac)

    for (i in 1:nfac){
      cont1<-0
      cont2<-0
      for (j in 1:ic[i]) {
        cont1<-cont1+ ( w[i,j]*(ic.means[i,j]*ic.means[i,j]+ic.var[i,j]) )
        cont2<-cont2+(w[i,j]*ic.means[i,j])    }

      sigmascale[i]<-cont1-(cont2*cont2)
    }


    for (i in 1:nfac) {
      for (j in 1:ic[i]) {
        ic.means[i,j]<-ic.means[i,j]/sqrt(sigmascale[i])
        ic.var[i,j]<-ic.var[i,j]/sigmascale[i]
      }
      for (j in 1:p)  Gamma[j,i]=Gamma[j,i]*sqrt(sigmascale[i])
    }

    temp<-sum(log(tcrossprod(pyq,wq)))/n
    likelihood<-c(likelihood,temp)
    ratio<-abs((temp-lik)/lik)
    if ((temp < lik) & (iter > 8)) ratio<-tol
    lik<-temp
  }
  out<-list(Gamma=Gamma,w=w,ic.means=ic.means,ic.var=ic.var,Psi=Psi,likelihood=likelihood,sigma=sigma,pqy=pqy)
  return(out)
}


.ifa_init <- function(x,nfac,scaling=TRUE){

  fit <- pca(x, nfac, scale = scaling)
  Gamma <- try(infomaxT(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
  if (inherits(Gamma, "try-error")){
    Gamma <- try(entropy(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
    if (inherits(Gamma, "try-error")){
      Gamma <- try(quartimax(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
      if (inherits(Gamma, "try-error")){
        Gamma <- try(Varimax(fit$loadings, normalize = TRUE, eps = 1e-4, maxit = 1500)$loadings)
        if (inherits(Gamma, "try-error")){
          Gamma <- fit$loadings
        }
      }
    }
    ord <- order(colSums(Gamma^2), decreasing = T)
  }
  else{
    ord <- order(colSums(Gamma^2), decreasing = T)
  }
  Gamma <- Gamma[,ord]
  if (scaling)
    Psi <- cor(x) - tcrossprod(Gamma)
  else
    Psi <- cov(x) - tcrossprod(Gamma)

  diag(Psi) <- pmax(diag(Psi), 0.001)
  output<-list(Psi=Psi, Gamma=Gamma)
  return(output)
}





.newblack   <- function(x){crayon::make_style(rgb(0.900, 0.278, 0.106))(x)}
.coral   <- function(x){crayon::make_style(rgb(0.994,0.165,0.219))(x)}
.nurple2  <- function(x){crayon::make_style(rgb(0.808, 0.104, 0.709))(x)}
.nurple  <- function(x){crayon::make_style(rgb(0.72, 0.10, 0.78))(x)}
.seamist <- function(x){crayon::make_style(rgb(0.163, 0.778, 0.683))(x)}
.hotpink <- function(x){crayon::make_style(rgb(1.00, 0.098, 0.700))(x)}
.lightgreen <- function(x){crayon::make_style(rgb(0.72, 0.81, 0.16))(x)}
.bluez <- function(x){crayon::make_style(rgb(0.063, 0.230, 1.00))(x)}
.bluez2 <- function(x){crayon::make_style(rgb(0.00, 0.278, 0.563))(x)}
.red2  <- function(x){crayon::make_style(rgb(1.00, 0.078, 0.0263))(x)}
.crimson <- function(x){crayon::make_style(rgb(0.823,0.048,0.04))(x)}
.rosered <- function(x){crayon::make_style(rgb(1.00,0.00,0.506))(x)}
.coral2 <- function(x){crayon::make_style(rgb(0.999,0.112,0.619))(x)}
.green2 <- function(x){crayon::make_style(rgb(0.031, 0.353, 0.220))(x)}
.pipple  <- function(x){crayon::make_style(rgb(0.408, 0.074, 0.979))(x)}
.pipple2  <- function(x){crayon::make_style(rgb(0.631, 0.094, 0.839))(x)}
.purple  <- function(x){crayon::make_style(rgb(0.42, 0.00, 0.42))(x)}
.purple2 <- function(x){crayon::make_style(rgb(0.46, 0.01, 0.63))(x)}
.orange <- function(x){crayon::make_style(rgb(1,0.6125,0))(x)}