R/groupPCA.r

Defines functions predict.bgPCA groupPCA

Documented in groupPCA predict.bgPCA

#' Perform PCA based of the group means' covariance matrix
#' 
#' Calculate covariance matrix of the groupmeans and project all observations
#' into the eigenspace of this covariance matrix. This displays a low
#' dimensional between group structure of a high dimensional problem.
#' 
#' 
#' @param dataarray Either a k x m x n real array, where k is the number of
#' points, m is the number of dimensions, and n is the sample size. Or
#' alternatively a n x m Matrix where n is the numeber of observations and m
#' the number of variables (this can be PC scores for example)
#' @param groups a character/factor vector containgin grouping variable.
#' @param rounds integer: number of permutations if a permutation test of the
#' euclidean distance between group means is requested.If rounds = 0, no test
#' is performed.
#' @param tol threshold to ignore eigenvalues of the covariance matrix.
#' @param cv logical: requests leaving-one-out crossvalidation
#' @param mc.cores integer: how many cores of the Computer are allowed to be
#' used. Default is use autodetection by using detectCores() from the parallel
#' package. Parallel processing is disabled on Windows due to occasional
#' errors.
#' @param weighting logical:weight between groups covariance matrix according
#' to group sizes.
#' @return
#' \item{eigenvalues }{Non-zero eigenvalues of the groupmean covariance
#' matrix}
#' \item{groupPCs }{PC-axes - i.e. eigenvectors of the groupmean covariance
#' matrix}
#' \item{Variance }{table displaying the between-group variance explained by each between group PC - this only reflects the variability of the group means and NOT the variability of the data projected into that space}
#' \item{Scores }{Scores of all observation in the PC-space}
#' \item{probs }{p-values of pairwise groupdifferences - based on
#' permuation testing}
#' \item{groupdists }{Euclidean distances between groups' averages}
#' \item{groupmeans }{matrix with rows containing the Groupmeans, or a k x m x groupsize array if the input is a k x m x n landmark array}
#' \item{Grandmean }{vector containing the Grand mean, or a matrix if the input is a k x m x n landmark array}
#' \item{CV }{Cross-validated scores}
#' \item{groups }{grouping Variable}
#' \item{resPCs}{PCs orthogonal to the between-group PCs}
#' \item{resPCscores}{Scores of the residualPCs}
#' \item{resVar}{table displaying the residual variance explained by each residual PC. }
#' @author Stefan Schlager
#' @seealso \code{\link{CVA}}
#' @references Mitteroecker P, Bookstein F 2011. Linear Discrimination,
#' Ordination, and the Visualization of Selection Gradients in Modern
#' Morphometrics. Evolutionary Biology 38:100-114.
#' 
#' Boulesteix, A. L. 2005: A note on between-group PCA, International Journal
#' of Pure and Applied Mathematics 19, 359-366.
#' 
#' @examples
#' 
#' data(iris)
#' vari <- iris[,1:4]
#' facto <- iris[,5]
#' pca.1 <-groupPCA(vari,groups=facto,rounds=100,mc.cores=1)
#' 
#' ### plot scores
#' if (require(car)) {
#' scatterplotMatrix(pca.1$Scores,groups=facto, ellipse=TRUE,
#'         by.groups=TRUE,var.labels=c("PC1","PC2","PC3"))
#' }
#' ## example with shape data
#' data(boneData)
#' proc <- procSym(boneLM)
#' pop_sex <- name2factor(boneLM, which=3:4)
#' gpca <- groupPCA(proc$orpdata, groups=pop_sex, rounds=0, mc.cores=2)
#' \dontrun{
#' ## visualize shape associated with first between group PC
#' dims <- dim(proc$mshape)
#' ## calculate matrix containing landmarks of grandmean
#' grandmean <-gpca$Grandmean
#' ## calculate landmarks from first between-group PC
#' #                   (+2 and -2 standard deviations)
#' gpcavis2sd<- restoreShapes(c(-2,2)*sd(gpca$Scores[,1]), gpca$groupPCs[,1], grandmean)
#' deformGrid3d(gpcavis2sd[,,1], gpcavis2sd[,,2], ngrid = 0,size=0.01)
#' require(rgl)
#' ## visualize grandmean mesh
#' 
#' grandm.mesh <- tps3d(skull_0144_ch_fe.mesh, boneLM[,,1],grandmean,threads=1)
#' wire3d(grandm.mesh, col="white")
#' spheres3d(grandmean, radius=0.01)
#' }
#' 
#' 
#' @export
groupPCA <- function(dataarray, groups, rounds = 10000,tol=1e-10,cv=TRUE,mc.cores=parallel::detectCores(), weighting=TRUE)
{
    pmatrix.proc <- NULL
    proc.distout <- NULL
    lev <- NULL	
    
    groups <- factor(groups)
    lev <- levels(groups)
    ng <- length(lev)
    gsizes <- as.vector(tapply(groups, groups, length))
    if (1 %in% gsizes) {
        cv <- FALSE
        warning("group with one entry found - crossvalidation will be disabled.")
    }
    lmdim <- NULL
    N <- dataarray
    if (length(dim(N)) == 3) {
        N <- vecx(N)
        lmdim <- dim(dataarray)[2]
    }
    N <- as.matrix(N)
    n <- dim(N)[1]
    l <- dim(N)[2]
    if (length(groups) != n)
        warning("group affinity and sample size not corresponding!")
    
    Gmeans <- matrix(0, ng, l)
    rownames(Gmeans) <- lev
    for (i in 1:ng) {
        Gmeans[i, ] <- colMeans(N[groups==lev[i], ,drop=F])
    }
    if (weighting)
        wt <- gsizes
    else
        wt <- rep(1,ng)
    if (weighting)
        wcov <- cov.wt(Gmeans,wt=wt)
    else
        wcov <- list(cov=cov(Gmeans),center=colMeans(Gmeans))
    Grandm <- wcov$center
    eigenGmeans <- eigen(wcov$cov)
                                        #resGmeans <- sweep(Gmeans, 2, Grandm)
    Tmatrix <- N
    N <- sweep(N, 2, Grandm)
    valScores <- which(eigenGmeans$values > tol)
    groupScores <- N%*%(eigenGmeans$vectors[,valScores,drop=FALSE])
    groupPCs <- eigenGmeans$vectors[,valScores,drop=FALSE]
    residuals <- N-groupScores%*%t(groupPCs)
    resPrcomp <- prcompfast(residuals,center = F,tol=sqrt(tol))
   
    
###### create a neat variance table for the groupmean PCA ###### 
    values <- eigenGmeans$values[valScores]
    bgnames <- c(paste("bgPC",1:length(values),sep="_"))
    Var <- createVarTable(values,FALSE,rownames = bgnames)
    cnames <- c(paste("bgPC",1:length(values),sep="_"),paste("resPC",1:length(resPrcomp$sdev),sep="_"))
    ## combinedVar <- createVarTable(c(values,resPrcomp$sdev^2),square = FALSE,rownames = cnames)
    resnames <- paste("resPC",1:length(resPrcomp$sdev),sep="_")
    resVar <- createVarTable(resPrcomp$sdev,square = TRUE,rownames = resnames)
### calculate between group distances ###
    
### Permutation Test for Distances	
    
    shakeitall <- permudist(N,groups,rounds=rounds)
    if (rounds > 0)
        pmatrix.proc <- shakeitall$p.value
    else
        pmatrix.proc <- NULL
    proc.distout <- shakeitall$dist
    
    crovafun <- function(x)
        {
        crovtmp <- .groupPCAcrova(Tmatrix[-x,,drop=F],groups[-x],tol=tol,groupPCs=groupPCs,weighting=weighting)
        out <- as.vector(Tmatrix[x,]-crovtmp$Grandmean) %*% as.matrix(crovtmp$PCs)
        return(out)
    }

CV=NULL
    if (cv) {
        win <- FALSE
        if(.Platform$OS.type == "windows")
            win <- TRUE
        else
            registerDoParallel(cores=mc.cores)### register parallel backend
        
        if (win)
            crossval <- foreach(i=1:n) %do% crovafun(i)
        else
            crossval <- foreach(i = 1:n) %dopar% crovafun(i)
        CV <- groupScores
        for (i in 1:n) {
            if (is.matrix(CV))
                CV[i,] <- crossval[[i]]
            else
                CV[i] <- crossval[[i]]
        }
    }
    
    if (!is.null(lmdim)) {
        Gmeans <- vecx(Gmeans,revert=TRUE,lmdim=lmdim)
        Grandm <- vecx(t(Grandm),revert=TRUE,lmdim=lmdim)[,,1]
    }
    out <- list(eigenvalues=values,groupPCs=groupPCs,Variance=Var,Scores=groupScores,probs=pmatrix.proc,groupdists=proc.distout,groupmeans=Gmeans,Grandmean=Grandm,CV=CV,groups=groups,resPCs=resPrcomp$rotation,resPCscores=resPrcomp$x,resVar=resVar)
    class(out) <- "bgPCA"
    return(out)
}


#' Compute between-group-PC scores from new data
#'
#' Compute between-group-PC scores from new data
#' @param object object of class \code{bgPCA} returned from \code{\link{groupPCA}}
#' @param newdata matrix or 3D array containing data in the same format as originally used to compute groupPCA
#' @param ... currently not used.
#' @return returns the between-group-PC scores for new data
#' @examples
#' data(boneData)
#' 
#' boneLMPart <- boneLM[,,-(1:2)]
#' procPart <- procSym(boneLMPart)
#' pop_sex <- name2factor(boneLMPart, which=3:4)
#' ## compute group PCA without first 2 specimens
#' gpcaPart <- groupPCA(procPart$orpdata, groups=pop_sex, rounds=0, mc.cores=2,cv=FALSE)
#' ## align new data to Procrustes analysis
#' newdata <- align2procSym(procPart,boneLM[,,1:2])
#' ## get scores for new data
#' newscores <- predict(gpcaPart,newdata)
#' @export
predict.bgPCA <- function(object,newdata,...) {
    Grandm <- object$Grandmean
    if (is.matrix(Grandm)) {
        if (is.matrix(newdata)) {
            newdata <- as.vector(newdata-Grandm)
        } else {
            newdata <- vecx(sweep(newdata,1:2,Grandm))
            newdata <- as.matrix(newdata)
        }
    } else {
        newdata <- sweep(newdata,2,Grandm)
        newdata <- as.matrix(newdata)
    }
    
    scores <- newdata%*%object$groupPCs
    return(scores)
    
}
zarquon42b/Morpho documentation built on Jan. 28, 2024, 2:11 p.m.