## R/Pca.R

setMethod("names", "Pca", function(x) slotNames(x))
setMethod("$", c("Pca"),  function(x, name) slot(x, name))

setMethod("getCenter", "Pca", function(obj) obj@center)
setMethod("getScale", "Pca", function(obj) obj@scale)
setMethod("getLoadings", "Pca", function(obj) obj@loadings)
setMethod("getEigenvalues", "Pca", function(obj) obj@eigenvalues)
setMethod("getSdev", "Pca", function(obj) sqrt(obj@eigenvalues))
setMethod("getScores", "Pca", function(obj) obj@scores)
setMethod("getPrcomp", "Pca", function(obj) {
    ret <- list(sdev=sqrt(obj@eigenvalues),
         rotation=obj@loadings,
         center=obj@center,
         scale=obj@scale,
         x=obj@scores)
    class(ret) <- "prcomp"
    ret
})
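
## Usage sketch (wrapped in if(FALSE) so nothing runs when the file is
## sourced; assumes the 'hbk' data set from 'robustbase' is available).
## getPrcomp() repackages the fit as a "prcomp" object, so standard stats
## methods such as predict() work on it; the predict() method below relies
## on exactly this.
if(FALSE) {
    pc <- PcaHubert(hbk[, 1:3], k=2)
    getEigenvalues(pc)                      # robust eigenvalues
    pr <- getPrcomp(pc)                     # a "prcomp"-classed list
    predict(pr, newdata=hbk[1:5, 1:3])      # scores for new observations
}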

##
## Follow the standard methods: show, print, plot
##
setMethod("show", "Pca", function(object) myPcaPrint(object))
setMethod("summary", "Pca", function(object, ...){
    vars <- getEigenvalues(object)
    vars <- vars/sum(vars)

    ## If k < rank, use the stored initial eigenvalues and total variance, if any
    if(length(vars) < object@rank)
    {
        if(length(object@eig0) > 0 && length(object@totvar0) > 0)
        {
            vars <- object$eig0/object$totvar0
            vars <- vars[1:object$k]
        } else
            vars <- NULL
    }
    importance <- if(is.null(vars)) rbind("Standard deviation" = getSdev(object))
                  else              rbind("Standard deviation" = getSdev(object),
                                         "Proportion of Variance" = round(vars,5),
                                         "Cumulative Proportion" = round(cumsum(vars), 5))

    colnames(importance) <- colnames(getLoadings(object))
    new("SummaryPca", pcaobj=object, importance=importance)
})
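
## Minimal usage sketch (not run on source; 'iris' ships with R). The
## importance table mirrors summary.prcomp(); when k is smaller than the
## rank, the proportions are taken from the stored initial eigenvalues
## (eig0/totvar0), if available.
if(FALSE) {
    pc <- PcaClassic(iris[, 1:4])
    summary(pc)
}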

setMethod("show", "SummaryPca", function(object){

    cat("\nCall:\n")
    print(object@pcaobj@call)

    digits <- max(3, getOption("digits") - 3)

    cat("Importance of components:\n")
    print(object@importance, digits = digits)

    if(nrow(object@importance) == 1)
        cat("\nNOTE: Proportion of Variance and Cumulative Proportion are not shown",
            "\nbecause the chosen number of components k =", object@pcaobj@k,
            "\nis smaller than the rank of the data matrix =", object@pcaobj@rank, "\n")

    invisible(object)
})

## setMethod("print",     "Pca", function(x, ...) myPcaPrint(x, ...))

setMethod("predict",   "Pca", function(object, ...){
    predict(getPrcomp(object), ...)
})
setMethod("screeplot", "Pca", function(x, ...){
    pca.screeplot(x, ...)
})

setMethod("biplot",    "Pca", function(x, choices=1L:2L, scale=1, ...){

    if(length(getEigenvalues(x)) < 2)
        stop("Need at least two components for biplot.")

    lam <- sqrt(getEigenvalues(x)[choices])
    scores <- getScores(x)
    n <- NROW(scores)
    lam <- lam * sqrt(n)

    if(scale < 0 || scale > 1)
        warning("'scale' is outside [0, 1]")

    if(scale != 0)
        lam <- lam^scale
    else
        lam <- 1

    xx <- t(t(scores[, choices]) / lam)
    yy <- t(t(getLoadings(x)[, choices]) * lam)
    .biplot(xx, yy, ...)
    invisible()
})
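
## Usage sketch (not run on source): the lambda scaling above follows
## stats::biplot.prcomp - scores are divided and loadings multiplied by
## lambda^scale, so their product (the data approximation) is unchanged.
if(FALSE) {
    pc <- PcaClassic(USArrests)
    biplot(pc, scale=1)
}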

setMethod("scorePlot", "Pca", function(x, i=1, j=2, ...){
    pca.scoreplot(obj=x, i=i, j=j, ...)
})

##  The __outlier map__ (diagnostic plot, distance-distance plot)
##  visualizes the observations by plotting their orthogonal
##  distance to the robust PCA subspace versus their robust
##  distances within the PCA subspace. This makes it possible to
##  classify the data points into 4 types: regular observations,
##  good leverage points, bad leverage points and orthogonal
##  outliers. The outlier map is only possible when k < r (the
##  number of selected components is less than the rank of the
##  matrix). Otherwise a __distance plot__ will be shown (distances
##  against index).
##
##  The __screeplot__ shows the eigenvalues and is helpful for
##  selecting the number of principal components.
##
##  The __biplot__ is a plot which aims to represent both the
##  observations and variables of a matrix of multivariate data
##  on the same plot.
##
##  The __scoreplot__ shows a scatterplot of the i-th against the
##  j-th score of the Pca object with a superimposed tolerance
##  (0.975) ellipse.
##
## VT::17.06.2008
##setMethod("plot", "Pca", function(x, y="missing",
setMethod("plot", signature(x="Pca", y="missing"), function(x, y="missing",
                                id.n.sd=3,
                                id.n.od=3,
                                ...){

    if(all(x@od > 1.E-06))
        pca.ddplot(x, id.n.sd, id.n.od, ...)
    else
        pca.distplot(x, id.n.sd, ...)
})
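
## Usage sketch for the plots described above (not run on source; 'hbk'
## comes from 'robustbase'):
if(FALSE) {
    pc <- PcaHubert(hbk, k=2)
    plot(pc)          # outlier map when k < rank, else a distance plot
    screeplot(pc)     # eigenvalues, to help choose k
    scorePlot(pc)     # PC1 vs PC2 with a 97.5% tolerance ellipse
}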

myPcaPrint <- function(x, print.x=FALSE, print.loadings=FALSE, ...) {
    if(!is.null(cl <- x@call)) {
        cat("Call:\n")
        dput(cl)
        cat("\n")
    }

    cat("Standard deviations:\n"); print(sqrt(getEigenvalues(x)), ...)
    if(print.loadings)
    {
        cat("\nLoadings:\n");          print(getLoadings(x), ...)
    }

    if (print.x) {
        cat("\nRotated variables:\n"); print(getScores(x), ...)
    }
    invisible(x)
}

## Internal function to calculate the score and orthogonal distances and the
##  appropriate cutoff values for identifying outlying observations
##
##  obj  - the Pca object. From this object will be used:
##          - k, eigenvalues, scores
##          - center and scale
##  data - the original data (not centered and scaled)
##  r    - rank
##  crit - criterion for computing cutoff for SD and OD
##
##  - cutoff for score distances: sqrt(qchisq(crit, k))
##  - cutoff for orthogonal distances: Box (1954)
##
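
## Standalone sketch of the two cutoff formulas above (not run on source;
## the orthogonal distances are simulated here, not taken from a real fit):
if(FALSE) {
    crit <- 0.975
    k <- 2
    cutoff.sd <- sqrt(qchisq(crit, df=k))             # score distance cutoff
    od <- sqrt(rchisq(100, df=3))                     # stand-in orthogonal distances
    od23 <- od^(2/3)                                  # Box (1954): approximately normal
    cutoff.od <- (median(od23) + mad(od23) * qnorm(crit))^(3/2)
}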

pca.distances <- function(obj, data, r, crit=0.975) {
    .distances(data, r, obj, crit)
}

.distances <- function(data, r, obj, crit=0.975) {

##  VT::28.07.2020 - add a mode adjusted for skewed data. The 'skew'
##      parameter will be used only in PcaHubert() to control how the
##      distances and their cutoffs are calculated.

    skew <- inherits(obj,"PcaHubert") && obj$skew

    ## remember the criterion, could be changed by the user
    obj@crit.pca.distances <- crit

    ## compute the score distances and the corresponding cutoff value
    n <- nrow(data)
    smat <- diag(obj@eigenvalues, ncol=ncol(obj@scores))

    ## VT::02.06.2010: it can happen that the rank of the matrix
    ##  is nk=ncol(scores), but the rank of the diagonal matrix of
    ##  eigenvalues is lower: for example if the last singular
    ##  value was 1E-7, the last eigenvalue will be sv^2=1E-14
    ##
    nk <- min(ncol(obj@scores), rankMM(smat))
    if(nk < ncol(obj@scores))
        warning(paste("Too small eigenvalue(s): ", obj@eigenvalues[ncol(obj@scores)], "- the diagonal matrix of the eigenvalues cannot be inverted!"))

    obj@sd <- sqrt(mahalanobis(as.matrix(obj@scores[,1:nk]), rep(0, nk), diag(obj@eigenvalues[1:nk], ncol=nk)))
    obj@cutoff.sd <- sqrt(qchisq(crit, obj@k))

    if(skew)
    {
        ## In this case (only valid when called from PcaHubert) the
        ##  SD are the observations' adjusted outlyingness, and the corresponding
        ##  cutoff value is derived in the same way as for the orthogonal distances.
        obj@sd <- obj@ao
        obj@cutoff.sd <- .crit.od(obj@sd, crit=crit, method="skewed")
    }

    ## Compute the orthogonal distances and the corresponding cutoff value
    ##  For each point this is the norm of the difference between the
    ##  centered data and the back-transformed scores
##    obj@od <- apply(data - repmat(obj@center, n, 1) - obj@scores %*% t(obj@loadings), 1, vecnorm)

##  VT::21.06.2016 - the data we get here is the original data - neither centered nor scaled.
##      - center and scale the data
##    obj@od <- apply(data - matrix(rep(obj@center, times=n), nrow=n, byrow=TRUE) - obj@scores %*% t(obj@loadings), 1, vecnorm)
    obj@od <- apply(scale(data, obj@center, obj@scale) - obj@scores %*% t(obj@loadings), 1, vecnorm)

    if(is.list(dimnames(obj@scores))) {
        names(obj@od) <- dimnames(obj@scores)[[1]]
    }

    ## The orthogonal distances make sense only if the number of PCs is less than
    ##  the rank of the data matrix - otherwise the cutoff is set to 0
    obj@cutoff.od <- 0
    if(obj@k != r) {
        ## the method used for computing the cutoff depends on (a) classic/robust and (b) skew
        mx <- if(inherits(obj,"PcaClassic")) "classic" else if(skew) "skewed" else "medmad"
        obj@cutoff.od <- .crit.od(obj@od, crit=crit, method=mx)
    }

    ## flag the observations with TRUE/FALSE according to whether the distances
    ##  are less than or equal to the corresponding cutoff values
    obj@flag <- obj@sd <= obj@cutoff.sd
    if(obj@cutoff.od > 0)
        obj@flag <- (obj@flag & obj@od <= obj@cutoff.od)

    return(obj)
}

## Skewness-adjusted cutoff of the orthogonal distances:
##  - cutoff = the largest od_i smaller than Q3({od}) + 1.5 * exp(3 * medcouple({od})) * IQR({od})
##
.crit.od <- function(od, crit=0.975, method=c("medmad", "classic", "umcd", "skewed"), quan)
{
    method <- match.arg(method)
    if(method == "skewed")
    {
        mc <- robustbase::mc(od, maxit=1000)
        e3mc <- if(mc < 0) 1 else exp(3*mc)
        cx <- quantile(od, 0.75) + 1.5 * e3mc * IQR(od)
        cv <- max(od[which(od < cx)])   # take the largest od, smaller than cx
    } else
    {
        od <- od^(2/3)
        if(method == "classic")
        {
            t <- mean(od)
            s <- sd(od)
        }else if(method == "umcd")
        {
            ms <- unimcd(od, quan=quan)
            t <- ms$tmcd
            s <- ms$smcd
        }else
        {
            t <- median(od)
            s <- mad(od)
        }
        cv <- (t + s * qnorm(crit))^(3/2)
    }
    cv
}
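
## Sketch of the "skewed" branch in isolation (not run on source; uses the
## medcouple from 'robustbase' on simulated right-skewed distances):
if(FALSE) {
    set.seed(2)
    od <- c(rexp(100), 8)                             # skewed sample plus one outlier
    mc <- robustbase::mc(od)
    e3mc <- if(mc < 0) 1 else exp(3 * mc)
    cx <- quantile(od, 0.75) + 1.5 * e3mc * IQR(od)
    max(od[od < cx])                                  # the cutoff that is returned
}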

## Flip the signs of the loadings: make the largest absolute
##  element in each column positive
##  - following a comment from Stephan Milborrow
##
.signflip <- function(loadings)
{
    if(!is.matrix(loadings))
        loadings <- as.matrix(loadings)
    apply(loadings, 2, function(x) if(x[which.max(abs(x))] < 0) -x else x)
}
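
## e.g. (not run on source): the second column below has its largest
## absolute element negative, so it is flipped; the first stays as is.
if(FALSE) {
    L <- cbind(c(0.8, -0.6), c(-0.9, 0.1))
    .signflip(L)      # cbind(c(0.8, -0.6), c(0.9, -0.1))
}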

## This is from MM in robustbase, but I want to change it and
##  therefore took a copy; it will be updated in 'robustbase' later.
##  I want to use doScale() rather than scale(), since doScale()
##  also accepts a function for the center/scale arguments.
.classPC <- function(x, scale=FALSE, center=TRUE,
		    signflip=TRUE, via.svd = n > p, scores=FALSE)
{
    if(!is.numeric(x) || !is.matrix(x))
	stop("'x' must be a numeric matrix")
    else if((n <- nrow(x)) <= 1)
	stop("The sample size must be greater than 1 for svd")
    p <- ncol(x)

    if(is.logical(scale))
        scale <- if(scale) sd else vector('numeric', p) + 1
    else if(is.null(scale))
        scale <- vector('numeric', p) + 1
    if(is.logical(center))
        center <- if(center) mean else vector('numeric', p)
    else if(is.null(center))
        center <- vector('numeric', p)

    x.scaled <- doScale(x, center=center, scale=scale)
    x <- x.scaled$x
    center <- x.scaled$center
    scale <- x.scaled$scale

    if(via.svd) {
	svd <- svd(x, nu=0)
	rank <- rankMM(x, sv=svd$d)
	loadings <- svd$v[,1:rank]
	eigenvalues <- (svd$d[1:rank])^2 /(n-1) ## FIXME: here .^2; later sqrt(.)
    } else { ## n <= p; was "kernelEVD"
	e <- eigen(tcrossprod(x), symmetric=TRUE)
        evs <- e$values
	tolerance <- n * max(evs) * .Machine$double.eps
	rank <- sum(evs > tolerance)
        evs <- evs[ii <- seq_len(rank)]
	eigenvalues <- evs / (n-1)
	## MM speedup, was:  crossprod(..) %*% diag(1/sqrt(evs))
	loadings <- crossprod(x, e$vectors[,ii]) * rep(1/sqrt(evs), each=p)
    }

    ## VT::15.06.2010 - signflip: flip the sign of the loadings
    if(signflip)
	loadings <- .signflip(loadings)

    list(rank=rank, eigenvalues=eigenvalues, loadings=loadings,
	 scores = if(scores) x %*% loadings,
	 center=center, scale=scale)
}
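
## Consistency sketch (not run on source): for a wide matrix (n <= p) the
## kernel branch via eigen(tcrossprod(x)) should reproduce the eigenvalues
## of the SVD branch, up to the numerical rank determination.
if(FALSE) {
    set.seed(1)
    X <- matrix(rnorm(5 * 20), nrow=5)        # n = 5 < p = 20
    a <- .classPC(X)                          # kernel EVD branch (default here)
    b <- .classPC(X, via.svd=TRUE)            # forced SVD branch
    all.equal(a$eigenvalues, b$eigenvalues)
}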

##  VT::19.08.2016
##  classSVD and kernelEVD are no longer used - see .classPC
##
## VT::15.06.2010 - Added scaling and flipping of the loadings
##
classSVD <- function(x, scale=FALSE, signflip=TRUE){
    if(!is.numeric(x) || !is.matrix(x))
        stop("'x' must be a numeric matrix")
    else if(nrow(x) <= 1)
        stop("The sample size must be greater than 1 for svd")

    n <- nrow(x)
    p <- ncol(x)

    center <- apply(x, 2, mean)
    x <- scale(x, center=TRUE, scale=scale)
    if(scale)
        scale <- attr(x, "scaled:scale")

    svd <- svd(x/sqrt(n-1))

    rank <- rankMM(x, sv=svd$d)
    eigenvalues <- (svd$d[1:rank])^2
    loadings <- svd$v[,1:rank]

    ## VT::15.06.2010 - signflip: flip the sign of the loadings
    if(!is.matrix(loadings))
        loadings <- data.matrix(loadings)
    if(signflip)
        loadings <- .signflip(loadings)
    scores <- x %*% loadings

    list(loadings=loadings,
         scores=scores,
         eigenvalues=eigenvalues,
         rank=rank,
         center=center,
         scale=scale)
}

## VT::15.06.2010 - Added scaling and flipping of the loadings
##
kernelEVD <- function(x, scale=FALSE, signflip=TRUE){
    if(!is.numeric(x) || !is.matrix(x))
        stop("'x' must be a numeric matrix")
    else if(nrow(x) <= 1)
        stop("The sample size must be greater than 1 for svd")

    n <- nrow(x)
    p <- ncol(x)

    if(n > p) classSVD(x, scale=scale, signflip=signflip)
    else {
        center <- apply(x, 2, mean)
        x <- scale(x, center=TRUE, scale=scale)
        if(scale)
            scale <- attr(x, "scaled:scale")

        e <- eigen(x %*% t(x)/(n-1))

        tolerance <- n * max(e$values) * .Machine$double.eps
        rank <- sum(e$values > tolerance)

        eigenvalues <- e$values[1:rank]
        loadings <- t((x/sqrt(n-1))) %*% e$vectors[,1:rank] %*% diag(1/sqrt(eigenvalues))

        ## VT::15.06.2010 - signflip: flip the sign of the loadings
        if(signflip)
            loadings <- .signflip(loadings)

        scores <- x %*% loadings
        ## return the result visibly (the former `ret <- list(...)` as the
        ## last expression returned it invisibly)
        list(loadings=loadings,
             scores=scores,
             eigenvalues=eigenvalues,
             rank=rank,
             center=center,
             scale=scale)
    }
}

pca.screeplot <- function (obj, k, type = c("barplot", "lines"), main = deparse1(substitute(obj)), ...)
{
    type <- match.arg(type)
    pcs <- if(is.null(obj@eig0) || length(obj@eig0) == 0) obj@eigenvalues else obj@eig0
    k <- if(missing(k)) min(10, length(pcs))
         else           min(k, length(pcs))

    xp <- seq_len(k)

    dev.hold()
    on.exit(dev.flush())
    if (type == "barplot")
        barplot(pcs[xp], names.arg = names(pcs[xp]), main = main,
            ylab = "Variances", ...)
    else {
        plot(xp, pcs[xp], type = "b", axes = FALSE, main = main,
            xlab = "", ylab = "Variances", ...)
        axis(2)
        axis(1, at = xp, labels = names(pcs[xp]))
    }
    invisible()
}

## Score plot of the Pca object 'obj' - scatterplot of the i-th against the
##  j-th score with a superimposed tolerance (0.975) ellipse
pca.scoreplot <- function(obj, i=1, j=2, main, id.n, ...)
{
    if(missing(main))
    {
        main <- if(inherits(obj,"PcaClassic")) "Classical PCA" else "Robust PCA"
    }

    x <- cbind(getScores(obj)[,i], getScores(obj)[,j])
    rownames(x) <- rownames(getScores(obj))

## VT::11.06.2012
##  Here we assumed that the scores are not correlated and
##  used a diagonal matrix with the eigenvalues on the diagonal to draw the ellipse
##  This is not the case with PP methods, therefore we compute the covariance of
##  the scores, considering only the non-outliers
##      (based on score and orthogonal distances)
##
##    ev <- c(getEigenvalues(obj)[i], getEigenvalues(obj)[j])
##    cpc <- list(center=c(0,0), cov=diag(ev), n.obs=obj@n.obs)

    flag <- obj@flag
    cpc <- cov.wt(x, wt=flag)

## We need to inflate the covariance matrix with the proper size:
##  see Maronna et al. (2006), 6.3.2, page 186
##
##  - multiply the covariance by quantile(di, alpha)/qchisq(alpha, 2)
##  where alpha = h/n
##
##    mdx <- mahalanobis(x, cpc$center, cpc$cov)
##    alpha <- length(flag[which(flag != 0)])/length(flag)
##    cx <- quantile(mdx, probs=alpha)/qchisq(p=alpha, df=2)
##    cpc$cov <- cx * cpc$cov


    .myellipse(x, xcov=cpc,
        xlab=paste("PC",i,sep=""),
        ylab=paste("PC",j, sep=""),
        main=main,
        id.n=id.n, ...)
    abline(v=0)
    abline(h=0)
}

## Distance-distance plot (or diagnostic plot, or outlier map)
## Plots score distances against orthogonal distances
pca.ddplot <- function(obj, id.n.sd=3, id.n.od=3, main, xlim, ylim, off=0.02, ...) {

    if(missing(main))
    {
        main <- if(inherits(obj,"PcaClassic")) "Classical PCA" else "Robust PCA"
    }


    if(all(obj@od <= 1.E-06))
        warning("PCA diagnostic plot is not defined")
    else
    {
        if(missing(xlim))
            xlim <- c(0, max(max(obj@sd), obj@cutoff.sd))
        if(missing(ylim))
            ylim <- c(0, max(max(obj@od), obj@cutoff.od))

        plot(obj@sd, obj@od, xlab="Score distance",
                             ylab="Orthogonal distance",
                             main=main,
                             xlim=xlim, ylim=ylim, type="p", ...)
        abline(v=obj@cutoff.sd)
        abline(h=obj@cutoff.od)
        label.dd(obj@sd, obj@od, id.n.sd, id.n.od, off=off)
    }
    invisible(obj)
}

## Distance plot, plots score distances against index
pca.distplot <- function(obj, id.n=3, title, off=0.02, ...) {

    if(missing(title))
    {
        title <- if(inherits(obj,"PcaClassic")) "Classical PCA" else "Robust PCA"
    }

    ymax <- max(max(obj@sd), obj@cutoff.sd)
    plot(obj@sd, xlab="Index", ylab="Score distance", ylim=c(0,ymax), type="p", ...)
    abline(h=obj@cutoff.sd)
    label(1:length(obj@sd), obj@sd, id.n, off=off)
    title(title)
    invisible(obj)
}

label <- function(x, y, id.n=3, off=0.02){
    xrange <- par("usr")
    xrange <- xrange[2] - xrange[1]
    if(id.n > 0) {
        n <- length(y)
        ind <- sort(y, index.return=TRUE)$ix
        ind <- ind[(n-id.n+1):n]
        if(is.character(names(y)))
            lab <- names(y[ind])
        else
            lab <- ind
        text(x[ind] - off*xrange, y[ind], lab)
    }
}

label.dd <- function(x, y, id.n.sd=3, id.n.od=3, off=0.02){
    xrange <- par("usr")
    xrange <- xrange[2] - xrange[1]
    if(id.n.sd > 0 && id.n.od > 0) {
        n <- length(x)
        ind.sd <- sort(x, index.return=TRUE)$ix
        ind.sd <- ind.sd[(n - id.n.sd + 1):n]
        ind.od <- sort(y, index.return=TRUE)$ix
        ind.od <- ind.od[(n - id.n.od + 1):n]
        lab <- ind.od
        if(is.character(names(y)))
            lab <- names(y[ind.od])
        text(x[ind.od] - off*xrange, y[ind.od], lab)
        lab <- ind.sd
        if(is.character(names(x)))
            lab <- names(x[ind.sd])
        text(x[ind.sd] - off*xrange, y[ind.sd], lab)
    }
}

## VT::30.09.2009 - add a parameter 'classic' to generate a default caption
##  "Robust biplot" or "Classical biplot" for a robust/classical
##  PCA object, resp.
##  --- do not use it for now ---
##
.biplot <- function(x, y, classic,
                    var.axes = TRUE,
                    col,
                    cex = rep(par("cex"), 2),
                    xlabs = NULL, ylabs = NULL,
                    expand=1,
                    xlim = NULL, ylim = NULL,
                    arrow.len = 0.1,
                    main = NULL, sub = NULL, xlab = NULL, ylab = NULL, ...)
{
    n <- nrow(x)
    p <- nrow(y)

##    if(is.null(main))
##        main <- if(classic) "Classical biplot" else "Robust biplot"

    if(missing(xlabs))
    {
       xlabs <- dimnames(x)[[1L]]
       if(is.null(xlabs))
            xlabs <- 1L:n
    }
    xlabs <- as.character(xlabs)
    dimnames(x) <- list(xlabs, dimnames(x)[[2L]])
    if(missing(ylabs))
    {
       ylabs <- dimnames(y)[[1L]]
       if(is.null(ylabs))
            ylabs <- paste("Var", 1L:p)
    }
    ylabs <- as.character(ylabs)
    dimnames(y) <- list(ylabs, dimnames(y)[[2L]])

    if(length(cex) == 1L)
        cex <- c(cex, cex)

    pcol <- par("col")
    if(missing(col))
    {
       col <- par("col")
       if(!is.numeric(col))
            col <- match(col, palette(), nomatch=1L)
       col <- c(col, col + 1L)
    }
    else if(length(col) == 1L)
        col <- c(col, col)

    unsigned.range <- function(x)
        c(-abs(min(x, na.rm=TRUE)), abs(max(x, na.rm=TRUE)))
    rangx1 <- unsigned.range(x[, 1L])
    rangx2 <- unsigned.range(x[, 2L])
    rangy1 <- unsigned.range(y[, 1L])
    rangy2 <- unsigned.range(y[, 2L])

    if(missing(xlim) && missing(ylim))
        xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
    else if(missing(xlim)) xlim <- rangx1
    else if(missing(ylim)) ylim <- rangx2
    ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
    on.exit(par(op))
    op <- par(pty = "s")
    if(!is.null(main))
        op <- c(op, par(mar = par("mar")+c(0,0,1,0)))
    plot(x, type = "n", xlim = xlim, ylim = ylim, col = col[[1L]],
         xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
    text(x, xlabs, cex = cex[1L], col = col[[1L]], ...)
    par(new = TRUE)
    plot(y, axes = FALSE, type = "n", xlim = xlim*ratio, ylim = ylim*ratio,
         xlab = "", ylab = "", col = col[[1L]], ...)

##    axis(3, col = col[2L], ...)
##    axis(4, col = col[2L], ...)
##    box(col = col[1L])
    axis(3, col = pcol, ...)
    axis(4, col = pcol, ...)
    box(col = pcol)

    text(y, labels=ylabs, cex = cex[2L], col = col[[2L]], ...)
    if(var.axes)
        arrows(0, 0, y[,1L] * 0.8, y[,2L] * 0.8, col = col[[2L]], length=arrow.len)
    invisible()
}
