Nothing
# canonical correlation analysis, as a more general method, with method functions
# Updated to implement weights
#
# compare with :
# stats::cancor (very basic),
# yacca::cca (fairly complete, but very messy return structure)
# CCA::cc (fairly complete, but very messy return structure, no longer maintained)
cancor <- function(x, ...) {
UseMethod("cancor", x)
}
cancor.formula <- function(formula, data, subset, weights,
na.rm = TRUE, # DONE: now just use na.rm
method = "gensvd", # "qr" not implemented
# contrasts = NULL, # would it make any sense to allow factors? should we test for factors in X?
...) {
# remove the intercept [solution by John Fox]
formula <- update(formula, . ~ . - 1)
cl <- match.call()
cl$formula <- formula
mf <- match.call(expand.dots = FALSE)
mf$formula <- formula
m <- match(c("formula", "data", "subset", "weights"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.response(mf, "numeric")
w <- as.vector(model.weights(mf))
if (!is.null(w) && !is.numeric(w))
stop("'weights' must be a numeric vector")
x <- model.matrix(mt, mf, contrasts)
z <- cancor.default(x, y, weights=w, na.rm=na.rm, ...)
z$call <- cl
z$terms <- mt
z
}
# var-cov matrix allowing weights
# Without weights, honors na.rm and use
# With weights, na.rm --> use='complete'
Var <- function(x, na.rm = TRUE, use, weights) {
if(missing(weights) || is.null(weights))
res <- var(x, na.rm=na.rm, use=use)
else {
# cov.wt doesn't allow for missing data, and doesn't allow x, y=NULL, ...
if (na.rm) {
if (!pmatch(use, "complete")) warning("Use of weights only supports use='complete'")
OK <- complete.cases(x)
x <- x[OK,]
}
res <- cov.wt(x, wt=weights)$cov
}
res
}
# DONE: add weights component to result
# DONE: add a method=argument
# TODO: should replace X, Y by x, y throughout to avoid another copy
# DONE: allow weights, by use of cov.wt() as in dataEllipse()
# FIXED: row.names should refer to original x, because as.matrix() strips them
cancor.default <- function (x, y,
weights,
X.names = colnames(x),
Y.names = colnames(y),
row.names = rownames(x),
xcenter = TRUE, ycenter = TRUE, # not yet implemented (used implicitly)
xscale = FALSE, yscale = FALSE, # not yet implemented
ndim=min(p,q),
set.names=c("X", "Y"),
prefix=c("Xcan", "Ycan"), # s/b: paste0(set.names, "can")
na.rm = TRUE,
use = if (na.rm) "complete" else "pairwise",
method = "gensvd",
...
)
{
X <- as.matrix(x)
Y <- as.matrix(y)
p <- ncol(X)
q <- ncol(Y)
n <- length(complete.cases(X,Y)) # DONE: take account of 0 weights
if(!missing(weights)) n <- n - sum(weights==0)
C <- Var(cbind(X, Y), na.rm = TRUE, use=use, weights=weights)
Cxx <- C[1:p, 1:p]
Cyy <- C[-(1:p), -(1:p)]
Cxy <- C[1:p, -(1:p)]
res <- gensvd(Cxy, Cxx, Cyy, nu=ndim, nv=ndim)
names(res) <- c("cor", "xcoef", "ycoef")
colnames(res$xcoef) <- paste(prefix[1], 1:ndim, sep="")
colnames(res$ycoef) <- paste(prefix[2], 1:ndim, sep="")
scores <- can.scores(X, Y, res$xcoef, res$ycoef)
colnames(scores$xscores) <- paste(prefix[1], 1:ndim, sep="")
colnames(scores$yscores) <- paste(prefix[2], 1:ndim, sep="")
structure <- can.structure(X, Y, scores, use=use)
result <- list(cancor = res$cor,
names = list(X = X.names, Y = Y.names,
row.names = row.names, set.names=set.names),
ndim = ndim,
dim = list(p=p, q=q, n=n),
coef = list(X = res$xcoef, Y= res$ycoef),
scores = list(X = scores$xscores, Y=scores$yscores),
X = X, Y = Y,
weights = if (missing(weights)) NULL else weights,
structure = structure)
class(result) <- "cancor"
return(result)
}
# scores on canonical variates
can.scores <- function (X, Y, xcoef, ycoef)
{
X.aux = scale(X, center=TRUE, scale=FALSE) # TODO: incorporate xscale, yscale, etc here
Y.aux = scale(Y, center=TRUE, scale=FALSE)
X.aux[is.na(X.aux)] = 0
Y.aux[is.na(Y.aux)] = 0
xscores = X.aux %*% xcoef
yscores = Y.aux %*% ycoef
return(list(xscores = xscores, yscores = yscores))
}
# canonical structure coefficients: correlations
can.structure <- function (X, Y, scores, use="complete.obs") {
xscores <- scores$xscores
yscores <- scores$yscores
X.xscores = cor(X, xscores, use = use)
Y.xscores = cor(Y, xscores, use = use)
X.yscores = cor(X, yscores, use = use)
Y.yscores = cor(Y, yscores, use = use)
return(list(
X.xscores = X.xscores,
Y.xscores = Y.xscores,
X.yscores = X.yscores,
Y.yscores = Y.yscores))
}
# TODO: replace with equivalent qr stuff
gensvd <- function (Rxy, Rxx, Ryy, nu=p, nv=q)
{
p <- dim(Rxy)[1]
q <- dim(Rxy)[2]
if (missing(Rxx)) Rxx <- diag(p)
if (missing(Ryy)) Ryy <- diag(q)
if (dim(Rxx)[1] != dim(Rxx)[2]) stop("Rxx must be square")
if (dim(Ryy)[1] != dim(Ryy)[2]) stop("Ryy must be square")
s <- min(p, q)
if (max(abs(Rxx - t(Rxx)))/max(abs(Rxx)) > 1e-10) {
warning("Rxx not symmetric.")
Rxx <- (Rxx + t(Rxx))/2
}
if (max(abs(Ryy - t(Ryy)))/max(abs(Ryy)) > 1e-10) {
warning("Ryy not symmetric.")
Ryy <- (Ryy + t(Ryy))/2
}
Rxxinv <- solve(chol(Rxx))
Ryyinv <- solve(chol(Ryy))
Dform <- t(Rxxinv) %*% Rxy %*% Ryyinv
if (p >= q) {
result <- svd(Dform, nu=nu, nv=nv)
values <- result$d
Xmat <- Rxxinv %*% result$u
Ymat <- Ryyinv %*% result$v
}
else {
result <- svd(t(Dform), nu=nv, nv=nu)
values <- result$d
Xmat <- Rxxinv %*% result$v
Ymat <- Ryyinv %*% result$u
}
gsvdlist <- list(values=values, Xmat=Xmat, Ymat=Ymat)
return(gsvdlist)
}
# vector of stars, of max. width, corresponding to proportions
stars <- function(p, width=30) {
p <- p/sum(p)
reps <- round(p * width / max(p))
res1 <- sapply(reps, function(x) paste(rep('*', x), sep="", collapse=""))
res2 <- sapply(reps, function(x) paste(rep(' ', width-x), sep="", collapse=""))
res <- paste(res1, res2, sep="")
res
}
# DONE: move the printout of coefficients to a summary method
print.cancor <- function(x, digits=max(getOption("digits") - 2, 3), ...) {
names <- x$names
cat("\nCanonical correlation analysis of:\n")
cat( "\t", x$dim$p, " ", names$set.names[1], " variables: ", paste(names$X, collapse=', '), "\n")
cat(" with\t", x$dim$q, " ", names$set.names[2], " variables: ", paste(names$Y, collapse=', '), "\n")
cat("\n")
canr <- x$cancor
lambda <- canr^2 / (1-canr^2)
pct = 100*lambda / sum(lambda)
cum = cumsum(pct)
# DONE: add stars column, showing pct
stwidth <- getOption("width") - 40
scree <- stars(pct, width = min(30, stwidth))
canrdf <- data.frame("CanR"=canr, "CanRSQ"=canr^2, "Eigen"=lambda, "percent"=pct, "cum"=cum, "scree"=scree)
print(canrdf, digits=4)
tests <- Wilks.cancor(x)
print(tests, digits=digits)
invisible(x)
}
# moved printout of coefficients here
summary.cancor <- function(object, digits=max(getOption("digits") - 2, 3), ...) {
names <- object$names
print(object, digits=digits, ...)
cat("\nRaw canonical coefficients\n")
cat("\n ", names$set.names[1], " variables: \n")
print(object$coef$X, digits=digits)
cat("\n ", names$set.names[2], " variables: \n")
print(object$coef$Y, digits=digits)
}
# extractor functions
scores <- function(x, ...) {
UseMethod("scores")
}
# TODO: check rownames
scores.cancor <- function(x, type=c("x", "y", "both", "list", "data.frame"), ...) {
type <- match.arg(type)
switch(type,
x = x$scores$X,
y = x$scores$Y,
both = x$scores,
list = x$scores,
data.frame = data.frame(x$scores$X, x$scores$Y)
)
}
coef.cancor <- function(object, type=c("x", "y", "both", "list"), standardize=FALSE, ...) {
coef <- object$coef
if (standardize) {
coef$X <- diag(sqrt(diag(cov(object$X)))) %*% coef$X
coef$Y <- diag(sqrt(diag(cov(object$Y)))) %*% coef$Y
rownames(coef$X) <- rownames(object$coef$X)
rownames(coef$Y) <- rownames(object$coef$Y)
}
type <- match.arg(type)
switch(type,
x = coef$X,
y = coef$Y,
both = list(coef$X, coef$Y),
list = list(coef$X, coef$Y)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.