### function for simulation study of PCA dimension-reduced
### cross covariance test
ccaTest <- function(object, npcs = 5, min.set1=5, min.set2=3, cc.pvalue.threshold=.1){
if (!is(object, 'GDIset')) stop("object must be a 'GDIset'")
# combine data sets -------------------------------
set1.df <- object@set1@dat
if (is(object@set1@dat, 'ffdf')){
set1.df$set <- ff(factor(rep('set1', nrow(set1.df))))
} else {
set1.df$set <- factor(rep('set1', nrow(set1.df)))
}
set2.df <- object@set2@dat
if (is(object@set2@dat, 'ffdf')){
set2.df$set <- ff(factor(rep('set2', nrow(set2.df))))
} else {
set2.df$set <- factor(rep('set2', nrow(set2.df)))
}
if (is(set1.df, 'ffdf') & is(set2.df, 'ffdf')){
full.set <- ffdfappend(set1.df, set2.df)
} else {
full.set <- rbind(as.data.frame(set1.df), as.data.frame(set2.df))
}
# do analysis grouped by gene -------------------------------------
entrez.ids <- c(object@set1@annot$entrez.id, object@set2@annot$entrez.id)
ids.in.both <- intersect(object@set1@annot$entrez.id, object@set2@annot$entrez.id)
unique.ids <- unique(ids.in.both)
ind <- 1
out.return <- list()
# out <- foreach(gene = unique.ids, .packages='gdi') %do% {
for(gene in unique.ids){
cat(gene, ' ', ind)
dat <- full.set[which(entrez.ids == gene), ]
n.sites <- table(dat$set)
set1 <- as.matrix(dat[dat$set == 'set1', -ncol(dat)])
set2 <- as.matrix(dat[dat$set == 'set2', -ncol(dat)])
set1 <- apply(set1, 1, na2mean)
set2 <- apply(set2, 1, na2mean)
if (n.sites[1] < min.set1 | n.sites[2] < min.set2){
out.return[[gene]] <- NA
cat(' omitted')
} else {
# perform PCA for each set
pca.set1 <- prcomp(set1)
pca.set2 <- prcomp(set2)
# do CCA on PC scores
cc.res <- cancor(pca.set1$x[, 1:npcs], pca.set2$x[, 1:npcs])
# do LRT w/ bartlett correction for CCA
n <- nrow(set1)
cc.rho2 <- rev(cc.res$cor^2)
test.stat <- (-1)*(n - 1 - .5 * (npcs + npcs + 1)) * log(cumprod(1 - cc.rho2))
df <- (npcs - length(cc.rho2):1 + 1 ) * (npcs - length(cc.rho2):1 + 1 )
p.value <- (1 - pchisq(test.stat, df))
test.stat <- rev(test.stat)
df <- rev(df)
p.value <- rev(p.value)
n.ccs <- npcs
# compute canonical covariate scores and loadings --------------------------------------
set1.scores <- pca.set1$x[, 1:npcs, drop=FALSE] %*% cc.res$xcoef[, 1:n.ccs, drop=FALSE]
set2.scores <- pca.set2$x[, 1:npcs, drop=FALSE] %*% cc.res$ycoef[, 1:n.ccs, drop=FALSE]
set1.loads <- cor(set1, set1.scores)
set2.loads <- cor(set2, set2.scores)
# redundancy index
dat2cc.set1 <- colSums((colVars(set1) * set1.loads^2)/sum(colVars(set1)))
dat2cc.set2 <- colSums((colVars(set2) * set2.loads^2)/sum(colVars(set2)))
set1.redundancy <- dat2cc.set1*(cc.res$cor^2)[1:n.ccs]
set2.redundancy <- dat2cc.set2*(cc.res$cor^2)[1:n.ccs]
# consolidate results into a list --------------------------------------
output <- list()
output$test.results <- cbind(chisq_stat=test.stat,
df=df,
p_value=p.value,
set1_r2=set1.redundancy,
set2_r2=set2.redundancy)
output$loadings <- list()
output$loadings$set1 <- set1.loads
output$loadings$set2 <- set2.loads
output$scores <- list()
output$scores$set1 <- set1.scores
output$scores$set2 <- set2.scores
out.return[[gene]] <- output
}
ind <- ind + 1
cat('\n')
}
# consolidate results ---------------------------------------
out.final <- list()
# testing results
out.final$testing.results <- lapply(out.return, function(x){
if(!is.na(x)){
x$test.results
} else {
rep(NA, 5)
}
})
# set 1 cc scores
out.final$set1.scores <- lapply(out.return, function(x, n.c){
if(!is.na(x)){
x$scores$set1
} else {
rep(NA, n.c)
}
}, n.c=dim(full.set)[2])
# set 2 cc scores
out.final$set2.scores <- lapply(out.return, function(x, n.c){
if(!is.na(x)){
x$scores$set2
} else {
rep(NA, n.c)
}
}, n.c=dim(full.set)[2])
# set1 loadings
out.final$set1.loadings <- lapply(out.return, function(x){
if(!is.na(x)){
x$loadings$set1
} else {
NA
}
})
# set 2 loadings
out.final$set2.loadings <- lapply(out.return, function(x){
if(!is.na(x)){
x$loadings$set2
} else {
NA
}
})
out.final
}
na2mean <- function(x){
x[is.na(x)] <- mean(na.omit(x))
return(x)
}
pca <- function(x) prcomp(t(x[, -ncol(x)]))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.