# Global knitr chunk options for this vignette: hide messages and warnings,
# cache chunk results, and use a 5.5 x 5.5 figure size.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  cache = TRUE,
  fig.width = 5.5,
  fig.height = 5.5
)
This manual describes how to perform a Regularized Generalized Canonical Correlation Analysis and then how to perform the same data analysis with the missing Canonical Correlation Analysis method implemented in the mgcca function of this package.
The mgcca package depends on the BigDataStatMeth package, so in order to install mgcca we first need to install BigDataStatMeth:
# Install the BigDataStatMeth dependency from its GitHub repository.
devtools::install_github(repo = "isglobal-brge/BigDataStatMeth")
The development version of the mgcca package can be installed from the BRGE GitHub repository:
# Install the development version of mgcca from its GitHub repository.
devtools::install_github(repo = "isglobal-brge/mgcca")
The package depends on other R/Bioconductor packages that can be installed using standard functions (install.packages and/or BiocManager::install). Then, the package can be loaded into R as usual:
# Attach mgcca so that mgcca(), plotInds(), plotVars() and getSignif() used
# later in this vignette are available.
library("mgcca")
This vignette uses data from different packages, such as RGCCA from CRAN and omicade4 from Bioconductor, which can be installed with the following commands:
# Install RGCCA (CRAN)
install.packages("RGCCA")

# Install omicade4 (Bioconductor); BiocManager is installed first if missing
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("omicade4")
# Load the Russett data shipped with RGCCA and split it into three blocks
# of variables (agriculture, industry, politics).
library(RGCCA)
data(Russett, package = "RGCCA")

Xagric <- as.matrix(Russett[, c("gini", "farm", "rent")])
Xind <- as.matrix(Russett[, c("gnpr", "labo")])
Xpolit <- as.matrix(Russett[, c("inst", "ecks", "death")])
X <- list(Xagric = Xagric, Xind = Xind, Xpolit = Xpolit)

# Row names are used as point labels. Each row is assigned the position
# (1, 2 or 3) of the largest value among demostab / demoinst / dictator;
# this integer code is later used as the plotting colour / group.
labs <- rownames(Russett)
group <- as.vector(
  apply(Russett[, c("demostab", "demoinst", "dictator")], 1, which.max)
)
names(group) <- labs
# Append a fourth block that holds all selected variables side by side.
Russett.sel <- cbind(Xagric, Xind, Xpolit)
X.rgcca <- X
X.rgcca[[4]] <- Russett.sel

# Design matrix (symmetric): blocks 1-3 are each connected only to block 4.
C <- matrix(
  c(0, 0, 0, 1,
    0, 0, 0, 1,
    0, 0, 0, 1,
    1, 1, 1, 0),
  4, 4
)

res.rgcca <- rgcca(
  X.rgcca, C,
  tau = c(0, 0, 0, 0),
  ncomp = rep(2, 4),
  scheme = "factorial",
  scale = TRUE,
  verbose = FALSE
)

# Individuals on the first two components of block 4: an empty frame is
# drawn first (type = "n"), then the row labels are placed, coloured by group.
plot(
  res.rgcca$Y[[4]][, 1], res.rgcca$Y[[4]][, 2],
  type = "n",
  xlab = "Global Component 1",
  ylab = "Global Component 2"
)
text(res.rgcca$Y[[4]][, 1], res.rgcca$Y[[4]][, 2], labs, col = group)
grid()
abline(h = 0)
abline(v = 0)
# Correlation of every selected variable with the components of block 4.
corr <- cor(Russett.sel, res.rgcca$Y[[4]])

# Variables plotted by their correlation with the first two components;
# points are replaced by the variable names (row names of `corr`).
plot(
  corr[, 1], corr[, 2],
  type = "n",
  xlab = "Global Component 1",
  ylab = "Global Component 2"
)
text(corr[, 1], corr[, 2], rownames(corr))
grid()
abline(h = 0)
abline(v = 0)
# Same analysis with mgcca, using the three original blocks and the
# function's default settings.
res.mgcca <- mgcca(X)

# Individuals (labelled, coloured by group) and variables plots.
plotInds(
  res.mgcca,
  group = as.factor(group),
  print.labels = TRUE,
  pos.leg = "topleft"
)
plotVars(res.mgcca, nlab = 5)
omic case

The nutrimouse dataset comes from a nutrition study in the mouse. It was provided by Pascal Martin from the Toxicology and Pharmacology Laboratory (French National Institute for Agronomic Research) and is available in the R package CCA. It contains information on 40 mice in two tables and a grouping variable with wild-type and PPARalpha -/- genotypes. Such information can be loaded into R by:
# Load the nutrimouse data from CCA and extract the two tables (gene
# expression and lipids) plus the genotype grouping variable.
library(CCA)
data(nutrimouse, package = "CCA")

X <- as.matrix(nutrimouse$gene)
Y <- as.matrix(nutrimouse$lipid)
group <- nutrimouse$genotype
Table X includes the gene expression of 120 genes potentially involved in nutritional problems, while table Y contains the concentrations of 21 hepatic fatty acids.
# Dimensions of both tables and number of samples per genotype.
dim(X)
dim(Y)
table(group)
# Standardise both tables (column-wise centring and scaling).
X.s <- scale(X)
Y.s <- scale(Y)

# Estimate the two regularisation parameters (no plot) and fit the
# regularised CCA. The parameters are extracted by name as scalars
# (`$lambda1`, `$lambda2`) rather than as one-element sub-lists
# (`lambda.est[1]`), so rcc() receives plain numeric values.
lambda.est <- estim.regul(X.s, Y.s, plt = FALSE)
res.rcca <- rcc(X.s, Y.s, lambda.est$lambda1, lambda.est$lambda2)

# Individuals on dimensions 1 and 2, labelled by genotype.
plt.indiv(res.rcca, 1, 2, ind.names = group)
# Named list of the two standardised tables for mgcca.
XX <- list(genes = X.s, lipids = Y.s)

# NOTE(review): `ll` collects the estimated penalties but is not used in the
# call below, which passes fixed values c(0.001, 0.1) instead -- confirm
# which of the two is intended.
ll <- c(lambda.est$lambda1, lambda.est$lambda2)
res.mgcca <- mgcca(XX, method = "penalized", lambda = c(0.001, 0.1))

# Individuals plot coloured by genotype.
plotInds(res.mgcca, group = group)
# RGCCA on the unscaled tables plus a third block with both tables bound
# column-wise (rgcca() is asked to scale via scale = TRUE).
X.rgcca <- list(X, Y, cbind(X, Y))

# Design matrix (symmetric): blocks 1 and 2 are each connected only to block 3.
C <- matrix(
  c(0, 0, 1,
    0, 0, 1,
    1, 1, 0),
  3, 3
)

res.rgcca <- rgcca(
  X.rgcca, C,
  tau = c(0, 0, 0),
  ncomp = rep(2, 3),
  scheme = "factorial",
  scale = TRUE,
  verbose = FALSE
)

# Individuals on the first two components of block 3; the genotype is used
# both as the text label and (through its integer codes) as the colour.
plot(
  res.rgcca$Y[[3]][, 1], res.rgcca$Y[[3]][, 2],
  type = "n",
  xlab = "Global Component 1",
  ylab = "Global Component 2"
)
text(
  res.rgcca$Y[[3]][, 1], res.rgcca$Y[[3]][, 2],
  group,
  col = as.numeric(group)
)
grid()
abline(h = 0)
abline(v = 0)
# Variables plot of the mgcca fit with nlab = 5.
plotVars(res.mgcca, nlab = 5)

# Show the result of getSignif() for the "genes" table at pval = 1e-3.
getSignif(res.mgcca, pval = 1e-3, df = "genes")

# Keep only the `variable` component of that result and pass it to
# plotVars() so those variables are drawn in red with their labels.
var.signif <- getSignif(res.mgcca, pval = 1e-3, df = "genes")$variable
plotVars(
  res.mgcca,
  var = var.signif,
  var.col = rep("red", length(var.signif)),
  var.lab = TRUE,
  df = "genes"
)
# Multiple co-inertia analysis with omicade4 on the same standardised
# tables; both are transposed before being passed to mcia().
library(omicade4)
res.mcia <- mcia(list(genes = t(X.s), lipid = t(Y.s)))
plotVar(res.mcia, nlab = 5)

# Overview plot with samples coloured by genotype and unlabelled.
plot(
  res.mcia,
  phenovec = group,
  sample.lab = FALSE,
  df.color = c("blue", "darkgreen")
)
# Record the R version and the attached/loaded packages used to build
# this vignette.
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.