##' Compare \code{\link{IcaSet}} objects by computing the correlation between either
##' projection values of common features or genes, or contributions of common
##' samples.
##'
##' The user must carefully choose the object on which the correlation will be
##' computed.
##' If \code{level='samples'}, the correlations are based on the mixing
##' matrices of the ICA decompositions (of dimension samples x components). \code{'A'}
##' will be typically chosen when the ICA decompositions were computed on the
##' same dataset, or on datasets that include the same samples.
##' If \code{level='features'} is
##' chosen, the correlation is calculated between the source matrices (of dimension
##' features x components) of the ICA decompositions. \code{'S'} will be typically
##' used when the ICA decompositions share common features (e.g same microarrays).
##' If \code{level='genes'}, the correlations are calculated
##' on the attributes \code{'SByGene'} which store the
##' projections of the annotated features. \code{'SByGene'} will be typically chosen
##' when ICA were computed on datasets from different technologies, for which
##' comparison is possible only after annotation into a common ID, like genes.
##'
##' \code{cutoff_zval} is only used when \code{level} is one of \code{c('genes','features')}, in
##' order to restrict the correlation to the contributing features or genes.
##'
##' When \code{cutoff_zval} is specified, for each pair of components, genes or features that are
##' included in the circle of center 0 and radius \code{cutoff_zval} are excluded from
##' the computation of the correlation.
##'
##' It must be taken into account by the user that if
##' \code{cutoff_zval} is different from \code{NULL} or \code{0}, the computation will be much
##' slowler since each pair of component is treated individually.
##'
##' @title Comparison of IcaSet objects using correlation
##' @param icaSets list of IcaSet objects, e.g results of ICA decompositions
##' obtained on several datasets.
##' @param labAn vector of names for each icaSet, e.g the the names of the
##' datasets on which were calculated the decompositions.
##' @param type.corr Type of correlation to compute, either
##' \code{'pearson'} or \code{'spearman'}.
##' @param cutoff_zval either NULL or 0 (default) if all genes are used to
##' compute the correlation between the components, or a threshold to compute
##' the correlation on the genes that have at least a scaled projection higher
##' than cutoff_zval. Will be used only when correlations are calculated on S or
##' SByGene.
##' @param level Data level of the \code{IcaSet} objects on which is applied the correlation.
##' It must correspond to a feature shared by the IcaSet objects:
##' \code{'samples'} if they were applied to common samples (correlations are computed between matrix \code{A}), \code{'features'} if they were applied to
##' common features (correlations are computed between matrix \code{S}), \code{'genes'} if they share gene IDs after
##' annotation into genes (correlations are computed between matrix \code{SByGene}).
##' @return A list whose length equals the number of pairs of \code{IcaSet} and whose elements
##' are outputs of function \code{\link{cor2An}}.
##' @export
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##'
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##'
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet object
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##' dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##' dat=dat2, alreadyAnnot=TRUE)$icaSet
##'
##' listPairCor <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"),
##' type.corr="pearson", level="genes", cutoff_zval=0)
##'
##'
##' \dontrun{
##' #### Comparison of 2 ICA decompositions obtained on 2 different gene expression datasets.
##' ## load the two datasets
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##'
##' ## Define a function used to build two examples of IcaSet objects
##' treat <- function(es, annot="hgu133a.db") {
##' es <- selectFeatures_IQR(es,10000)
##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##' colnames(exprs(es)) <- sampleNames(es)
##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##' dat=exprs(es), pData=pData(es), refSamples=character(0),
##' annotation=annot, typeID= typeIDmainz,
##' chipManu = "affymetrix", mart=mart)
##' icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##'
##' ## The pearson correlation is used as a measure of association between the gene projections
##' # on the different components (type.corr="pearson").
##' listPairCor <- compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' ## Same thing but adding a selection of genes on which the correlation between two components is computed:
##' # when considering pairs of components, only projections whose scaled values are not located within
##' # the circle of radius 1 are used to compute the correlation (cutoff_zval=1).
##' listPairCor <- compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", cutoff_zval=1, level="genes")
##' }
##' @export
##' @author Anne Biton
##' @seealso \code{\link{cor2An}}
compareAn <- function (icaSets,
labAn,
type.corr = c("pearson","spearman"),
cutoff_zval=0,
level = c("samples","features","genes")
) {
if (missing(labAn))
if (is.null(names(icaSets)))
labAn <- c(1:nbAn)
else
labAn <- names(icaSets)
nbAn <- length(icaSets)
if (length(labAn) != nbAn)
stop("labAn must have the same length than the list icaSets")
level <- match.arg(tolower(level), choices = c("features","genes","samples"))
switch(level,
features={ object="S"},
genes={object="SByGene"},
samples={object="A"}
)
type.corr <- match.arg(type.corr)
nbCompByAn <- lapply(icaSets, function(x) ncol(A(x)))
matlist <-
lapply(icaSets,
function(icaSet, object) {
data.frame(icaSet[object],check.names = FALSE, stringsAsFactors = FALSE)
}
, object = object
)
allpairs <- combn(x=1:nbAn,m = 2, simplify = FALSE)
pairnames <- unlist(lapply(allpairs, function(x,labAn) paste(labAn[x[1]],labAn[x[2]],sep="-"), labAn = labAn))
listCorPair <-
lapply(allpairs,
function(pa, matlist, labAn, cutoff_zval, type.corr) {
mat1 <- matlist[[pa[1]]]
mat2 <- matlist[[pa[2]]]
resCor <- cor2An(mat1, mat2, lab = c(labAn[pa[1]],labAn[pa[2]]), type.corr = type.corr, cutoff_zval = cutoff_zval)
}
, matlist = matlist
, labAn = labAn
, type.corr = type.corr
, cutoff_zval = if (missing(cutoff_zval)) NA else cutoff_zval
)
names(listCorPair) <- pairnames
return(listCorPair)
}
##' This function measures the correlation between two matrices containing the results of
##' two decompositions.
##'
##' Before computing the correlations, the components are scaled and restricted
##' to common row names.
##'
##' It must be taken into account by the user that if \code{cutoff_zval} is different
##' from NULL or zero, the computation will be slowler since each pair of
##' component is treated individually.
##'
##' When \code{cutoff_zval} is specified, for each
##' pair of components, genes that are included in the circle of center 0 and
##' radius \code{cutoff_zval} are excluded from the computation of the correlation
##' between the gene projection of the two components.
##' @title Correlation between two matrices
##' @param mat1 matrix of dimension features/genes x number of components, e.g
##' the results of an ICA decomposition
##' @param mat2 matrix of dimension features/genes x number of components, e.g
##' the results of an ICA decomposition
##' @param lab The vector of labels for mat1 and mat2, e.g the the names of the
##' two datasets on which were calculated the two decompositions
##' @param type.corr Type of correlation, either \code{'pearson'} or
##' \code{'spearman'}
##' @param cutoff_zval cutoff_zval: 0 (default) if all genes are
##' used to compute the correlation between the components, or a threshold to
##' compute the correlation on the genes that have at least a scaled projection
##' higher than cutoff_zval.
##' @return This function returns a list consisting of:
##' \item{cor}{a matrix of dimensions '(nbcomp1+nbcomp2) x (nbcomp1*nbcomp2)',
##' containing the correlation values between each pair of components,}
##' \item{pval}{ a matrix of dimension '(nbcomp1+nbcomp2) x (nbcomp1*nbcomp2)',
##' containing the p-value of the correlation tests for each
##' pair of components,}
##' \item{inter}{ the intersection between the features/genes of \code{mat1}
##' and \code{mat2},}
##' \item{labAn}{ the labels of the compared matrices.}
##' @author Anne Biton
##' @export
##' @examples
##' cor2An(mat1=matrix(rnorm(10000),nrow=1000,ncol=10), mat2=matrix(rnorm(10000),nrow=1000,ncol=10),
##' lab=c("An1","An2"), type.corr="pearson")
##'
##' @seealso \code{rcorr}, \code{cor.test}, \code{\link{compareAn}}
cor2An <- function (mat1,
mat2,
lab,
type.corr = c("pearson","spearman"),
cutoff_zval = 0
) {
if (is.null(cutoff_zval))
cutoff_zval <- 0
if (cutoff_zval < 0)
stop("'cutoff_zval' must be positive")
comp1 <- comp2 <- NULL
mat1 <- data.frame(scale(mat1), check.names = FALSE, stringsAsFactors = FALSE)
mat2 <- data.frame(scale(mat2), check.names = FALSE, stringsAsFactors = FALSE)
nbcomp1 <- ncol(mat1)
nbcomp2 <- ncol(mat2)
gg <- intersect(rownames(mat1),rownames(mat2))
if (length(gg)<10)
warning(paste("Less than 10 elements are common between the icaSet objects ", lab[1], " and ", lab[2], ", the comparison of these two decompositions should be reconsidered.", sep = ""))
mat1 <- mat1[gg,]
mat2 <- mat2[gg,]
if (missing(lab))
lab <- paste("an",c(1:2),sep = "")
if (cutoff_zval == 0) {
r <- signif(rcorr(as.matrix(mat1), as.matrix(mat2), type = type.corr)$r, digits = 5)
dimnames(r) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))
rpval <- NULL
rpval <- rcorr(as.matrix(mat1),as.matrix(mat2),type=type.corr)$P
dimnames(rpval) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))
}
else {
## transform matrix into lists
l1 <- lapply(as.list(mat1),
function(x,n) {
names(x) <- n
return(x)
}
, n = rownames(mat1)
)
l2 <- lapply(as.list(mat2),
function(x,n) {
names(x) <- n
return(x)
}
, n = rownames(mat2)
)
rpval <- matrix(ncol = nbcomp2+nbcomp1 , nrow =nbcomp2+nbcomp1)
r <- matrix(ncol = nbcomp2+nbcomp1 , nrow =nbcomp2+nbcomp1)
for (i in 1:nbcomp1) {
reslist <-
sapply(1:nbcomp2,
function(j, i, l1, l2, cutoff_zval, gg) {
dc <- data.frame(comp1=l1[[i]],comp2=l2[[j]])
rownames(dc) <- gg
g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval ))
inter <- gg[!(gg %in% g2del)]
res <- cor.test(as.matrix(mat1[inter,i]),as.matrix(mat2[inter,j]), method = type.corr)
return(list(cor=res$estimate,pval=res$p.value))
}
, i = i
, l1 = l1
, l2 = l2
, cutoff_zval = cutoff_zval
, gg = gg)
reslist <- as.data.frame(reslist)
r[i,(nbcomp1+1):(nbcomp1+nbcomp2)] <- r[(nbcomp1+1):(nbcomp1+nbcomp2),i] <- unlist(reslist["cor",])
rpval[i,(nbcomp1+1):(nbcomp1+nbcomp2)] <- rpval[(nbcomp1+1):(nbcomp1+nbcomp2),i] <- unlist(reslist["pval",])
for (j in 1:nbcomp1) {
dc <- data.frame(comp1=l1[[i]],comp2=l1[[j]])
rownames(dc) <- gg
g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval ))
inter <- names(l1[[1]])[!(names(l1[[1]]) %in% g2del)]
res <- cor.test(as.matrix(mat1[inter,i]),as.matrix(mat1[inter,j]), method = type.corr)
rpval[i,j] <- rpval[j,i] <- res$p.value
r[i,j] <- r[j,i] <- res$estimate
}
}
for (i in 1:nbcomp2) {
for (j in 1:nbcomp2) {
dc <- data.frame(comp1=l2[[i]],comp2=l2[[j]])
rownames(dc) <- gg
g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval ))
inter <- names(l2[[1]])[!(names(l2[[1]]) %in% g2del)]
res <- cor.test(as.matrix(mat2[inter,i]),as.matrix(mat2[inter,j]), method = type.corr)
rpval[nbcomp1+i,nbcomp1+j] <- rpval[nbcomp1+j,nbcomp1+i] <- res$p.value
r[nbcomp1+i,nbcomp1+j] <- r[nbcomp1+j,nbcomp1+i] <- res$estimate
}
}
dimnames(rpval) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))
dimnames(r) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""), paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))
}
return(list(cor=r,pval=rpval,inter = gg, nbComp = c(nbcomp1,nbcomp2), labAn = lab))
}
correl2Comp <- function (
### This function computes the correlation between two components.
comp1
### The first component, a vector of projections or contributions indexed by labels
,comp2
### The second component, a vector of projections or contributions indexed by labels
,type.corr = "pearson"
### name of the type of correlation to compute, either 'pearson' or 'spearman'
,plot = FALSE
### if TRUE the plot of comp1 vs comp2 is drawn
,cutoff_zval = 0
### either NULL or 0 (default) if all genes are used to compute the correlation between the components, or a threshold to compute the correlation on the genes that have at least a scaled projection higher than cutoff_zval.
,test = FALSE
### if TRUE the p-value of correlation is returned instead of the correlation value
,alreadyTreat = FALSE
### if TRUE comp1 and comp2 are considered as being already treated (i.e scaled and restricted to common elements)
) {
##details<< Before computing the correlation, the components are scaled and restricted to common labels.
## When \code{cutoff_zval} is different from 0, the elements that are included in the circle of center 0 and radius \code{cutoff_zval} are not taken into account during the computation of the correlation.
if(!alreadyTreat) {
comp1 <- (comp1-mean(comp1))/sd(comp1)
comp2 <- (comp2-mean(comp2))/sd(comp2)
gg <- intersect(names(comp1),names(comp2))
comp1 <- comp1[gg]
comp2 <- comp2[gg]
}
dc <- data.frame(comp1=comp1,comp2=comp2)
rownames(dc) <- gg
g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval ))
genes.intersect <- gg[!(gg %in% g2del)]
if (length(genes.intersect) < 4)
if (test) return(1) else return(0)
comp1.ord = comp1[genes.intersect]
comp2.ord = comp2[genes.intersect]
if (test) {
r = cor.test(comp1.ord,comp2.ord,method = type.corr, alternative = "two.sided")$p.value
}
else {
r = cor(comp1.ord,comp2.ord,method = type.corr)
r = signif(r, digits = 3)
}
if (plot) plot(comp1.ord,comp2.ord,xlab = paste("comp1"),ylab=paste("comp2"),pch = 16,sub = paste("corr_",type.corr," = ",r,sep=""))
return(r)
### This function returns either the correlation value or the p-value of the correlation test.
}
##' This function builds a data.frame describing for each node of the graph
##' its ID and which analysis/data it comes from.
##'
##' The created file is used in Cytoscape.
##' @title Generate node attributes
##' @param nbAn Number of analyses being considered, i.e number of IcaSet
##' objects
##' @param nbComp Number of components by analysis, if of length 1 then
##' it is assumed that each analysis has the same number of components.
##' @param labAn Labels of the analysis, if missing it will be generated
##' as an1, an2, ...
##' @param labComp List containing the component labels indexed by analysis, if missing
##' will be generated as comp1, comp2, ...
##' @param file File where the description of the node attributes will be
##' written
##' @return A data.frame describing each node/component
##' @export
##' @examples
##' ## 4 datasets, 20 components calculated in each dataset, labAn
##' nodeAttrs(nbAn=4, nbComp=20, labAn=c("tutu","titi","toto","tata"))
##'
##' @author Anne Biton
nodeAttrs <- function(nbAn,
nbComp,
labAn,
labComp,
file
) {
nb <- an <- lab <- comp <- NULL
if (missing(labAn))
labAn <- paste(rep("an",nbAn),1:nbAn,sep = "")
else if (length(labAn) != nbAn)
stop("The length of 'labAn' must equal 'nbAn'.")
if (missing(labComp)) {
if (length(nbComp) == 1) {
labid <-
paste(
rep(paste(rep("comp",nbComp),c(1:nbComp),sep = ""),nbAn),
paste(rep("an",nbComp*nbAn),sapply(c(1:nbAn),rep,nbComp),sep=""),
sep = "_"
)
labComp <-rep(paste(rep("comp",nbComp),c(1:nbComp),sep = ""),nbAn)
nbComp <- rep(nbComp,nbAn)
}
else {
if (length(nbComp) != nbAn)
stop("The provided number of components must equal the number of analyses")
labid <- foreach(nb = nbComp, an = labAn) %do% {paste(paste(rep("comp",nb),c(1:nb),sep = ""),rep(an,nb),sep="_")}
labid <- unlist(labid)
labComp <- unlist(foreach(nb = nbComp, an = 1:nbAn) %do% {paste(rep("comp",sum(nb)),c(1:nb),sep = "")})
}
}
else {
if (!is.list(labComp))
stop("The component labels must be provided using a list.")
if (length(labComp) != nbAn)
stop("The length of the list containing the component labels 'labComp' must have the same length than the list containing the analysis labels 'labAn'.")
names(labComp) <- names(labAn)
foreach(lab = labComp, nb = nbComp) %do% {
if (length(lab) != nb)
stop("The length of the list 'labComp' containing component labels does not map to 'nbComp'.")
}
labid <- unlist(foreach(nb = nbComp, an = labAn, comp = labComp) %do% {paste(comp,rep(an,nb),sep="_")})
labComp <- unlist(labComp)
}
compnb <- unlist(lapply(nbComp,function(x) c(1:x)))
names(nbComp) <- labAn
an <- unlist(lapply(names(nbComp), function(x,nb) rep(x,nb[x]), nb = nbComp))
dataAttr <- data.frame(id = labid, labComp = labComp, indComp = compnb, labAn = an, stringsAsFactors = FALSE, check.names = FALSE)
if (!missing(file))
if (!is.null(file))
write.table(dataAttr,file=file,sep = " ", row.names = FALSE, quote = FALSE)
return(dataAttr)
}
##' compareAn2graphfile
##'
##' This function builds a correlation graph from the outputs of function \code{\link{compareAn}}.
##'
##' When correlations are considered (\code{useVal}="cor"), absolute values
##' are used since the components have no direction.
##'
##' If \code{useMax} is \code{TRUE} each component is linked to the most correlated component of
##' each different \code{IcaSet}.
##'
##' If \code{cutoff} is specified, only
##' correlations exceeding this value are taken into account during the graph construction.
##' For example, if \code{cutoff} is 1, only relationships between components
##' that correspond to a correlation value larger than 1 will be included.
##'
##' When \code{useVal="pval"} and \code{useMax=TRUE}, the minimum value is taken
##' instead of the maximum.
##'
##' @param listPairCor The output of the function \code{\link{compareAn}}, containing the
##' correlation between several pairs of objects of class \code{\link{IcaSet}}.
##' @param useMax If TRUE, the graph is restricted to edges that correspond to
##' maximum score, see details
##' @param cutoff Cutoff used to select pairs that will be included in the
##' graph.
##' @param useVal The value on which is based the graph, either \code{"cor"} for
##' correlation or \code{"pval"} for p-values of correlation tests.
##' @param file File name.
##' @return A data.frame with the graph description, has
##' two columns \code{n1} and \code{n2} filled with node IDs, each row denotes that there is an edge from \code{n1} to \code{n2}. Additional columns quantify the strength of association: correlation (\code{cor}), p-value (\code{pval}), (\code{1-abs(cor)}) (\code{distcor}), log10-pvalue (\code{logpval}).
##' @author Anne Biton
##' @seealso \code{\link{compareAn}}, \code{\link{cor2An}}
##' @export
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##'
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##'
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet object
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##' dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##' dat=dat2, alreadyAnnot=TRUE)$icaSet
##'
##' resCompareAn <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"),
##' type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"),
##' compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt")
##'
##'
##' \dontrun{
##' #### Comparison of 2 ICA decompositions obtained on 2 different gene expression datasets.
##' ## load the two datasets
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##'
##' ## Define a function used to build two examples of IcaSet objects
##' treat <- function(es, annot="hgu133a.db") {
##' es <- selectFeatures_IQR(es,10000)
##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##' colnames(exprs(es)) <- sampleNames(es)
##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##' dat=exprs(es), pData=pData(es), refSamples=character(0),
##' annotation=annot, typeID= typeIDmainz,
##' chipManu = "affymetrix", mart=mart)
##' icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##'
##' ## Compute correlation between every pair of IcaSet objects.
##' resCompareAn <- compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' ## Same thing but adding a selection of genes on which the correlation between two components is computed:
##' # when considering pairs of components, only projections whose scaled values are not located within
##' # the circle of radius 1 are used to compute the correlation (cutoff_zval=1).
##' resCompareAn <- compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", cutoff_zval=1, level="genes")
##'
##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"),
##' ## i.e, component A of analysis i is linked to component B of analysis j,
##' ## only if component B is the most correlated component to A amongst all component of analysis j.
##' compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt")
##'
##' ## Restrict the graph to correlation values exceeding 0.4
##' compareAn2graphfile(listPairCor=resCompareAn, useMax=FALSE, cutoff=0.4, useVal="cor", file="myGraph.txt")
##'
##' }
compareAn2graphfile <- function (listPairCor,
useMax = TRUE,
cutoff = NULL,
useVal = c("cor","pval"),
file = NULL
) {
useVal <- match.arg(useVal)
dataGraphs <-
lapply(listPairCor,
function(res, useMax, cutoff, useVal) {
dd <- abs(res[[useVal]])
nbcomp1 <- res$nbComp[1]
nbcomp2 <- res$nbComp[2]
attrGraph <- c("cor",if ("pval" %in% names(res)) "pval")
if (useMax) {
if (useVal == "pval")
op <- "which.min"
else
op <- "which.max"
## find correlation max both direction an1 <-> an2 since the max is not necessarily reciprocal
subdd <- dd[(nbcomp1+1):(nbcomp1+nbcomp2), 1:nbcomp1]
ind <- apply(subdd,2,op)
coord <- as.list(data.frame(t(cbind(ind,1:nbcomp1))))
dataGraph <- data.frame(n1=rownames(dd)[1:nbcomp1],
n2=rownames(dd)[(nbcomp1+1):(nbcomp1+nbcomp2)][ind])
dataGraph[[useVal]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = subdd))
addval <- setdiff(attrGraph,useVal)
if (length(addval)==1)
dataGraph[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][(nbcomp1+1):(nbcomp1+nbcomp2), 1:nbcomp1]))
## an2 -> an1
subdd <- dd[1:nbcomp1,(nbcomp1+1):(nbcomp1+nbcomp2)]
ind <- apply(subdd,2,op)
coord <- as.list(data.frame(t(cbind(ind,1:nbcomp2))))
dataGraph2 <- data.frame(n1=rownames(dd)[(nbcomp1+1):(nbcomp1+nbcomp2)],
n2=rownames(dd)[1:nbcomp1][ind])
dataGraph2[[useVal]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = subdd))
addval <- setdiff(attrGraph,useVal)
if (length(addval)==1)
dataGraph2[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][1:nbcomp1,(nbcomp1+1):(nbcomp1+nbcomp2)]))
dataGraph <- rbind(dataGraph,dataGraph2)
pval <- NULL
if (!is.null(cutoff)) {
if (useVal == "pval")
dataGraph <- subset(dataGraph, pval <= cutoff)
else
dataGraph <- subset(dataGraph, cor >= cutoff)
}
}
else if (!is.null(cutoff)) {
subdd <- dd[1:nbcomp1, (nbcomp1+1):(nbcomp1+nbcomp2)]
dataGraph <-
lapply(as.list(colnames(subdd)),
function(nn, cutoff, subdd, useVal, attrGraph, res,op, subInd) {
x <- subdd[,nn]
ind <- which(eval(parse(text=paste("x",op,"cutoff"))))
if (length(ind) > 0) {
dataGraph <- data.frame(n1 = rep(nn,length(ind)), n2 = rownames(subdd)[ind])
dataGraph[[useVal]] <- subdd[ind,nn]
addval <- setdiff(attrGraph,useVal)
if (length(addval)==1) {
coord <- as.list(data.frame(t(cbind(dataGraph$n1,dataGraph$n2))))
dataGraph[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][subInd[[1]],subInd[[2]]]))
}
return(dataGraph)
}
}
, cutoff = cutoff
, subdd = subdd
, useVal = useVal
, attrGraph = attrGraph
, res = res
, op = if (useVal == "pval") "<=" else ">="
, subInd = list(1:nbcomp1, (nbcomp1+1):(nbcomp1+nbcomp2))
)
dataGraph <- do.call(rbind,dataGraph)
}
return(dataGraph)
}
, useMax = useMax
, cutoff = if (missing(cutoff)) NULL else cutoff
, useVal = useVal
)
dataGraphs <- do.call(rbind,dataGraphs)
dataGraphs$invcor = 1/(10*as.numeric(dataGraphs$cor))
dataGraphs$distcor = 1-abs(as.numeric(dataGraphs$cor))
dataGraphs[,c("cor","pval","invcor","distcor")] <- apply(dataGraphs[,c("cor","pval","invcor","distcor")],2,as.numeric)
dataGraphs <- annotReciprocal(dataGraph = dataGraphs, keepOnlyReciprocal = FALSE)# file
dataGraphs[,c("cor","pval","invcor","distcor")] <- apply(dataGraphs[,c("cor","pval","invcor","distcor")],2,as.numeric)
if (!missing(file))
if (!is.null(file))
write.table(dataGraphs, file = file, row.names = FALSE, quote = FALSE, sep = "\t")
return(dataGraphs)
}
##' annotReciprocal
##'
##' This function notes edges of a graph as reciprocal or not.
##'
##'
##' @param dataGraph data.frame which contains the graph description, must have
##' two columns \code{n1} and \code{n2} filled with node IDs, each row denoting there is an edge from \code{n1} to \code{n2}.
##' @param file file where the graph description is written
##' @param keepOnlyReciprocal if TRUE \code{dataGraph} is restricted to
##' reciprocal edges, else all edges are kept (default).
##' @return This function returns the argument \code{dataGraph} with an
##' additional column named 'reciprocal' which contains TRUE if the edge
##' described by the row is reciprocal, and FALSE if it is not reciprocal.
##' @examples
##' dg <- data.frame(n1=c("A","B","B","C","C","D","E","F"),n2=c("B","G","A","B","D","C","F","E"))
##' annotReciprocal(dataGraph=dg)
##'
##' @export
##' @author Anne Biton
annotReciprocal <- function (dataGraph,
file,
keepOnlyReciprocal = FALSE
) {
names(dataGraph)[1:2] <- c("n1","n2")
dataGraph$keep <- rep(NA, length=nrow(dataGraph))
dataGraph_keep <- apply( dataGraph, MARGIN = 1 ,
function (r, dataGraph) {
dataN2 = subset(dataGraph, dataGraph$n1 == r["n2"])
if (r["n1"] %in% dataN2$n2) r["keep"] = TRUE
else r["keep"] = FALSE
return(r)
}
, dataGraph = dataGraph
)
dataGraph_keep <- as.data.frame(t(dataGraph_keep), check.names = FALSE, stringsAsFactors = FALSE)
names(dataGraph_keep)[ncol(dataGraph_keep)] <- "reciprocal"
if (keepOnlyReciprocal) {
dataGraph_keep<- subset(dataGraph_keep, dataGraph_keep$reciprocal == TRUE )
dataGraph_keep <- dataGraph_keep[,-ncol(dataGraph_keep)]
}
if (!missing(file))
if(!is.null(file))
write.table(dataGraph_keep, file = file, sep = "\t", quote = FALSE, row.names = FALSE)
return(dataGraph_keep)
}
##' This function plots the
##' correlation graph in an interactive device using function \code{tkplot}.
##'
##' You have to slighly move the nodes to see cliques because strongly related nodes are often superimposed.
##' The \code{edgeWeight} column is used to weight the edges within the
##' fruchterman.reingold layout available in the package \code{igraph}.
##'
##' The argument
##' \code{nodeCol} typically denotes the column containing the names of the datasets.
##' Colors are automatically
##' attributed to the nodes using palette Set3 of package \code{RColorBrewer}. The
##' corresponding colors can be directly specified in the 'col' argument. In
##' that case, 'col' must be a vector of colors indexed by the unique elements
##' contained in \code{nodeCol} column (e.g dataset ids).
##'
##' As for colors, one can define
##' the column of \code{nodeAttrs} that is used to define the node shapes. The
##' corresponding shapes can be directly specified in the \code{shape} argument. In
##' that case, \code{shape} must be one of \code{c("circle","square", " vcsquare", "rectangle", "crectangle", "vrectangle")} and must be
##' indexed by the unique elements of \code{nodeShape} column.
##'
##' Unfortunately, shapes
##' can't be taken into account when tkplot is TRUE (interactive plot).
##'
##' If \code{reciproCol} is not missing, it is used to color the edges, either in grey if the
##' edge is not reciprocal or in black if the edge is reciprocal.
##'
##'
##' @title Plots graph using
##' @param dataGraph A data.frame containing the graph description. It must
##' have two columns \code{n1} and \code{n2}, each row denoting that there is an edge from n1
##' to n2. Node labels in columns \code{n1} and \code{n2} of \code{dataGraph} must correspond to
##' node IDs in column \code{id} of \code{nodeAttrs}.
##' @param edgeWeight The column of dataGraph used to weight edges.
##' @param nodeAttrs A data.frame with node description, see function \code{nodeAttrs}.
##' @param nodeShape Denotes the column of \code{nodeAttrs} used to attribute the node shapes.
##' @param nodeCol Denotes the column of \code{nodeAttrs} used to
##' color the nodes in the graph.
##' @param nodeName Denotes the column of \code{nodeAttrs} used
##' as labels for the nodes in the graph.
##' @param col A vector of colors, for the nodes, indexed by the unique elements of \code{nodeCol}
##' column from \code{nodeAttrs}. If missing, colors will be automatically attributed.
##' @param shape A vector of shapes indexed by the unique elements of
##' column \code{nodeShape} from \code{nodeAttrs}. If missing, shapes will be automatically
##' attributed.
##' @param title Title for the plot
##' @param reciproCol Denotes the column of \code{dataGraph} containing \code{TRUE} if the
##' row defines a reciprocal node, else \code{FALSE}. See \code{\link{annotReciprocal}}.
##' @param tkplot If TRUE, performs interactive plot with function \code{tkplot}, else uses \code{plot.igraph}.
##' @param \dots Additional parameters as required by \code{tkplot}.
##' @return A list consisting of \describe{
##' \item{dataGraph}{a data.frame defining the correlation
##' graph}
##' \item{nodeAttrs}{a data.frame describing the node
##' of the graph}
##' \item{graph}{the graph as an object of class \code{igraph}}
##' \item{graphid}{the id of the graph plotted using \code{tkplot}} }
##' @export
##' @author Anne Biton
##' @seealso \code{\link{compareAn}}, \code{\link{nodeAttrs}}, \code{\link{compareAn2graphfile}}, \code{\link{runCompareIcaSets}}
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##'
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##'
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet object
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##' dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##' dat=dat2, alreadyAnnot=TRUE)$icaSet
##' icaSets <- list(icaSettoy1, icaSettoy2)
##'
##' resCompareAn <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"),
##' type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"),
##' dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt")
##'
##' ## construction of the data.frame with the node description
##' nbComp <- rep(3,2) #each IcaSet contains 3 components
##' nbAn <- 2 # we are comparing 2 IcaSets
##' # labels of components created as comp*i*
##' labComp <- foreach(icaSet=icaSets, nb=nbComp, an=1:nbAn) %do% {
##' paste(rep("comp",sum(nb)),1:nbComp(icaSet),sep = "")}
##'
##' # creation of the data.frame with the node description
##' nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp,
##' labAn = c("toy1","toy2"), file = "nodeInfo.txt")
##'
##' ## Plot correlation graph, slightly move the attached nodes to make the cliques visible
##' ## use tkplot=TRUE to have an interactive graph
##' res <- plotCorGraph(title = "Compare toy 1 and 2", dataGraph = dataGraph, nodeName = "indComp", tkplot = FALSE,
##' nodeAttrs = nodeDescr, edgeWeight = "cor", nodeShape = "labAn", reciproCol = "reciprocal")
##'
##'
##' \dontrun{
##' ## load two microarray datasets
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##'
##' ## Define a function used to build two examples of IcaSet objects
##' treat <- function(es, annot="hgu133a.db") {
##' es <- selectFeatures_IQR(es,10000)
##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##' colnames(exprs(es)) <- sampleNames(es)
##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##' dat=exprs(es), pData=pData(es), refSamples=character(0),
##' annotation=annot, typeID= typeIDmainz,
##' chipManu = "affymetrix", mart=mart)
##' icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##'
##' icaSets <- list(icaSetMainz, icaSetVdx)
##' labAn <- c("Mainz", "Vdx")
##'
##' ## correlations between gene projections of each pair of IcaSet
##' resCompareAn <- compareAn(icaSets = icaSets, level = "genes", type.corr= "pearson",
##' labAn = labAn, cutoff_zval=0)
##'
##' ## construction of the correlation graph using previous output
##' dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, file="corGraph.txt")
##'
##' ## construction of the data.frame with the node description
##' nbComp <- rep(10,2) #each IcaSet contains 10 components
##' nbAn <- 2 # we are comparing 2 IcaSets
##' # labels of components created as comp*i*
##' labComp <- foreach(icaSet=icaSets, nb=nbComp, an=1:nbAn) %do% {
##' paste(rep("comp",sum(nb)),1:nbComp(icaSet),sep = "")}
##'
##' # creation of the data.frame with the node description
##' nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp,
##' labAn = labAn, file = "nodeInfo.txt")
##'
##' ## Plot correlation graph, slightly move the attached nodes to make the cliques visible
##' res <- plotCorGraph(title = "Compare two ICA decomsitions obtained on \n two
##' microarray-based data of breast tumors", dataGraph = dataGraph, nodeName = "indComp",
##' nodeAttrs = nodeDescr, edgeWeight = "cor", nodeShape = "labAn", reciproCol = "reciprocal")
##'
##' }
plotCorGraph <- function(
dataGraph,
edgeWeight = "cor",
nodeAttrs,
nodeShape,
nodeCol = "labAn",
nodeName = "indComp",
col,
shape,
title = "",
reciproCol = "reciprocal",
tkplot = FALSE,
...
) {
if (!(edgeWeight %in% colnames(dataGraph))) {
stop (paste(edgeWeight,"is not available within the columns of dataGraph."))
}
nodeName <- match.arg(nodeName, choices = colnames(nodeAttrs))
if (missing(nodeShape))
nodeAttrs$shape <- rep("circle",nrow(nodeAttrs))
else if (!(nodeShape %in% colnames(nodeAttrs))) {
nodeAttrs$shape <- rep("circle",nrow(nodeAttrs))
warning(paste("The column ", nodeShape, " is not available in 'dataGraph'.",sep = ""))
}
else {
potShapes <- c("circle","square", "csquare", "rectangle", "crectangle", "vrectangle")
autoShape <- TRUE
if (!missing(shape)) {
autoShape <- FALSE
if (length(intersect(names(shape),unique(nodeAttrs[[nodeShape]]))) != length(unique(nodeAttrs[[nodeShape]]))) {
warning("'shape' argument is incorrect, some elements of nodeAttrs$'nodeShape' are not indexed. Shapes will be attributed automatically.")
autoShape <- TRUE
}
if (length(setdiff(shape,potShapes))>0) {
warning("'shape' argument is incorrect, the available shapes are: 'circle','square', 'csquare', 'rectangle', 'crectangle', and 'vrectangle'. Shapes will be attributed automatically.")
autoShape <- TRUE
}
}
if (autoShape) {
levs <- unique(nodeAttrs[[nodeShape]])
if (length(levs) > length(potShapes)) {
warning("R graphs can only handle 6 node shapes at the most.")
shape <- rep(potShapes,6)[1:length(levs)]
}
else
shape <- potShapes[1:length(levs)]
names(shape) <- levs
nodeAttrs$shape <- shape[nodeAttrs[[nodeShape]]]
}
else
nodeAttrs$shape <- shape[nodeAttrs[[nodeShape]]]
}
if (!(nodeCol %in% colnames(nodeAttrs))) {
stop(paste("The column ",nodeCol, " is not available in nodeAttrs.",sep=""))
}
else {
nbAn <- length(unique(nodeAttrs[[nodeCol]]))
if (!missing(col) && !is.null(col)) {
if (length(intersect(names(col),unique(nodeAttrs[[nodeCol]]))) != length(unique(nodeAttrs[[nodeCol]]))) {
warning("'col' argument is incorrect, some elements of nodeAttrs$'nodeCol' are not indexed. Colors will be attributed automatically.")
colAn <- brewer.pal(nbAn,"Set3")
names(colAn) <- unique(nodeAttrs[[nodeCol]])
nodeAttrs$col = colAn[nodeAttrs[[nodeCol]]]
}
else
nodeAttrs$col <- col[nodeAttrs[[nodeCol]]]
}
else {
colAn <- brewer.pal(nbAn,"Set3")
names(colAn) <- unique(nodeAttrs[[nodeCol]])
nodeAttrs$col = colAn[nodeAttrs[[nodeCol]]]
}
}
n1 <- NULL
nodes <- as.character(unique(as.character(dataGraph$n1)))
edges <- llply(nodes,
function(n,data,edgeWeight) {
ll <- list(edges = as.character(subset(data, n1 == n)$n2), weights = subset(data, n1 == n)[[edgeWeight]])
},
data = dataGraph, edgeWeight = edgeWeight)
names(edges) <- nodes
g <- new("graphNEL", nodes = nodes, edgeL = edges, edgemode = "directed")
# build igraph object from graphNEL object
ig <- igraph.from.graphNEL(g, name = TRUE, weight = TRUE)
V(ig)$color <- nodeAttrs$col[match(V(ig)$name,nodeAttrs$id)]
V(ig)$shape <- nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)]
E(ig)$width <- 10^E(ig)$weight
indHighCor <- which(abs(E(ig)$weight)>0.4)
indLowCor <- which(abs(E(ig)$weight)<=0.4)
E(ig)$weight <- 1/abs(log2(abs(E(ig)$weight)))
E(ig)$weight[indHighCor] <- E(ig)$weight[indHighCor]+6
#E(ig)$weight[indLowCor] <- E(ig)$weight[indLowCor]-0.25
if (!missing(reciproCol)) {
if (reciproCol %in% colnames(dataGraph)) {
nodes <- as.character(unique(as.character(dataGraph$n1)))
colEdges <- llply(nodes,
function(n,data,reciproCol) {
c('TRUE' = "black", 'FALSE' = "gray50")[as.character(subset(data, n1 == n)[[reciproCol]])]
},
data = dataGraph, reciproCol = reciproCol)
colEdges <- unlist(colEdges)
E(ig)$color <- colEdges
}
else {
warning(paste("No column of 'dataGraph' has the name",reciproCol))
E(ig)$color <- "black"
}
}
else
E(ig)$color <- "black"
lay <- layout.fruchterman.reingold(ig,niter=500,area=vcount(ig)^2.3,repulserad=vcount(ig)^100, weights = E(ig)$weight)
graph <- ig
if (!capabilities()[["X11"]])
tkplot <- FALSE
if (tkplot) {
if(capabilities()[["X11"]] & tolower(Sys.info()["sysname"])!="windows") {
dimScreen <- system("xdpyinfo | grep dimensions", intern = TRUE)
dimScreen <- as.numeric(strsplit(gsub(gsub(gsub(dimScreen, pattern = " ", replacement = ""), pattern = "[A-Za-z0-9]*:", replacement = ""), pattern = "pixels\\([A-Za-z0-9]*\\)", replacement = ""), split = "x")[[1]])
}
else
dimScreen <- c(450,450)
graphid <- tkplot(ig,
layout = lay,
vertex.label = nodeAttrs[[nodeName]][match(V(ig)$name,nodeAttrs$id)],
vertex.shape = nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)],
canvas.width=dimScreen[1], canvas.height=dimScreen[2],
...)
tkplot.fit.to.screen(graphid)
}
else {
graph <- plot(ig,
layout = lay,
vertex.label = nodeAttrs[[nodeName]][match(V(ig)$name,nodeAttrs$id)],
vertex.shape = nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)],
main = title,
...
)
graph <- ig
graphid = ""
}
return(list(dataGraph=dataGraph, nodeAttrs = nodeAttrs, graph = graph, graphid=graphid))
}
##' runCompareIcaSets
##'
##' This function encompasses the comparison of several IcaSet objects using correlations
##' and the plot of the corresponding correlation graph.
##' The IcaSet objects are compared by calculating the correlation between either
##' projection values of common features or genes, or contributions of common
##' samples.
##'
##' This function calls four functions: \code{\link{compareAn}} which computes the
##' correlations, \code{\link{compareAn2graphfile}} which builds the graph,
##' \code{\link{nodeAttrs}} which builds the node description data, and \code{\link{plotCorGraph}}
##' which uses tkplot to plot the graph in an interactive device.
##'
##' If the user wants to see the correlation graph in Cytoscape, he must fill
##' the arguments \code{fileDataGraph} and \code{fileNodeDescr}, in order to
##' import the graph and its node descriptions as a .txt file in Cytoscape.
##'
##' When \code{labAn} is missing, each element i of \code{icaSets} is labeled as
##' 'Ani'.
##'
##' The user must carefully choose the data level used in the comparison:
##' If \code{level='samples'}, the correlations are based on the mixing
##' matrices of the ICA decompositions (of dimension samples x components). \code{'A'}
##' will be typically chosen when the ICA decompositions were computed on the
##' same dataset, or on datasets that include the same samples.
##' If \code{level='features'} is
##' chosen, the correlation is calculated between the source matrices (of dimension
##' features x components) of the ICA decompositions. \code{'S'} will be typically
##' used when the ICA decompositions share common features (e.g same microarrays).
##' If \code{level='genes'}, the correlations are calculated
##' on the attributes \code{'SByGene'} which store the
##' projections of the annotated features. \code{'SByGene'} will be typically chosen
##' when ICA were computed on datasets from different technologies, for which
##' comparison is possible only after annotation into a common ID, like genes.
##'
##' \code{cutoff_zval}
##' is only used when \code{level} is one of \code{c('features','genes')}, in
##' order to restrict the correlation to the contributing features or genes.
##'
##' When \code{cutoff_zval} is specified, for each pair of components, genes or features that are
##' included in the circle of center 0 and radius \code{cutoff_zval} are excluded from
##' the computation of the correlation.
##'
##' It must be taken into account by the user that if cutoff_zval
##' is different from NULL or zero, the computation will be much slowler since
##' each pair of component is treated individually.
##'
##' Edges of the graph are built based on the correlation values between the
##' components. Absolute values of correlations are used since
##' components have no direction.
##'
##' If \code{useMax} is \code{TRUE} each component will be linked to only one component of
##' each other IcaSet that corresponds to the most correlated component among
##' all components of the same IcaSet. If \code{cutoff_graph} is specified, only
##' correlations exceeding this value are taken into account to build the graph.
##' For example, if \code{cutoff} is 1, only relationships between components
##' that correspond to a correlation value higher than 1 will be included.
##' Absolute correlation values are used since the components have no direction.
##'
##' The contents of the returned list are \describe{
##' \item{dataGraph:}{\code{dataGraph} data.frame that describes the correlation
##' graph,} \item{nodeAttrs:}{\code{nodeAttrs} data.frame that describes the node
##' of the graph} \item{graph}{\code{graph} the graph as an igraph-object,}
##' \item{graphid:}{\code{graphid} the id of the graph plotted using tkplot.} }
##'
##' @param icaSets List of \code{\link{IcaSet}} objects, e.g results of ICA decompositions
##' obtained on several datasets.
##' @param labAn Vector of names for each icaSet, e.g the the names of the
##' datasets on which were calculated the decompositions.
##' @param type.corr Type of correlation to compute, either
##' \code{'pearson'} or \code{'spearman'}.
##' @param cutoff_zval Either NULL or 0 (default) if all genes are used to
##' compute the correlation between the components, or a threshold to compute
##' the correlation using the genes that have at least a scaled projection higher
##' than cutoff_zval. Will be used only when \code{level} is one of \code{c("features","genes")}.
##' @param level Data level of the \code{IcaSet} objects on which is applied the correlation.
##' It must correspond to a data level shared by the IcaSet objects:
##' \code{'samples'} if they were applied to common samples (correlations are computed between matrix
##' \code{A}), \code{'features'} if they were applied to common features (correlations are computed between matrix \code{S}), \code{'genes'}
##' if they share gene IDs after
##' annotation into genes (correlations are computed between matrix \code{SByGene}).
##' @param fileNodeDescr File where node descriptions are saved (useful when the
##' user wants to visualize the graph using Cytoscape).
##' @param fileDataGraph File where graph description is saved (useful when the
##' user wants to visualize the graph using Cytoscape).
##' @param plot if \code{TRUE} (default) plot the correlation graph
##' @param title title of the graph
##'
##' @param col vector of colors indexed by elements of labAn; if missing, colors
##' will be automatically attributed
##' @param cutoff_graph the cutoff used to select pairs that will be included in
##' the graph
##' @param useMax if \code{TRUE}, the graph is restricted to edges that correspond to
##' maximum correlation between components, see details
##' @param tkplot If TRUE, performs interactive plot with function \code{tkplot}, else uses \code{plot.igraph}
##' @return A list consisting of \describe{
##' \item{dataGraph:}{a data.frame defining the correlation
##' graph}
##' \item{nodeAttrs:}{a data.frame describing the node
##' of the graph,}
##' \item{graph:}{the graph as an object of class \code{igraph},}
##' \item{graphid}{the id of the graph plotted with \code{tkplot}}. }
##' @export
##' @author Anne Biton
##' @seealso \code{\link{compareAn2graphfile}}, \code{\link{compareAn}}, \code{\link{cor2An}}, \code{\link{plotCorGraph}}
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##'
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##'
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet objects
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##' dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##' dat=dat2, alreadyAnnot=TRUE)$icaSet
##'
##' ## compare IcaSet objects
##' ## use tkplot=TRUE to get an interactive graph
##' rescomp <- runCompareIcaSets(icaSets=list(icaSettoy1, icaSettoy2), labAn=c("toy1","toy2"),
##' type.corr="pearson", level="genes", tkplot=FALSE)
##'
##'
##' \dontrun{
##' ## load the microarray-based gene expression datasets
##' ## of breast tumors
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##'
##' ## Define a function used to build two examples of IcaSet objects
##' ## and annotate the probe sets into gene Symbols
##' treat <- function(es, annot="hgu133a.db") {
##' es <- selectFeatures_IQR(es,10000)
##' exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##' colnames(exprs(es)) <- sampleNames(es)
##' resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##' resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##' dat=exprs(es), pData=pData(es), refSamples=character(0),
##' annotation=annot, typeID= typeIDmainz,
##' chipManu = "affymetrix", mart=mart)
##' icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##'
##' ## compare the IcaSets
##' runCompareIcaSets(icaSets=list(icaSetMainz, icaSetVdx), labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes")
##' }
runCompareIcaSets <- function(icaSets,
labAn,
type.corr = c("pearson","spearman"),
cutoff_zval=0,
level = c("genes","features","samples"),
fileNodeDescr = NULL,
fileDataGraph = NULL,
plot = TRUE,
title = "",
col,
cutoff_graph = NULL,
useMax = TRUE,
tkplot = FALSE
) {
nb <- NULL
if (missing(labAn))
labAn <- paste("An",1:length(icaSets))
else
if (length(labAn) != length(icaSets))
stop("Length of 'labAn' is different from length of 'icaSets'.")
if (!missing(col))
if (length(col) != length(icaSets))
stop("Length of 'col' is different from length of 'icaSets'.")
else
if (is.null(names(col)) | length(intersect(names(col), labAn)) != length(labAn))
names(col) <- labAn
type.corr <- type.corr[1]
resCompareAn <- compareAn(icaSets = icaSets, level = level,type.corr= type.corr, labAn = labAn, cutoff_zval=cutoff_zval)
dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax = useMax, file = fileDataGraph, cutoff = cutoff_graph)
nbComp <- unlist(lapply(icaSets,function(x) length(indComp(x))))
nbAn <- length(icaSets)
icaSet <- NULL
labComp <- (foreach(icaSet = icaSets, nb = nbComp, an = 1:nbAn) %do% {paste(rep("comp",sum(nb)),indComp(icaSet),sep = "")})
nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp, labAn = labAn, file = fileNodeDescr)
if (plot) {
res <- plotCorGraph(title = title, dataGraph = dataGraph, nodeName = "indComp", nodeAttrs = nodeDescr, edgeWeight = "cor", col = if(missing(col)) NULL else col, nodeShape = "labAn", reciproCol = "reciprocal", tkplot = tkplot)
return(list(dataGraph=res$dataGraph,
nodeAttrs=res$nodeAttrs,
graph=res$graph,
graphid=res$graphid
))
}
else
return(list(dataGraph=dataGraph, nodeAttrs = nodeAttrs))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.