R/compareAnalysis.R

Defines functions runCompareIcaSets plotCorGraph nodeAttrs

Documented in nodeAttrs plotCorGraph runCompareIcaSets

##' Compare \code{\link{IcaSet}} objects by computing the correlation between either
##' projection values of common features or genes, or contributions of common
##' samples.
##' 
##' The user must carefully choose the object on which the correlation will be
##' computed.
##' If \code{level='samples'}, the correlations are based on the mixing
##' matrices of the ICA decompositions (of dimension samples x components). \code{'A'}
##' will be typically chosen when the ICA decompositions were computed on the
##' same dataset, or on datasets that include the same samples.
##' If \code{level='features'} is
##' chosen, the correlation is calculated between the source matrices (of dimension
##' features x components) of the ICA decompositions. \code{'S'} will be typically
##' used when the ICA decompositions share common features (e.g same microarrays).
##' If \code{level='genes'}, the correlations are calculated
##' on the attributes \code{'SByGene'} which store the 
##' projections of the annotated features. \code{'SByGene'} will be typically chosen
##' when ICA were computed on datasets from different technologies, for which
##' comparison is possible only after annotation into a common ID, like genes.
##' 
##' \code{cutoff_zval} is only used when \code{level} is one of \code{c('genes','features')}, in
##' order to restrict the correlation to the contributing features or genes.
##' 
##' When \code{cutoff_zval} is specified, for each pair of components, genes or features that are
##' included in the circle of center 0 and radius \code{cutoff_zval} are excluded from
##' the computation of the correlation.
##'
##' It must be taken into account by the user that if
##' \code{cutoff_zval} is different from \code{NULL} or \code{0}, the computation will be much
##' slowler since each pair of component is treated individually.
##'
##' @title Comparison of IcaSet objects using correlation 
##' @param icaSets list of IcaSet objects, e.g results of ICA decompositions
##' obtained on several datasets.
##' @param labAn vector of names for each icaSet, e.g the the names of the
##' datasets on which were calculated the decompositions.
##' @param type.corr Type of correlation to compute, either
##' \code{'pearson'} or \code{'spearman'}.
##' @param cutoff_zval either NULL or 0 (default) if all genes are used to
##' compute the correlation between the components, or a threshold to compute
##' the correlation on the genes that have at least a scaled projection higher
##' than cutoff_zval. Will be used only when correlations are calculated on S or
##' SByGene.
##' @param level Data level of the \code{IcaSet} objects on which is applied the correlation.
##' It must correspond to a feature shared by the IcaSet objects:
##' \code{'samples'} if they were applied to common samples (correlations are computed between matrix \code{A}), \code{'features'} if they were applied to
##' common features (correlations are computed between matrix \code{S}), \code{'genes'} if they share gene IDs after
##' annotation into genes (correlations are computed between matrix \code{SByGene}).
##' @return A list whose length equals the number of pairs of \code{IcaSet} and whose elements
##' are outputs of function \code{\link{cor2An}}. 
##' @export
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##' 
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##' 
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet object
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##'                           dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##'                           dat=dat2, alreadyAnnot=TRUE)$icaSet
##' 
##' listPairCor <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"), 
##'                          type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' 
##' \dontrun{
##' #### Comparison of 2 ICA decompositions obtained on 2 different gene expression datasets.
##' ## load the two datasets
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##' 
##' ## Define a function used to build two examples of IcaSet objects
##' treat <- function(es, annot="hgu133a.db") {
##'    es <- selectFeatures_IQR(es,10000)
##'    exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##'    colnames(exprs(es)) <- sampleNames(es)
##'    resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##'    resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##'                         dat=exprs(es), pData=pData(es), refSamples=character(0),
##'                         annotation=annot, typeID= typeIDmainz,
##'                         chipManu = "affymetrix", mart=mart)
##'    icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##' 
##' ## The pearson correlation is used as a measure of association between the gene projections
##' # on the different components (type.corr="pearson").
##' listPairCor <- compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes", cutoff_zval=0)
##' 
##' ## Same thing but adding a selection of genes on which the correlation between two components is computed:
##' # when considering pairs of components, only projections whose scaled values are not located within
##' # the circle of radius 1 are used to compute the correlation (cutoff_zval=1).
##' listPairCor <-  compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", cutoff_zval=1, level="genes")
##' }
##' @export
##' @author Anne Biton
##' @seealso \code{\link{cor2An}}

compareAn <- function (icaSets,
                       labAn,
                       type.corr = c("pearson","spearman"),
                       cutoff_zval=0,
                       level = c("samples","features","genes")
                       ) {
    
    if (missing(labAn))
        if (is.null(names(icaSets)))
            labAn <- c(1:nbAn)
        else
            labAn <- names(icaSets)
    
    nbAn <- length(icaSets)

    if (length(labAn) != nbAn)
        stop("labAn must have the same length than the list icaSets")
          
    level <- match.arg(tolower(level), choices = c("features","genes","samples"))
    switch(level,
           features={ object="S"},
           genes={object="SByGene"},
           samples={object="A"}
           )

    
 type.corr <- match.arg(type.corr)
  nbCompByAn <- lapply(icaSets, function(x) ncol(A(x)))
  
      
  matlist <-
      lapply(icaSets,
             function(icaSet, object) {
                 data.frame(icaSet[object],check.names = FALSE, stringsAsFactors = FALSE)
             }
             , object = object
             )

  
  
  allpairs <- combn(x=1:nbAn,m = 2, simplify = FALSE)
  pairnames <- unlist(lapply(allpairs, function(x,labAn) paste(labAn[x[1]],labAn[x[2]],sep="-"), labAn = labAn))

  listCorPair <- 
          lapply(allpairs,
             function(pa, matlist, labAn, cutoff_zval, type.corr) {
                 mat1 <- matlist[[pa[1]]]
                 mat2 <- matlist[[pa[2]]]
                 resCor <- cor2An(mat1, mat2, lab = c(labAn[pa[1]],labAn[pa[2]]), type.corr = type.corr, cutoff_zval = cutoff_zval)
                 
                 
                 
             }
             , matlist = matlist
             , labAn = labAn
             , type.corr = type.corr
             , cutoff_zval = if (missing(cutoff_zval)) NA else cutoff_zval
            )
  names(listCorPair) <- pairnames
 
  return(listCorPair)
}



##' This function measures the correlation between two matrices containing the results of
##' two decompositions.
##' 
##' Before computing the correlations, the components are scaled and restricted
##' to common row names.
##' 
##' It must be taken into account by the user that if \code{cutoff_zval} is different
##' from NULL or zero, the computation will be slowler since each pair of
##' component is treated individually.
##'
##' When \code{cutoff_zval} is specified, for each
##' pair of components, genes that are included in the circle of center 0 and
##' radius \code{cutoff_zval} are excluded from the computation of the correlation
##' between the gene projection of the two components.
##' @title Correlation between two matrices 
##' @param mat1 matrix of dimension features/genes x number of components, e.g
##' the results of an ICA decomposition
##' @param mat2 matrix of dimension features/genes x number of components, e.g
##' the results of an ICA decomposition
##' @param lab The vector of labels for mat1 and mat2, e.g the the names of the
##' two datasets on which were calculated the two decompositions
##' @param type.corr Type of correlation, either \code{'pearson'} or
##' \code{'spearman'}
##' @param cutoff_zval cutoff_zval: 0 (default) if all genes are
##' used to compute the correlation between the components, or a threshold to
##' compute the correlation on the genes that have at least a scaled projection
##' higher than cutoff_zval.
##' @return This function returns a list consisting of:
##' \item{cor}{a matrix of dimensions '(nbcomp1+nbcomp2) x (nbcomp1*nbcomp2)', 
##' containing the correlation values between each pair of components,}
##' \item{pval}{ a matrix of dimension '(nbcomp1+nbcomp2) x (nbcomp1*nbcomp2)', 
##' containing the p-value of the correlation tests for each
##' pair of components,}
##' \item{inter}{ the intersection between the features/genes of \code{mat1}
##' and \code{mat2},}
##' \item{labAn}{ the labels of the compared matrices.}
##' @author Anne Biton
##' @export
##' @examples 
##' cor2An(mat1=matrix(rnorm(10000),nrow=1000,ncol=10), mat2=matrix(rnorm(10000),nrow=1000,ncol=10),
##'        lab=c("An1","An2"), type.corr="pearson")
##' 
##' @seealso \code{rcorr}, \code{cor.test}, \code{\link{compareAn}}

cor2An <- function (mat1,
                    mat2,
                    lab,
                    type.corr = c("pearson","spearman"),
                    cutoff_zval = 0
                    ) {
    
    if (is.null(cutoff_zval))
        cutoff_zval <- 0

    if (cutoff_zval < 0)
        stop("'cutoff_zval' must be positive")

    comp1 <- comp2 <- NULL

    mat1 <- data.frame(scale(mat1), check.names = FALSE, stringsAsFactors = FALSE)
    mat2 <- data.frame(scale(mat2), check.names = FALSE, stringsAsFactors = FALSE)
    nbcomp1 <- ncol(mat1)
    nbcomp2 <- ncol(mat2)
    
    gg <- intersect(rownames(mat1),rownames(mat2))

    if (length(gg)<10)
        warning(paste("Less than 10 elements are common between the icaSet objects ", lab[1], " and ", lab[2], ", the comparison of these two decompositions should be reconsidered.", sep = ""))
    
    mat1 <- mat1[gg,]
    mat2 <- mat2[gg,]

    if (missing(lab))
        lab <- paste("an",c(1:2),sep = "")

    if (cutoff_zval == 0) {
        r <- signif(rcorr(as.matrix(mat1), as.matrix(mat2), type = type.corr)$r, digits = 5)
        dimnames(r) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))
    
        rpval <- NULL	

        rpval <- rcorr(as.matrix(mat1),as.matrix(mat2),type=type.corr)$P
        dimnames(rpval) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))

    }
    else {

            ## transform matrix into lists
            l1 <- lapply(as.list(mat1),
                        function(x,n) {
                              names(x) <- n
                              return(x)
                        }
                        , n = rownames(mat1) 
                  )
            l2 <- lapply(as.list(mat2),
                        function(x,n) {
                            names(x) <- n
                            return(x)
                        }
                        , n = rownames(mat2) 
                  )
            
            rpval <- matrix(ncol = nbcomp2+nbcomp1 , nrow =nbcomp2+nbcomp1)
            r <- matrix(ncol = nbcomp2+nbcomp1 , nrow =nbcomp2+nbcomp1)

            for (i in 1:nbcomp1) {
                   reslist <- 
                sapply(1:nbcomp2,
                   function(j, i, l1, l2, cutoff_zval, gg) {
                       dc <- data.frame(comp1=l1[[i]],comp2=l2[[j]])
                       rownames(dc) <- gg
                       g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval   ))
                       inter <- gg[!(gg %in% g2del)]
                       res <- cor.test(as.matrix(mat1[inter,i]),as.matrix(mat2[inter,j]), method = type.corr)
                       return(list(cor=res$estimate,pval=res$p.value))
                    }
                   , i = i
                   , l1 = l1
                   , l2 = l2
                   , cutoff_zval = cutoff_zval
                   , gg = gg)
               
               reslist <- as.data.frame(reslist)
               r[i,(nbcomp1+1):(nbcomp1+nbcomp2)] <- r[(nbcomp1+1):(nbcomp1+nbcomp2),i] <- unlist(reslist["cor",])
               rpval[i,(nbcomp1+1):(nbcomp1+nbcomp2)] <- rpval[(nbcomp1+1):(nbcomp1+nbcomp2),i] <- unlist(reslist["pval",])

                for (j in 1:nbcomp1) {
                    dc <- data.frame(comp1=l1[[i]],comp2=l1[[j]])
                    rownames(dc) <- gg
                    g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval   ))
                    inter <- names(l1[[1]])[!(names(l1[[1]]) %in% g2del)]
                    res <- cor.test(as.matrix(mat1[inter,i]),as.matrix(mat1[inter,j]), method = type.corr)
                    rpval[i,j] <- rpval[j,i] <- res$p.value
                    r[i,j] <- r[j,i] <- res$estimate
                }
            }

            for (i in 1:nbcomp2) {
                for (j in 1:nbcomp2) {
                    dc <- data.frame(comp1=l2[[i]],comp2=l2[[j]])
                    rownames(dc) <- gg
                    g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval   ))
                    inter <- names(l2[[1]])[!(names(l2[[1]]) %in% g2del)]
                    res <- cor.test(as.matrix(mat2[inter,i]),as.matrix(mat2[inter,j]), method = type.corr)
                    rpval[nbcomp1+i,nbcomp1+j] <- rpval[nbcomp1+j,nbcomp1+i] <- res$p.value
                    r[nbcomp1+i,nbcomp1+j] <- r[nbcomp1+j,nbcomp1+i] <- res$estimate
                    
                }
            }
            
            dimnames(rpval) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))
            dimnames(r) <- list(c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")),c(paste("comp",c(1:nbcomp1), "_", lab[1], sep = ""),  paste("comp",c(1:nbcomp2), "_", lab[2], sep = "")))

        }
      return(list(cor=r,pval=rpval,inter = gg, nbComp = c(nbcomp1,nbcomp2), labAn = lab))
}





      
correl2Comp <- function (
### This function computes the correlation between two components.
                         comp1
                         ### The first component, a vector of projections or contributions indexed by labels
                         ,comp2
                         ### The second component, a vector of projections or contributions indexed by labels
                         ,type.corr = "pearson"
                         ###  name of the type of correlation to compute, either 'pearson' or 'spearman'
                         ,plot = FALSE
                         ###  if TRUE the plot of comp1 vs comp2 is drawn
                         ,cutoff_zval = 0
                         ###  either NULL or 0 (default) if all genes are used to compute the correlation between the components, or a threshold to compute the correlation on the genes that have at least a scaled projection higher than cutoff_zval. 
                         ,test = FALSE
                         ### if TRUE the p-value of correlation is returned instead of the correlation value
                         ,alreadyTreat = FALSE
                         ### if TRUE comp1 and comp2 are considered as being already treated (i.e scaled and restricted to common elements) 
                         ) {
##details<< Before computing the correlation, the components are scaled and restricted to common labels.  
##  When \code{cutoff_zval} is different from 0, the elements that are included in the circle of center 0 and radius \code{cutoff_zval} are not taken into account during the computation of the correlation.

    if(!alreadyTreat) {
        comp1 <- (comp1-mean(comp1))/sd(comp1)
        comp2 <- (comp2-mean(comp2))/sd(comp2)
        gg <- intersect(names(comp1),names(comp2))
        comp1 <- comp1[gg]
        comp2 <- comp2[gg]
    }

            dc <- data.frame(comp1=comp1,comp2=comp2)
            rownames(dc) <- gg
            g2del <- rownames(subset(dc, comp1^2 + comp2^2<= cutoff_zval   ))
            genes.intersect <- gg[!(gg %in% g2del)]
        
        if (length(genes.intersect) < 4)
            if (test) return(1) else return(0)
	comp1.ord = comp1[genes.intersect]
	comp2.ord = comp2[genes.intersect]
        
	
	if (test) {
                
                r = cor.test(comp1.ord,comp2.ord,method = type.corr, alternative = "two.sided")$p.value
	}
	else {
                r = cor(comp1.ord,comp2.ord,method = type.corr)
                r = signif(r, digits = 3)
		
	}
	if (plot) plot(comp1.ord,comp2.ord,xlab = paste("comp1"),ylab=paste("comp2"),pch = 16,sub = paste("corr_",type.corr," = ",r,sep=""))
	return(r)
### This function returns either the correlation value or the p-value of the correlation test.
}


##' This function builds a data.frame describing for each node of the graph
##' its ID and which analysis/data it comes from.
##'
##' The created file is used in Cytoscape.
##' @title Generate node attributes
##' @param nbAn Number of analyses being considered, i.e number of IcaSet
##' objects
##' @param nbComp Number of components by analysis, if of length 1 then
##' it is assumed that each analysis has the same number of components.
##' @param labAn Labels of the analysis, if missing it will be generated
##' as an1, an2, ...
##' @param labComp List containing the component labels indexed by analysis, if missing
##' will be generated as comp1, comp2, ...
##' @param file File where the description of the node attributes will be
##' written
##' @return A data.frame describing each node/component
##' @export
##' @examples
##' ## 4 datasets, 20 components calculated in each dataset, labAn    
##' nodeAttrs(nbAn=4, nbComp=20, labAn=c("tutu","titi","toto","tata"))
##' 
##' @author Anne Biton

nodeAttrs <- function(nbAn,
                      nbComp,
                      labAn,
                      labComp,
                      file
                      ) {

    nb <- an <- lab <- comp <- NULL
    
    if (missing(labAn))
        labAn <- paste(rep("an",nbAn),1:nbAn,sep = "")
    else if (length(labAn) != nbAn)
        stop("The length of 'labAn' must equal 'nbAn'.")

    
    if (missing(labComp)) {
        if (length(nbComp) == 1) {
            labid <- 
                paste(
                      rep(paste(rep("comp",nbComp),c(1:nbComp),sep = ""),nbAn),
                      paste(rep("an",nbComp*nbAn),sapply(c(1:nbAn),rep,nbComp),sep=""),
                      sep = "_"
                      )
            labComp <-rep(paste(rep("comp",nbComp),c(1:nbComp),sep = ""),nbAn)
                      
            nbComp <- rep(nbComp,nbAn)
        }
        else {
            if (length(nbComp) != nbAn)
                stop("The provided number of components must equal the number of analyses")
           
            labid <- foreach(nb = nbComp, an = labAn) %do% {paste(paste(rep("comp",nb),c(1:nb),sep = ""),rep(an,nb),sep="_")}
            labid <- unlist(labid)
            labComp <- unlist(foreach(nb = nbComp, an = 1:nbAn) %do% {paste(rep("comp",sum(nb)),c(1:nb),sep = "")})
        }
    }
    else {
        if (!is.list(labComp))
            stop("The component labels must be provided using a list.")
        if (length(labComp) != nbAn)
            stop("The length of the list containing the component labels 'labComp' must have the same length than the list containing the analysis labels 'labAn'.")
        names(labComp) <- names(labAn)

        foreach(lab = labComp, nb = nbComp) %do% {
            if (length(lab) != nb)
                stop("The length of the list 'labComp' containing component labels does not map to 'nbComp'.")
        }
        labid <- unlist(foreach(nb = nbComp, an = labAn, comp = labComp) %do% {paste(comp,rep(an,nb),sep="_")})
        labComp <- unlist(labComp)

        
    }

   compnb <- unlist(lapply(nbComp,function(x) c(1:x)))
   names(nbComp) <- labAn
   an <- unlist(lapply(names(nbComp), function(x,nb) rep(x,nb[x]), nb = nbComp))

   dataAttr <- data.frame(id = labid, labComp = labComp, indComp = compnb, labAn = an, stringsAsFactors = FALSE, check.names = FALSE)

    if (!missing(file))
        if (!is.null(file))
            write.table(dataAttr,file=file,sep = " ", row.names = FALSE, quote = FALSE)

    return(dataAttr)
}

##' compareAn2graphfile
##' 
##' This function builds a correlation graph from the outputs of function \code{\link{compareAn}}.
##'
##' When correlations are considered (\code{useVal}="cor"), absolute values
##' are used since the components have no direction.
##' 
##' If \code{useMax} is \code{TRUE} each component is linked to the most correlated component of
##' each different \code{IcaSet}.
##' 
##' If \code{cutoff} is specified, only
##' correlations exceeding this value are taken into account during the graph construction.
##' For example, if \code{cutoff} is 1, only relationships between components
##' that correspond to a correlation value larger than 1 will be included.
##'
##' When \code{useVal="pval"} and \code{useMax=TRUE}, the minimum value is taken
##' instead of the maximum. 
##' 
##' @param listPairCor The output of the function \code{\link{compareAn}}, containing the
##' correlation between several pairs of objects of class \code{\link{IcaSet}}.
##' @param useMax If TRUE, the graph is restricted to edges that correspond to
##' maximum score, see details
##' @param cutoff Cutoff used to select pairs that will be included in the
##' graph.
##' @param useVal The value on which is based the graph, either \code{"cor"} for
##' correlation or \code{"pval"} for p-values of correlation tests.
##' @param file File name.
##' @return A data.frame with the graph description, has
##' two columns \code{n1} and \code{n2} filled with node IDs, each row denotes that there is an edge from \code{n1} to \code{n2}. Additional columns quantify the strength of association: correlation (\code{cor}), p-value (\code{pval}), (\code{1-abs(cor)}) (\code{distcor}), log10-pvalue (\code{logpval}).
##' @author Anne Biton
##' @seealso \code{\link{compareAn}}, \code{\link{cor2An}}
##' @export
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##' 
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##' 
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet object
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##'                           dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##'                           dat=dat2, alreadyAnnot=TRUE)$icaSet
##' 
##' resCompareAn <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"), 
##'                          type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"),
##' compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt")
##' 
##' 
##' \dontrun{
##' #### Comparison of 2 ICA decompositions obtained on 2 different gene expression datasets.
##' ## load the two datasets
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##' 
##' ## Define a function used to build two examples of IcaSet objects
##' treat <- function(es, annot="hgu133a.db") {
##'    es <- selectFeatures_IQR(es,10000)
##'    exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##'    colnames(exprs(es)) <- sampleNames(es)
##'    resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##'    resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##'                         dat=exprs(es), pData=pData(es), refSamples=character(0),
##'                         annotation=annot, typeID= typeIDmainz,
##'                         chipManu = "affymetrix", mart=mart)
##'    icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##' 
##' ## Compute correlation between every pair of IcaSet objects.
##' resCompareAn <- compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes", cutoff_zval=0)
##' 
##' ## Same thing but adding a selection of genes on which the correlation between two components is computed:
##' # when considering pairs of components, only projections whose scaled values are not located within
##' # the circle of radius 1 are used to compute the correlation (cutoff_zval=1).
##' resCompareAn <-  compareAn(icaSets=list(icaSetMainz,icaSetVdx),
##' labAn=c("Mainz","Vdx"), type.corr="pearson", cutoff_zval=1, level="genes")
##'
##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"),
##' ## i.e, component A of analysis i is linked to component B of analysis j,
##' ## only if component B is the most correlated component to A amongst all component of analysis j.
##' compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt")
##' 
##' ## Restrict the graph to correlation values exceeding 0.4
##' compareAn2graphfile(listPairCor=resCompareAn, useMax=FALSE, cutoff=0.4,  useVal="cor", file="myGraph.txt")
##'
##' }
compareAn2graphfile <- function (listPairCor,
                                 useMax = TRUE,
                                 cutoff = NULL,
                                 useVal = c("cor","pval"),
                                 file = NULL
                                 ) {
    useVal <- match.arg(useVal)
    dataGraphs <- 
    lapply(listPairCor,
           function(res, useMax, cutoff, useVal) {
               dd <- abs(res[[useVal]])
               
               nbcomp1 <- res$nbComp[1]
               nbcomp2 <- res$nbComp[2]
               attrGraph <- c("cor",if ("pval" %in% names(res)) "pval")
               if (useMax) {
                   if (useVal == "pval") 
                       op <- "which.min"
                   else 
                       op <- "which.max"
                   
                  
                   ## find correlation max both direction an1 <-> an2 since the max is not necessarily reciprocal
                   subdd <- dd[(nbcomp1+1):(nbcomp1+nbcomp2), 1:nbcomp1]
                   ind <- apply(subdd,2,op)
                   coord <- as.list(data.frame(t(cbind(ind,1:nbcomp1))))
                   
                   dataGraph <- data.frame(n1=rownames(dd)[1:nbcomp1],
                                           n2=rownames(dd)[(nbcomp1+1):(nbcomp1+nbcomp2)][ind])
                   dataGraph[[useVal]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = subdd))
                   addval <- setdiff(attrGraph,useVal)

                   if (length(addval)==1)
                       dataGraph[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][(nbcomp1+1):(nbcomp1+nbcomp2), 1:nbcomp1]))

                   ## an2 -> an1
                   subdd <- dd[1:nbcomp1,(nbcomp1+1):(nbcomp1+nbcomp2)]
                   ind <- apply(subdd,2,op)
                   coord <- as.list(data.frame(t(cbind(ind,1:nbcomp2))))
                   
                   dataGraph2 <- data.frame(n1=rownames(dd)[(nbcomp1+1):(nbcomp1+nbcomp2)],
                                           n2=rownames(dd)[1:nbcomp1][ind])

                   dataGraph2[[useVal]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = subdd))

                   addval <- setdiff(attrGraph,useVal)

                   if (length(addval)==1)
                       dataGraph2[[addval]] <- unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][1:nbcomp1,(nbcomp1+1):(nbcomp1+nbcomp2)]))

                   dataGraph <- rbind(dataGraph,dataGraph2)

                   pval <- NULL
                   if (!is.null(cutoff)) {
                       if (useVal == "pval")
                           dataGraph <- subset(dataGraph, pval <= cutoff)
                       else
                           dataGraph <- subset(dataGraph, cor >= cutoff)
                           
                   }
               }
               else if (!is.null(cutoff)) {
                   subdd <- dd[1:nbcomp1, (nbcomp1+1):(nbcomp1+nbcomp2)]
                   dataGraph <- 
                       lapply(as.list(colnames(subdd)), 
                         function(nn, cutoff, subdd,  useVal, attrGraph, res,op, subInd)   {
                             x <- subdd[,nn]

                             ind <- which(eval(parse(text=paste("x",op,"cutoff")))) 
                             if (length(ind) > 0) {
                                 dataGraph <- data.frame(n1 = rep(nn,length(ind)), n2 = rownames(subdd)[ind])
                                 dataGraph[[useVal]] <- subdd[ind,nn]
                                 addval <- setdiff(attrGraph,useVal)
                                 if (length(addval)==1) {
                                    coord <- as.list(data.frame(t(cbind(dataGraph$n1,dataGraph$n2))))
                                    dataGraph[[addval]] <-  unlist(lapply(coord, function(x,m) m[x[1],x[2]], m = res[[addval]][subInd[[1]],subInd[[2]]]))
                                 }
                                 
                                 return(dataGraph)
                                 
                             }
                          }
                          , cutoff = cutoff
                          , subdd = subdd
                          , useVal = useVal
                          , attrGraph = attrGraph
                          , res = res
                          , op = if (useVal == "pval") "<=" else ">="
                          , subInd = list(1:nbcomp1, (nbcomp1+1):(nbcomp1+nbcomp2))
                          )
                   dataGraph <- do.call(rbind,dataGraph)
                   

               }
               return(dataGraph)                        

           }
           , useMax = useMax
           , cutoff = if (missing(cutoff)) NULL else cutoff 
           , useVal = useVal 
           )
    dataGraphs <- do.call(rbind,dataGraphs)

    
   dataGraphs$invcor = 1/(10*as.numeric(dataGraphs$cor))
   dataGraphs$distcor = 1-abs(as.numeric(dataGraphs$cor))
   dataGraphs[,c("cor","pval","invcor","distcor")] <- apply(dataGraphs[,c("cor","pval","invcor","distcor")],2,as.numeric)
   dataGraphs <- annotReciprocal(dataGraph = dataGraphs, keepOnlyReciprocal = FALSE)# file  
   dataGraphs[,c("cor","pval","invcor","distcor")] <- apply(dataGraphs[,c("cor","pval","invcor","distcor")],2,as.numeric)

    if (!missing(file))
        if (!is.null(file))
            write.table(dataGraphs, file = file, row.names = FALSE, quote = FALSE, sep = "\t")
    
   return(dataGraphs)

}

##' annotReciprocal
##' 
##' This function notes edges of a graph as reciprocal or not.
##' 
##' 
##' @param dataGraph data.frame which contains the graph description, must have
##' two columns \code{n1} and \code{n2} filled with node IDs, each row denoting there is an edge from \code{n1} to \code{n2}.
##' @param file file where the graph description is written
##' @param keepOnlyReciprocal if TRUE \code{dataGraph} is restricted to
##' reciprocal edges, else all edges are kept (default).
##' @return This function returns the argument \code{dataGraph} with an
##' additional column named 'reciprocal' which contains TRUE if the edge
##' described by the row is reciprocal, and FALSE if it is not reciprocal.
##' @examples 
##' dg <- data.frame(n1=c("A","B","B","C","C","D","E","F"),n2=c("B","G","A","B","D","C","F","E"))
##' annotReciprocal(dataGraph=dg)
##' 
##' @export
##' @author Anne Biton
annotReciprocal <- function (dataGraph,
                             file,
                             keepOnlyReciprocal = FALSE
                             ) {

	names(dataGraph)[1:2] <- c("n1","n2") 
        dataGraph$keep <- rep(NA, length=nrow(dataGraph))

	dataGraph_keep <- apply( dataGraph, MARGIN = 1 , 
					   function (r, dataGraph) {
						dataN2 = subset(dataGraph, dataGraph$n1 == r["n2"])
						if (r["n1"] %in% dataN2$n2) r["keep"] = TRUE
						else r["keep"] = FALSE
						return(r)		
					    }
					    , dataGraph = dataGraph
	)

        dataGraph_keep <- as.data.frame(t(dataGraph_keep), check.names = FALSE, stringsAsFactors = FALSE)

        names(dataGraph_keep)[ncol(dataGraph_keep)] <- "reciprocal"
        
        if (keepOnlyReciprocal) {
          dataGraph_keep<- subset(dataGraph_keep, dataGraph_keep$reciprocal == TRUE )
          dataGraph_keep <- dataGraph_keep[,-ncol(dataGraph_keep)]
        }
        
        if (!missing(file))
            if(!is.null(file)) 
                write.table(dataGraph_keep, file  = file, sep = "\t", quote = FALSE, row.names = FALSE)
        
	return(dataGraph_keep)
}
 

##' This function plots the
##' correlation graph in an interactive device using function \code{tkplot}.
##'
##' You have to slighly move the nodes to see cliques because strongly related nodes are often superimposed. 
##' The \code{edgeWeight} column is used to weight the edges within the
##' fruchterman.reingold layout available in the package \code{igraph}.
##'
##' The argument
##' \code{nodeCol} typically denotes the column containing the names of the datasets.
##' Colors are automatically
##' attributed to the nodes using palette Set3 of package \code{RColorBrewer}. The
##' corresponding colors can be directly specified in the 'col' argument. In
##' that case, 'col' must be a vector of colors indexed by the unique elements
##' contained in \code{nodeCol} column (e.g dataset ids).
##'
##' As for colors, one can define
##' the column of \code{nodeAttrs} that is used to define the node shapes.  The
##' corresponding shapes can be directly specified in the \code{shape} argument. In
##' that case, \code{shape} must be one of \code{c("circle","square", " vcsquare", "rectangle", "crectangle", "vrectangle")} and must be 
##' indexed by the unique elements of \code{nodeShape} column.
##'
##' Unfortunately, shapes
##' can't be taken into account when tkplot is TRUE (interactive plot).
##'
##' If \code{reciproCol} is not missing, it is used to color the edges, either in grey if the
##' edge is not reciprocal or  in black if the edge is reciprocal.
##'
##' 
##' @title Plots graph using 
##' @param dataGraph A data.frame containing the graph description. It must
##' have two columns \code{n1} and \code{n2}, each row denoting that there is an edge from n1
##' to n2.  Node labels in columns \code{n1} and \code{n2} of \code{dataGraph} must correspond to
##' node IDs in column \code{id} of \code{nodeAttrs}.
##' @param edgeWeight The column of dataGraph used to weight edges.
##' @param nodeAttrs A data.frame with node description, see function \code{nodeAttrs}.
##' @param nodeShape Denotes the column of \code{nodeAttrs} used to attribute the node shapes. 
##' @param nodeCol Denotes the column of \code{nodeAttrs} used to
##' color the nodes in the graph.
##' @param nodeName Denotes the column of \code{nodeAttrs} used
##' as labels for the nodes in the graph.
##' @param col A vector of colors, for the nodes, indexed by the unique elements of \code{nodeCol}
##' column from \code{nodeAttrs}. If missing, colors will be automatically attributed.
##' @param shape A vector of shapes indexed by the unique elements of
##' column \code{nodeShape} from \code{nodeAttrs}. If missing, shapes will be automatically
##' attributed.
##' @param title Title for the plot
##' @param reciproCol Denotes the column of \code{dataGraph} containing \code{TRUE} if the
##' row defines a reciprocal node, else \code{FALSE}. See \code{\link{annotReciprocal}}.
##' @param tkplot If TRUE, performs interactive plot with function \code{tkplot}, else uses \code{plot.igraph}.
##' @param \dots Additional parameters as required by \code{tkplot}.
##' @return A list consisting of \describe{
##' \item{dataGraph}{a data.frame defining the correlation
##' graph}
##' \item{nodeAttrs}{a data.frame describing the node
##' of the graph}
##' \item{graph}{the graph as an object of class \code{igraph}}
##' \item{graphid}{the id of the graph plotted using \code{tkplot}} }
##' @export
##' @author Anne Biton
##' @seealso \code{\link{compareAn}}, \code{\link{nodeAttrs}}, \code{\link{compareAn2graphfile}}, \code{\link{runCompareIcaSets}}
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##' 
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##' 
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet object
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##'                           dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##'                           dat=dat2, alreadyAnnot=TRUE)$icaSet
##' icaSets <- list(icaSettoy1, icaSettoy2)
##' 
##' resCompareAn <- compareAn(icaSets=list(icaSettoy1,icaSettoy2), labAn=c("toy1","toy2"), 
##'                          type.corr="pearson", level="genes", cutoff_zval=0)
##'
##' ## Build a graph where edges correspond to maximal correlation value (useVal="cor"),
##' dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, useVal="cor", file="myGraph.txt")
##' 
##' ## construction of the data.frame with the node description
##' nbComp <- rep(3,2) #each IcaSet contains 3 components
##' nbAn <- 2 # we are comparing 2 IcaSets
##' # labels of components created as comp*i* 
##' labComp <- foreach(icaSet=icaSets, nb=nbComp, an=1:nbAn) %do% {
##'                   paste(rep("comp",sum(nb)),1:nbComp(icaSet),sep = "")}
##' 
##' # creation of the data.frame with the node description
##' nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp,
##'                        labAn = c("toy1","toy2"), file = "nodeInfo.txt")
##'
##' ## Plot correlation graph, slightly move the attached nodes to make the cliques visible
##' ## use tkplot=TRUE to have an interactive graph
##' res <- plotCorGraph(title = "Compare toy 1 and 2", dataGraph = dataGraph, nodeName = "indComp", tkplot = FALSE, 
##'                  nodeAttrs = nodeDescr, edgeWeight = "cor", nodeShape = "labAn", reciproCol = "reciprocal")
##'
##' 
##' \dontrun{
##' ## load two microarray datasets
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##' 
##' ## Define a function used to build two examples of IcaSet objects
##' treat <- function(es, annot="hgu133a.db") {
##'    es <- selectFeatures_IQR(es,10000)
##'    exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##'    colnames(exprs(es)) <- sampleNames(es)
##'    resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##'    resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##'                         dat=exprs(es), pData=pData(es), refSamples=character(0),
##'                         annotation=annot, typeID= typeIDmainz,
##'                         chipManu = "affymetrix", mart=mart)
##'    icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##' 
##' icaSets <- list(icaSetMainz, icaSetVdx)
##' labAn <- c("Mainz", "Vdx")
##'
##' ## correlations between gene projections of each pair of IcaSet
##' resCompareAn <- compareAn(icaSets = icaSets, level = "genes", type.corr= "pearson",
##'                           labAn = labAn, cutoff_zval=0)
##'
##' ## construction of the correlation graph using previous output
##' dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax=TRUE, file="corGraph.txt")
##'
##' ## construction of the data.frame with the node description
##' nbComp <- rep(10,2) #each IcaSet contains 10 components
##' nbAn <- 2 # we are comparing 2 IcaSets
##' # labels of components created as comp*i* 
##' labComp <- foreach(icaSet=icaSets, nb=nbComp, an=1:nbAn) %do% {
##'                   paste(rep("comp",sum(nb)),1:nbComp(icaSet),sep = "")}
##' 
##' # creation of the data.frame with the node description
##' nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp,
##'     labAn = labAn, file = "nodeInfo.txt")
##'
##' ## Plot correlation graph, slightly move the attached nodes to make the cliques visible
##' res <- plotCorGraph(title = "Compare two ICA decomsitions obtained on \n two
##'                  microarray-based data of breast tumors", dataGraph = dataGraph, nodeName = "indComp", 
##'                  nodeAttrs = nodeDescr, edgeWeight = "cor", nodeShape = "labAn", reciproCol = "reciprocal")
##'
##' }
plotCorGraph <- function(
                      dataGraph,
                      edgeWeight = "cor",
                      nodeAttrs,
                      nodeShape,
                      nodeCol = "labAn",
                      nodeName = "indComp",
                      col,
                      shape,
                      title = "",
                      reciproCol = "reciprocal",
                      tkplot = FALSE,
                      ...
                      ) {
    if (!(edgeWeight %in% colnames(dataGraph))) {
        stop (paste(edgeWeight,"is not available within the columns of dataGraph."))
    }
    nodeName <- match.arg(nodeName, choices = colnames(nodeAttrs))
    

    if (missing(nodeShape)) 
        nodeAttrs$shape <- rep("circle",nrow(nodeAttrs))
    else if (!(nodeShape %in% colnames(nodeAttrs))) {
        nodeAttrs$shape <-  rep("circle",nrow(nodeAttrs))
        warning(paste("The column ", nodeShape, " is not available in 'dataGraph'.",sep = ""))
    }
    else {
        potShapes <- c("circle","square", "csquare", "rectangle", "crectangle", "vrectangle")

        autoShape <- TRUE
        if (!missing(shape)) {
            autoShape <- FALSE
            if (length(intersect(names(shape),unique(nodeAttrs[[nodeShape]]))) != length(unique(nodeAttrs[[nodeShape]]))) { 
                warning("'shape' argument is incorrect, some elements of nodeAttrs$'nodeShape' are not indexed. Shapes will be attributed automatically.")
                autoShape <- TRUE
            }
            
            if (length(setdiff(shape,potShapes))>0) {
                warning("'shape' argument is incorrect, the available shapes are: 'circle','square', 'csquare', 'rectangle', 'crectangle', and 'vrectangle'. Shapes will be attributed automatically.")
                autoShape <- TRUE
                
            }
        }
        if (autoShape) {
            levs <- unique(nodeAttrs[[nodeShape]])
            if (length(levs) > length(potShapes)) {
                warning("R graphs can only handle 6 node shapes at the most.")
                shape <- rep(potShapes,6)[1:length(levs)]
            }
            else
                shape <- potShapes[1:length(levs)]
            names(shape) <- levs
            nodeAttrs$shape <- shape[nodeAttrs[[nodeShape]]]
        }
        else 
            nodeAttrs$shape <- shape[nodeAttrs[[nodeShape]]]
        
    }
        
    
    if (!(nodeCol %in% colnames(nodeAttrs))) { 
        stop(paste("The column ",nodeCol, " is not available in nodeAttrs.",sep=""))
    }
    else {
        nbAn <- length(unique(nodeAttrs[[nodeCol]]))
        
        if (!missing(col) && !is.null(col)) {
            if (length(intersect(names(col),unique(nodeAttrs[[nodeCol]]))) != length(unique(nodeAttrs[[nodeCol]]))) {
                warning("'col' argument is incorrect, some elements of nodeAttrs$'nodeCol' are not indexed. Colors will be attributed automatically.")
                colAn <- brewer.pal(nbAn,"Set3")
                names(colAn) <- unique(nodeAttrs[[nodeCol]])
                nodeAttrs$col = colAn[nodeAttrs[[nodeCol]]]
            }
            else
                nodeAttrs$col <- col[nodeAttrs[[nodeCol]]]
        }
        else {
            colAn <- brewer.pal(nbAn,"Set3")
            names(colAn) <- unique(nodeAttrs[[nodeCol]])
            nodeAttrs$col = colAn[nodeAttrs[[nodeCol]]]
        }
    }
    
    
    n1 <- NULL
    nodes <- as.character(unique(as.character(dataGraph$n1)))
    edges <- llply(nodes,
                   function(n,data,edgeWeight) {
                       ll <- list(edges = as.character(subset(data, n1 == n)$n2), weights = subset(data, n1 == n)[[edgeWeight]])
                   },
                   data = dataGraph, edgeWeight = edgeWeight)
    names(edges) <- nodes
    
    
    
    
    g <- new("graphNEL", nodes = nodes, edgeL = edges, edgemode = "directed")
                                        # build igraph object from graphNEL object
    ig <- igraph.from.graphNEL(g, name = TRUE, weight = TRUE)
    
    V(ig)$color <- nodeAttrs$col[match(V(ig)$name,nodeAttrs$id)]
    V(ig)$shape <- nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)]
    E(ig)$width <- 10^E(ig)$weight
    indHighCor <- which(abs(E(ig)$weight)>0.4)
    indLowCor <- which(abs(E(ig)$weight)<=0.4)
    E(ig)$weight <- 1/abs(log2(abs(E(ig)$weight)))
    E(ig)$weight[indHighCor] <- E(ig)$weight[indHighCor]+6
    E(ig)$weight[indLowCor] <- E(ig)$weight[indLowCor]-0.25

   if (!missing(reciproCol)) {
       if (reciproCol %in% colnames(dataGraph)) {

           nodes <- as.character(unique(as.character(dataGraph$n1)))
           colEdges <- llply(nodes,
                             function(n,data,reciproCol) {
                              c('TRUE' = "black", 'FALSE' = "gray50")[as.character(subset(data, n1 == n)[[reciproCol]])]
                             },
                             data = dataGraph, reciproCol = reciproCol)

           colEdges <- unlist(colEdges)
           E(ig)$color <- colEdges
       }
       else {
            warning(paste("No column of 'dataGraph' has the name",reciproCol))
            E(ig)$color <- "black"
        }

    }
    else
        E(ig)$color <- "black"
    
    lay <- layout.fruchterman.reingold(ig,niter=500,area=vcount(ig)^2.3,repulserad=vcount(ig)^100, weights = E(ig)$weight)

    graph <- ig
            
    if (!capabilities()[["X11"]])
        tkplot <- FALSE
    
    if (tkplot) {
        
        if(capabilities()[["X11"]] & tolower(Sys.info()["sysname"])!="windows") {
            dimScreen <- system("xdpyinfo | grep dimensions", intern = TRUE)
            dimScreen <- as.numeric(strsplit(gsub(gsub(gsub(dimScreen, pattern = " ", replacement = ""), pattern = "[A-Za-z0-9]*:", replacement = ""), pattern = "pixels\\([A-Za-z0-9]*\\)", replacement = ""), split = "x")[[1]])
        }
        else
            dimScreen <- c(450,450)
        
        graphid <- tkplot(ig,
                          layout = lay,
                          vertex.label =  nodeAttrs[[nodeName]][match(V(ig)$name,nodeAttrs$id)],
                          vertex.shape = nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)],
                          canvas.width=dimScreen[1], canvas.height=dimScreen[2],
                          ...)
        
        
        tkplot.fit.to.screen(graphid)
        
    }
    else {
        
        graph <- plot(ig,
                      layout = lay,
                      vertex.label = nodeAttrs[[nodeName]][match(V(ig)$name,nodeAttrs$id)],
                      vertex.shape = nodeAttrs$shape[match(V(ig)$name,nodeAttrs$id)],
                      main = title,
                      ...
                      )
        graph <- ig
        graphid = ""

        
    }
    return(list(dataGraph=dataGraph, nodeAttrs = nodeAttrs, graph = graph, graphid=graphid))

}



##' runCompareIcaSets
##' 
##' This function encompasses the comparison of several IcaSet objects using correlations
##' and the plot of the corresponding correlation graph.
##' The IcaSet objects are compared by calculating the correlation between either
##' projection values of common features or genes, or contributions of common
##' samples.
##' 
##' This function calls four functions: \code{\link{compareAn}} which computes the
##' correlations, \code{\link{compareAn2graphfile}} which builds the graph,
##' \code{\link{nodeAttrs}} which builds the node description data, and \code{\link{plotCorGraph}}
##' which uses tkplot to plot the graph in an interactive device.
##' 
##' If the user wants to see the correlation graph in Cytoscape, he must fill
##' the arguments \code{fileDataGraph} and \code{fileNodeDescr}, in order to
##' import the graph and its node descriptions as a .txt file in Cytoscape.
##' 
##' When \code{labAn} is missing, each element i of \code{icaSets} is labeled as
##' 'Ani'.
##' 
##' The user must carefully choose the data level used in the comparison:
##' If \code{level='samples'}, the correlations are based on the mixing
##' matrices of the ICA decompositions (of dimension samples x components). \code{'A'}
##' will be typically chosen when the ICA decompositions were computed on the
##' same dataset, or on datasets that include the same samples.
##' If \code{level='features'} is
##' chosen, the correlation is calculated between the source matrices (of dimension
##' features x components) of the ICA decompositions. \code{'S'} will be typically
##' used when the ICA decompositions share common features (e.g same microarrays).
##' If \code{level='genes'}, the correlations are calculated
##' on the attributes \code{'SByGene'} which store the 
##' projections of the annotated features. \code{'SByGene'} will be typically chosen
##' when ICA were computed on datasets from different technologies, for which
##' comparison is possible only after annotation into a common ID, like genes.
##'
##' \code{cutoff_zval}
##' is only used when \code{level} is one of \code{c('features','genes')}, in
##' order to restrict the correlation to the contributing features or genes.
##' 
##' When \code{cutoff_zval} is specified, for each pair of components, genes or features that are
##' included in the circle of center 0 and radius \code{cutoff_zval} are excluded from
##' the computation of the correlation.
##'
##' It must be taken into account by the user that if cutoff_zval
##' is different from NULL or zero, the computation will be much slowler since
##' each pair of component is treated individually.
##' 
##' Edges of the graph are built based on the correlation values between the
##' components. Absolute values of correlations are used since
##' components have no direction.
##' 
##' If \code{useMax} is \code{TRUE} each component will be linked to only one component of
##' each other IcaSet that corresponds to the most correlated component among
##' all components of the same IcaSet. If \code{cutoff_graph} is specified, only
##' correlations exceeding this value are taken into account to build the graph.
##' For example, if \code{cutoff} is 1, only relationships between components
##' that correspond to a correlation value higher than 1 will be included.
##' Absolute correlation values are used since the components have no direction.
##' 
##' The contents of the returned list are \describe{
##' \item{dataGraph:}{\code{dataGraph} data.frame that describes the correlation
##' graph,} \item{nodeAttrs:}{\code{nodeAttrs} data.frame that describes the node
##' of the graph} \item{graph}{\code{graph} the graph as an igraph-object,}
##' \item{graphid:}{\code{graphid} the id of the graph plotted using tkplot.} }
##' 
##' @param icaSets List of \code{\link{IcaSet}} objects, e.g results of ICA decompositions
##' obtained on several datasets.
##' @param labAn Vector of names for each icaSet, e.g the the names of the
##' datasets on which were calculated the decompositions.
##' @param type.corr Type of correlation to compute, either
##' \code{'pearson'} or \code{'spearman'}.
##' @param cutoff_zval Either NULL or 0 (default) if all genes are used to
##' compute the correlation between the components, or a threshold to compute
##' the correlation using the genes that have at least a scaled projection higher
##' than cutoff_zval. Will be used only when \code{level} is one of \code{c("features","genes")}.
##' @param level Data level of the \code{IcaSet} objects on which is applied the correlation.
##' It must correspond to a data level shared by the IcaSet objects:
##' \code{'samples'} if they were applied to common samples (correlations are computed between matrix
##' \code{A}), \code{'features'} if they were applied to common features (correlations are computed between matrix \code{S}), \code{'genes'}
##' if they share gene IDs after
##' annotation into genes (correlations are computed between matrix \code{SByGene}).
##' @param fileNodeDescr File where node descriptions are saved (useful when the
##' user wants to visualize the graph using Cytoscape).
##' @param fileDataGraph File where graph description is saved (useful when the
##' user wants to visualize the graph using Cytoscape).
##' @param plot if \code{TRUE} (default) plot the correlation graph
##' @param title title of the graph
##' 
##' @param col vector of colors indexed by elements of labAn; if missing, colors
##' will be automatically attributed
##' @param cutoff_graph the cutoff used to select pairs that will be included in
##' the graph
##' @param useMax if \code{TRUE}, the graph is restricted to edges that correspond to
##' maximum correlation between components, see details
##' @param tkplot If TRUE, performs interactive plot with function \code{tkplot}, else uses \code{plot.igraph}
##' @return A list consisting of \describe{
##' \item{dataGraph:}{a data.frame defining the correlation
##' graph}
##' \item{nodeAttrs:}{a data.frame describing the node
##' of the graph,}
##' \item{graph:}{the graph as an object of class \code{igraph},}
##' \item{graphid}{the id of the graph plotted with \code{tkplot}}. }
##' @export
##' @author Anne Biton
##' @seealso \code{\link{compareAn2graphfile}}, \code{\link{compareAn}}, \code{\link{cor2An}}, \code{\link{plotCorGraph}}
##' @examples
##'
##' dat1 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat1) <- paste("g", 1:1000, sep="")
##' colnames(dat1) <- paste("s", 1:10, sep="")
##' dat2 <- data.frame(matrix(rnorm(10000),ncol=10,nrow=1000))
##' rownames(dat2) <- paste("g", 1:1000, sep="")
##' colnames(dat2) <- paste("s", 1:10, sep="")
##' 
##' ## run ICA
##' resJade1 <- runICA(X=dat1, nbComp=3, method = "JADE")
##' resJade2 <- runICA(X=dat2, nbComp=3, method = "JADE")
##' 
##' ## build params
##' params <- buildMineICAParams(resPath="toy/")
##'
##' ## build IcaSet objects
##' icaSettoy1 <- buildIcaSet(params=params, A=data.frame(resJade1$A), S=data.frame(resJade1$S),
##'                           dat=dat1, alreadyAnnot=TRUE)$icaSet
##' icaSettoy2 <- buildIcaSet(params=params, A=data.frame(resJade2$A), S=data.frame(resJade2$S),
##'                           dat=dat2, alreadyAnnot=TRUE)$icaSet
##'
##' ## compare IcaSet objects
##' ## use tkplot=TRUE to get an interactive graph
##' rescomp <- runCompareIcaSets(icaSets=list(icaSettoy1, icaSettoy2), labAn=c("toy1","toy2"),
##'                              type.corr="pearson", level="genes", tkplot=FALSE)
##'
##'
##' \dontrun{
##' ## load the microarray-based gene expression datasets
##' ## of breast tumors 
##' library(breastCancerMAINZ)
##' library(breastCancerVDX)
##' data(mainz)
##' data(vdx)
##' 
##' ## Define a function used to build two examples of IcaSet objects
##' ## and annotate the probe sets into gene Symbols
##' treat <- function(es, annot="hgu133a.db") {
##'    es <- selectFeatures_IQR(es,10000)
##'    exprs(es) <- t(apply(exprs(es),1,scale,scale=FALSE))
##'    colnames(exprs(es)) <- sampleNames(es)
##'    resJade <- runICA(X=exprs(es), nbComp=10, method = "JADE", maxit=10000)
##'    resBuild <- buildIcaSet(params=buildMineICAParams(), A=data.frame(resJade$A), S=data.frame(resJade$S),
##'                         dat=exprs(es), pData=pData(es), refSamples=character(0),
##'                         annotation=annot, typeID= typeIDmainz,
##'                         chipManu = "affymetrix", mart=mart)
##'    icaSet <- resBuild$icaSet
##' }
##' ## Build the two IcaSet objects
##' icaSetMainz <- treat(mainz)
##' icaSetVdx <- treat(vdx)
##' 
##' ## compare the IcaSets
##' runCompareIcaSets(icaSets=list(icaSetMainz, icaSetVdx), labAn=c("Mainz","Vdx"), type.corr="pearson", level="genes")
##' }
runCompareIcaSets <- function(icaSets,
                              labAn,
                              type.corr = c("pearson","spearman"),
                              cutoff_zval=0,
                              level = c("genes","features","samples"),
                              fileNodeDescr = NULL,
                              fileDataGraph = NULL,
                              plot = TRUE,
                              title = "",
                              col,
                              cutoff_graph = NULL,
                              useMax = TRUE,
                              tkplot = FALSE
                              ) {

    nb <- NULL

    if (missing(labAn))
        labAn <- paste("An",1:length(icaSets))
    else
        if (length(labAn) != length(icaSets))
            stop("Length of 'labAn' is different from length of 'icaSets'.")
    
    if (!missing(col))
        if (length(col) != length(icaSets))
            stop("Length of 'col' is different from length of 'icaSets'.")
        else
            if (is.null(names(col)) | length(intersect(names(col), labAn)) != length(labAn))
                names(col) <- labAn
    
    type.corr <- type.corr[1]
    
    resCompareAn <- compareAn(icaSets = icaSets, level = level,type.corr= type.corr, labAn = labAn, cutoff_zval=cutoff_zval)
    dataGraph <- compareAn2graphfile(listPairCor=resCompareAn, useMax = useMax, file = fileDataGraph, cutoff = cutoff_graph)
    nbComp <- unlist(lapply(icaSets,function(x) length(indComp(x))))
    nbAn <- length(icaSets)
    icaSet <- NULL
    labComp <- (foreach(icaSet = icaSets, nb = nbComp, an = 1:nbAn) %do% {paste(rep("comp",sum(nb)),indComp(icaSet),sep = "")})

    nodeDescr <- nodeAttrs(nbAn = nbAn, nbComp = nbComp, labComp = labComp, labAn = labAn, file = fileNodeDescr)

    if (plot) {
        res <- plotCorGraph(title = title, dataGraph = dataGraph, nodeName = "indComp", nodeAttrs = nodeDescr, edgeWeight = "cor", col = if(missing(col)) NULL else col, nodeShape = "labAn", reciproCol = "reciprocal", tkplot = tkplot)
    return(list(dataGraph=res$dataGraph, 
                nodeAttrs=res$nodeAttrs,
                graph=res$graph, 
                graphid=res$graphid 
                ))

    }
    else 
        return(list(dataGraph=dataGraph, nodeAttrs = nodeAttrs))
}

Try the MineICA package in your browser

Any scripts or data that you put into this service are public.

MineICA documentation built on Nov. 8, 2020, 5:35 p.m.