mixOmics2: Omics Data Integration Project

Documented in cimDiablo

################################################################################
# Authors:
#   Amrit Singh,
#   Florian Rohart,
#   Kim-Anh Le Cao,
#
# created: 2015
# last modified: 19-08-2016
#
# Copyright (C) 2015
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
################################################################################



################################################
#
## 2) cim_diablo
#
################################################

# This function is a small wrapper of cim.
# For more customisation, please use cim







#' Clustered Image Maps (CIMs) ("heat maps") for DIABLO
#'
#' This function generates color-coded Clustered Image Maps (CIMs) ("heat
#' maps") to represent "high-dimensional" data sets analysed with DIABLO.
#'
#' This function is a small wrapper of \code{link{cim}} specific to the DIABLO
#' framework.
#'
#' @param object An object of class inheriting from \code{"block.splsda"}.
#' @param color a character vector of colors such as that generated by
#' \code{\link{terrain.colors}}, \code{\link{topo.colors}},
#' \code{\link{rainbow}}, \code{\link{color.jet}} or similar functions.
#' @param color.Y a character vector of colors to be used for the levels of the
#' outcome
#' @param color.blocks a character vector of colors to be used for the blocks
#' @param comp positive integer. The similarity matrix is computed based on the
#' variables selected on those specified components. See example. Defaults to
#' \code{comp = 1}.
#' @param margins numeric vector of length two containing the margins (see
#' \code{\link{par}(mar)}) for column and row names respectively.
#' @param legend.position position of the legend, one of "bottomright",
#' "bottom", "bottomleft", "left", "topleft", "top", "topright", "right" and
#' "center".
#' @param transpose logical indicating if the matrix should be transposed for
#' plotting. Defaults to \code{FALSE}.
#' @param row.names,col.names logical, should the name of rows and/or columns
#' of \code{mat} be shown? If \code{TRUE} (defaults) \code{rownames(mat)}
#' and/or \code{colnames(mat)} are used. Possible character vectors with row
#' and/or column labels can be used.
#' @param size.legend size of the legend
#' @return none
#' @author Amrit Singh, Florian Rohart
#' @seealso \code{\link{cim}}, \code{\link{heatmap}}, \code{\link{hclust}},
#' \code{\link{plotVar}}, \code{\link{network}} and
#'
#' \url{http://mixomics.org/mixDIABLO/} for more details on all options
#' available.
#' @references Singh A., Gautier B., Shannon C., Vacher M., Rohart F., Tebbutt
#' S. and Lê Cao K.A. (2016). DIABLO: multi omics integration for biomarker
#' discovery. BioRxiv available here:
#' \url{http://biorxiv.org/content/early/2016/08/03/067611}
#'
#' Eisen, M. B., Spellman, P. T., Brown, P. O. and Botstein, D. (1998). Cluster
#' analysis and display of genome-wide expression patterns. \emph{Proceeding of
#' the National Academy of Sciences of the USA} \bold{95}, 14863-14868.
#'
#' Weinstein, J. N., Myers, T. G., O'Connor, P. M., Friend, S. H., Fornace Jr.,
#' A. J., Kohn, K. W., Fojo, T., Bates, S. E., Rubinstein, L. V., Anderson, N.
#' L., Buolamwini, J. K., van Osdol, W. W., Monks, A. P., Scudiero, D. A.,
#' Sausville, E. A., Zaharevitz, D. W., Bunow, B., Viswanadhan, V. N., Johnson,
#' G. S., Wittes, R. E. and Paull, K. D. (1997). An information-intensive
#' approach to the molecular pharmacology of cancer. \emph{Science} \bold{275},
#' 343-349.
#'
#' González I., Lê Cao K.A., Davis M.J., Déjean S. (2012). Visualising
#' associations between paired 'omics' data sets. \emph{BioData Mining};
#' \bold{5}(1).
#'
#' mixOmics article:
#'
#' Rohart F, Gautier B, Singh A, Lê Cao K-A. mixOmics: an R package for 'omics
#' feature selection and multiple data integration. PLoS Comput Biol 13(11):
#' e1005752
#' @keywords multivariate iplot hplot graphs cluster
#' @examples
#'
#' ## default method: shows cross correlation between 2 data sets
#' #------------------------------------------------------------------
#' Y = nutrimouse$diet
#' data = list(gene = nutrimouse$gene, lipid = nutrimouse$lipid)
#' design = matrix(c(0,1,1,1,0,1,1,1,0), ncol = 3, nrow = 3, byrow = TRUE)
#'
#'
#' nutrimouse.sgccda <- block.splsda(X = data,
#' Y = Y,
#' design = design,
#' keepX = list(gene = c(10,10), lipid = c(15,15)),
#' ncomp = 2,
#' scheme = "centroid")
#'
#' cimDiablo(nutrimouse.sgccda)
#'
#'
#' @export cimDiablo
cimDiablo = function(object,
color = NULL,
color.Y,
color.blocks,
comp = NULL,
margins = c(2, 15),
legend.position="topright",
transpose = FALSE,
row.names = TRUE,
col.names = TRUE,
size.legend=1.5)
{

    # check input object
    if (!is(object, "block.splsda"))
    stop("cimDiablo is only available for 'block.splsda' objects")

    if (length(object$X) <= 1)
    stop("This function is only available when there are more than 3 blocks")
    # so 2 blocks in X + the outcome Y

    ncomp = min(object$ncomp)
    #-- comp
    if(is.null(comp))
    {comp = 1:ncomp}
    if (length(comp) > 1) {
        comp = unique(comp)
        if (!is.numeric(comp) || any(comp < 1))
        stop("invalid vector for 'comp'.", call. = FALSE)
        if (any(comp > ncomp))
        stop("the elements of 'comp' must be smaller or equal than ", ncomp,
        ".", call. = FALSE)
    }

    if (length(comp) == 1) {
        if (is.null(comp) || !is.numeric(comp) || comp <= 0 || comp > ncomp)
        stop("invalid value for 'comp'.", call. = FALSE)
        comp=c(comp,comp)
    }

    comp = round(comp)

    # color
    if(missing(color.Y))
    {
        color.Y = color.mixo(1:nlevels(object$Y))
    } else {
        if(length(color.Y) != nlevels(object$Y))
        stop("'color.Y' needs to be of length ", nlevels(object$Y))
    }


    if(missing(color.blocks))
    {
        color.blocks = brewer.pal(n = 12, name = 'Paired')[seq(2, 12, by = 2)]
    } else {
        if(length(color.blocks) != length(object$X))
        stop("'color.blocks' needs to be of length ", length(object$X))
    }

    X = object$X
    Y = object$Y

    #need to reorder variates and loadings to put 'Y' in last
    indY = object$indY
    object$variates = c(object$variates[-indY], object$variates[indY])
    object$loadings = c(object$loadings[-indY], object$loadings[indY])

    #reducing loadings for ncomp
    object$loadings = lapply(object$loadings, function(x){x[, comp,
        drop=FALSE]})

    keepA = lapply(object$loadings, function(i) apply(abs(i), 1, sum) > 0)
    XDatList = mapply(function(x, y){
        x[, y]
    }, x=X, y=keepA[-length(keepA)], SIMPLIFY=FALSE)
    XDat = do.call(cbind, XDatList)
    XDat[which(XDat > 2)] = 2
    XDat[which(XDat < -2)] = -2

    #dark = brewer.pal(n = 12, name = 'Paired')[seq(2, 12, by = 2)]
    VarLabels = factor(rep(names(X), lapply(keepA[-length(keepA)], sum)),
    levels = names(X))#[order(names(X))])

    ## Plot heatmap
    opar = par()[! names(par()) %in% c("cin", "cra", "csi", "cxy", "din",
        "page")]
    par(mfrow=c(1,1))
    cim(XDat,transpose= transpose, color = color,
    row.names = row.names, col.names = col.names,
    col.sideColors = color.blocks[as.numeric(VarLabels)],
    row.sideColors = color.Y[as.numeric(Y)], margins = margins)

    if(!transpose)
    {
        legend(legend.position,
        c("Rows", c(levels(Y)[order(levels(Y))], "", "Columns", names(X))),
        col = c(1, color.Y, 1, 1,
        color.blocks[1:nlevels(VarLabels)][match(levels(VarLabels), names(X))]),
        pch = c(NA, rep(19, nlevels(Y)), NA, NA, rep(19, nlevels(VarLabels))),
        bty="n",
        cex = size.legend,
        text.font = c(2, rep(1, nlevels(Y)), NA, 2, rep(1, nlevels(VarLabels))))

    } else { # if transpose == TRUE, rows and columns must be switched
        legend(legend.position,
        c("Rows", names(X), "", "Columns", c(levels(Y)[order(levels(Y))])),
        col = c(1, color.blocks[1:nlevels(VarLabels)][match(levels(VarLabels),
        names(X))], 1, 1, color.Y),
        pch = c(NA, rep(19, nlevels(VarLabels)), NA, NA, rep(19, nlevels(Y))),
        bty="n",
        cex = size.legend,
        text.font = c(2, rep(1, nlevels(VarLabels)), NA, 2, rep(1, nlevels(Y))))

    }
    par(opar)

    return(invisible(XDat))
}