mixOmics: Omics Data Integration Project

Documented in cimDiablo

################################################
#
## 2) cim_diablo
#
################################################

# This function is a small wrapper of cim.
# For more customisation, please use cim

#' Clustered Image Maps (CIMs) ("heat maps") for DIABLO
#' 
#' This function generates color-coded Clustered Image Maps (CIMs) ("heat
#' maps") to represent "high-dimensional" data sets analysed with DIABLO.
#' 
#' This function is a small wrapper of \code{\link{cim}} specific to the DIABLO
#' framework.
#' 
#' @param object An object of class inheriting from \code{"block.splsda"}.
#' @param color a character vector of colors such as that generated by
#' \code{\link{terrain.colors}}, \code{\link{topo.colors}},
#' \code{\link{rainbow}}, \code{\link{color.jet}} or similar functions.
#' @param color.Y a character vector of colors to be used for the levels of the
#' outcome
#' @param color.blocks a character vector of colors to be used for the blocks
#' @param comp positive integer. The similarity matrix is computed based on the
#' variables selected on those specified components. See example. Defaults to
#' \code{comp = 1}.
#' @param margins numeric vector of length two containing the margins (see
#' \code{\link{par}(mar)}) for column and row names respectively.
#' @param legend.position position of the legend, one of "bottomright",
#' "bottom", "bottomleft", "left", "topleft", "top", "topright", "right" and
#' "center".
#' @param transpose logical indicating if the matrix should be transposed for
#' plotting. Defaults to \code{FALSE}.
#' @param row.names,col.names logical, should the name of rows and/or columns
#' of \code{mat} be shown? If \code{TRUE} (defaults) \code{rownames(mat)}
#' and/or \code{colnames(mat)} are used. Possible character vectors with row
#' and/or column labels can be used.
#' @param size.legend size of the legend
#' @param trim (Logical or numeric) If FALSE, values are not changed. If TRUE,
#'   the values are trimmed to 3 standard deviation range. If a numeric,
#'   values with absolute values greater than the provided values are trimmed.
#' @param ... Other valid arguments passed to \code{cim}.
#' @inherit cim return
#' @author Amrit Singh, Florian Rohart, Kim-Anh Lê Cao, Al J Abadi
#' @seealso \code{\link{cim}}, \code{\link{heatmap}}, \code{\link{hclust}},
#' \code{\link{plotVar}}, \code{\link{network}} and
#' 
#' \url{http://mixomics.org/mixDIABLO/} for more details on all options
#' available.
#' @references 
#' Singh A., Shannon C., Gautier B., Rohart F., Vacher M., Tebbutt S.
#' and Lê Cao K.A. (2019), DIABLO: an integrative approach for identifying key 
#' molecular drivers from multi-omics assays, Bioinformatics, 
#' Volume 35, Issue 17, 1 September 2019, Pages 3055–3062.
#' 
#' Eisen, M. B., Spellman, P. T., Brown, P. O. and Botstein, D. (1998). Cluster
#' analysis and display of genome-wide expression patterns. \emph{Proceeding of
#' the National Academy of Sciences of the USA} \bold{95}, 14863-14868.
#' 
#' Weinstein, J. N., Myers, T. G., O'Connor, P. M., Friend, S. H., Fornace Jr.,
#' A. J., Kohn, K. W., Fojo, T., Bates, S. E., Rubinstein, L. V., Anderson, N.
#' L., Buolamwini, J. K., van Osdol, W. W., Monks, A. P., Scudiero, D. A.,
#' Sausville, E. A., Zaharevitz, D. W., Bunow, B., Viswanadhan, V. N., Johnson,
#' G. S., Wittes, R. E. and Paull, K. D. (1997). An information-intensive
#' approach to the molecular pharmacology of cancer. \emph{Science} \bold{275},
#' 343-349.
#' 
#' González I., Lê Cao K.A., Davis M.J., Déjean S. (2012). Visualising
#' associations between paired 'omics' data sets. \emph{BioData Mining};
#' \bold{5}(1).
#' 
#' mixOmics article:
#' 
#' Rohart F, Gautier B, Singh A, Lê Cao K-A. mixOmics: an R package for 'omics
#' feature selection and multiple data integration. PLoS Comput Biol 13(11):
#' e1005752
#' @keywords multivariate iplot hplot graphs cluster
#' @example ./examples/cimDiablo-examples.R
#' @export
cimDiablo = function(object,
                     color = NULL,
                     color.Y,
                     color.blocks,
                     comp = NULL,
                     margins = c(2, 15),
                     legend.position = "topright",
                     transpose = FALSE,
                     row.names = TRUE,
                     col.names = TRUE,
                     size.legend = 1.5,
                     trim = TRUE,
                     ...)
{
    # check input object
    if (!is(object, "block.splsda"))
        stop("cimDiablo is only available for 'block.splsda' objects")
    
    if (length(object$X) <= 1)
        stop("This function is only available when there are more than 3 blocks")
    # so 2 blocks in X + the outcome Y
    
    ncomp = min(object$ncomp)
    #-- comp
    if (is.null(comp))
    {
        comp = 1:ncomp
    }
    if (length(comp) > 1) {
        comp = unique(comp)
        if (!is.numeric(comp) || any(comp < 1))
            stop("invalid vector for 'comp'.", call. = FALSE)
        if (any(comp > ncomp))
            stop("the elements of 'comp' must be <= ",
                 ncomp,
                 ".",
                 call. = FALSE)
    }
    
    if (length(comp) == 1) {
        if (is.null(comp) || !is.numeric(comp) || comp <= 0 || comp > ncomp)
            stop("invalid value for 'comp'.", call. = FALSE)
    }
    
    comp = round(comp)
    
    # color
    if (missing(color.Y))
    {
        if (nlevels(object$Y) <= 10)
            color.Y = color.mixo(1:nlevels(object$Y))
        else
            color.Y <- color.jet(nlevels(object$Y))
    } else {
        if (length(color.Y) != nlevels(object$Y))
            stop("'color.Y' needs to be of length ", nlevels(object$Y))
    }
    
    
    if (missing(color.blocks))
    {
        color.blocks = brewer.pal(n = 12, name = 'Paired')[seq(2, 12, by = 2)]
    } else {
        if (length(color.blocks) != length(object$X))
            stop("'color.blocks' needs to be of length ", length(object$X))
    }
    
    X = object$X
    Y = object$Y
    
    #need to reorder variates and loadings to put 'Y' in last
    indY = object$indY
    object$variates = c(object$variates[-indY], object$variates[indY])
    object$loadings = c(object$loadings[-indY], object$loadings[indY])
    
    #reducing loadings for ncomp
    object$loadings = lapply(object$loadings, function(x) {
        x[, comp,
          drop = FALSE]
    })
    
    keepA = lapply(object$loadings, function(i)
        apply(abs(i), 1, sum) > 0)
    XDatList = mapply(function(x, y) {
        x[, y]
    },
    x = X,
    y = keepA[-length(keepA)],
    SIMPLIFY = FALSE)
    XDat = do.call(cbind, XDatList)
    
    XDat[is.na(XDat)] <- 0 # XDat is centred so this is equiv. to imputation by the mean
    
    ## --- trim outlier values
    if (is.numeric(trim) | isTRUE(trim))
    {
        trim <- if (isTRUE(trim)) 3 else abs(trim)
        cat(sprintf("\ntrimming values to [-%s, %s] range for cim visualisation. See 'trim' arg in ?cimDiablo\n", trim, trim))
        trim <- abs(trim)
        XDat[which(XDat > trim)] = trim # to avoid huge effects from outliers
        XDat[which(XDat < -trim)] = -trim
    } else if (!isFALSE(trim))
    {
        stop("'trim' must be either logical or numeric")
    }
  
    #dark = brewer.pal(n = 12, name = 'Paired')[seq(2, 12, by = 2)]
    VarLabels = factor(rep(names(keepA[-length(keepA)]), lapply(keepA[-length(keepA)], sum)),
                       levels = names(X))#[order(names(X))])
    
    ## Plot heatmap
    opar = par()[!names(par()) %in% c("cin", "cra", "csi", "cxy", "din",
                                      "page")]
    par(mfrow = c(1, 1))
    res <- cim(
        XDat,
        transpose = transpose,
        color = color,
        row.names = row.names,
        col.names = col.names,
        col.sideColors = color.blocks[as.numeric(VarLabels)],
        row.sideColors = color.Y[as.numeric(Y)],
        margins = margins,
        ...
    )
    
    if (!transpose)
    {
        legend(
            legend.position,
            c("Rows", c(
                levels(Y)[order(levels(Y))], "", "Columns", names(X)
            )),
            col = c(1, color.Y, 1, 1,
                    color.blocks[1:nlevels(VarLabels)][match(levels(VarLabels), names(X))]),
            pch = c(NA, rep(19, nlevels(Y)), NA, NA, rep(19, nlevels(VarLabels))),
            bty = "n",
            cex = size.legend,
            text.font = c(2, rep(1, nlevels(Y)), NA, 2, rep(1, nlevels(VarLabels)))
        )
        
    } else {
        # if transpose == TRUE, rows and columns must be switched
        legend(
            legend.position,
            c("Rows", names(X), "", "Columns", c(levels(Y)[order(levels(Y))])),
            col = c(1, color.blocks[1:nlevels(VarLabels)][match(levels(VarLabels),
                                                                names(X))], 1, 1, color.Y),
            pch = c(NA, rep(19, nlevels(VarLabels)), NA, NA, rep(19, nlevels(Y))),
            bty = "n",
            cex = size.legend,
            text.font = c(2, rep(1, nlevels(VarLabels)), NA, 2, rep(1, nlevels(Y)))
        )
        
    }
    par(opar)
    
    return(invisible(res))
}