R/plotWeights.R
In MOFA: Multi-Omics Factor Analysis (MOFA)

Documented in plotTopWeights plotWeights plotWeightsHeatmap

########################################
## Functions to visualise the weights ##
########################################

#' @title Plot heatmap of the weights
#' @name plotWeightsHeatmap
#' @description Function to visualize the loadings for a given set of factors in a given view. \cr 
#' This is useful to visualize the overall pattern of the weights
#'  but not to individually characterise the factors. \cr
#' To inspect the loadings of individual factors, use the functions
#'  \code{\link{plotWeights}} and \code{\link{plotTopWeights}}
#' @param object a trained \code{\link{MOFAmodel}} object.
#' @param view character vector with the view name(s),
#'  or numeric vector with the index of the view(s) to use. 
#' Default is 'all'
#' @param features character vector with the feature name(s),
#'  or numeric vector with the index of the feature(s) to use. 
#' Default is 'all'
#' @param factors character vector with the factor name(s),
#'  or numeric vector with the index of the factor(s) to use. 
#' Default is 'all'
#' @param threshold threshold on absolute weight values, so that loadings
#'  with a magnitude below this threshold (in all factors) are removed
#' @param ... extra arguments passed to \code{\link[pheatmap]{pheatmap}}.
#' @return produces a heatmap of feature weights for all factors
#' @details The weights, or the loadings, provide the mapping between the high-dimensional space (the genes) and the low-dimensional space (the factors). \cr
#' They define a score for each gene on each factor, such that genes with no association with the factor are expected to 
#' have values close to zero, whereas genes with strong association with the factor are expected to have large absolute values. \cr
#' The sign of the loading indicates the direction of the effect: A positive loading indicates that the feature is more active 
#' in the cells with positive factor values, while a negative loading indicates that the feature is more active in the cells with negative factor values.
#' @importFrom pheatmap pheatmap
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotWeightsHeatmap(MOFA_CLL, view="Mutations")
#' plotWeightsHeatmap(MOFA_CLL, view="Mutations", factors=1:3)
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotWeightsHeatmap(MOFA_scMT, view="RNA expression")
plotWeightsHeatmap <- function(object, view, features = "all", factors = "all", threshold = 0, ...) {
  
  # Sanity checks
  if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
  if (is.numeric(view)) view <- viewNames(object)[view]
  stopifnot(all(view %in% viewNames(object)))  
  
  # Get factors
  if (paste0(factors,collapse="") == "all") {
    factors <- factorNames(object) 
  } else if (is.numeric(factors)) {
    factors <- factorNames(object)[factors]
  } else { 
    stopifnot(all(factors %in% factorNames(object))) 
  }
  
  # Define features
  if (paste(features,collapse="")=="all") { 
    features <- featureNames(object)[[view]]
  } else {
    stopifnot(all(features %in% featureNames(object)[[view]]))  
  }

  # Fetch the weights
  W <- getWeights(object, views=view, factors=factors)[[1]][features,]
  
  # Set title
  # if (is.null(main)) { main <- paste("Loadings of Latent Factors on", view) }
  
  # apply thresholding of loadings
  W <- W[!apply(W,1,function(r) all(abs(r)<threshold)),]
  W <- W[,!apply(W,2,function(r) all(abs(r)<threshold))]

  # Define breaks and center colorscale to white at zero
  minW <- min(W)
  maxW <- max(W)
  paletteLength <- 100
  colors <- c("black", "blue", "white", "orange","red")
  color <- colorRampPalette(colors)(paletteLength)
  breaks <- c(seq(minW, 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(maxW/paletteLength,maxW, length.out=floor(paletteLength/2)))

  # Plot heatmap
  pheatmap(t(W), color=color, breaks=breaks, ...)
}



#' @title Plot Weights
#' @name plotWeights
#' @description An important step to annotate factors is to visualise the feature loadings. \cr
#' This function plots all loadings for a given latent factor and view, optionally labeling the top ones. \cr
#' A similar function is \code{\link{plotTopWeights}}, which displays only the top features with highest loading.
#' @param object a \code{\link{MOFAmodel}} object.
#' @param view character vector with the view name, or numeric vector with the index of the view to use.
#' @param factor character vector with the factor name, or numeric vector with the index of the factor to use.
#' @param nfeatures number of top features to label.
#' @param abs logical indicating whether to use the absolute value of the weights.
#' @param manual A list of character vectors with features to be manually labelled 
#' (i.e. list(c("feature1","feature2"), c("feature3","feature4")). 
#' Each character vector specifies a set of features that will be highlighted with the same color, as specified in \code{color_manual}.
#' @param color_manual a character vector with colors, one for each character vector of \code{manual}
#' @param scale logical indicating whether to scale all loadings from 0 to 1.
#' @return a plot of all feature loadings for the given factor
#' @details The weights, or the loadings, provide the mapping between the high-dimensional space (the genes) and the low-dimensional space (the factors). \cr
#' They define a score for each gene on each factor, such that genes with no association with the factor are expected to 
#' have values close to zero, whereas genes with strong association with the factor are expected to have large absolute values. \cr
#' The sign of the loading indicates the direction of the effect: A positive loading indicates that the feature is more active 
#' in the cells with positive factor values, while a negative loading indicates that the feature is more active in the cells with negative factor values.
#' @import ggplot2 ggrepel
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotWeights(MOFA_CLL, view="Mutations", factor=1)
#' 
#' # Highlight specific features
#' plotWeights(MOFA_CLL, view="Mutations", factor=1,
#'   manual=list(c("IGHV"), c("TP53", "del17p13")), 
#'   color_manual=c("blue","red")
#'  )
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotWeights(MOFA_scMT, view="RNA expression", factor=1, nfeatures=15)
plotWeights <- function(object, view, factor, nfeatures=10,
                        abs=FALSE, manual = NULL, color_manual = NULL, scale = TRUE) {
  
  # Sanity checks
  if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
  if (is.numeric(view)) view <- viewNames(object)[view]
  stopifnot(all(view %in% viewNames(object))) 
  
  # Get factor
  if (is.numeric(factor)) {
      factor <- factorNames(object)[factor]
  } else { 
    stopifnot(factor %in% factorNames(object)) 
  }

  # Get manual features to color by
  if (!is.null(manual)) { 
    stopifnot(is(manual, "list"))
    stopifnot(all(Reduce(intersect,manual) %in% featureNames(object)[[view]]))  
  }
  
  # Fetch the weights
  W <- getWeights(object, views=view, factors=factor, as.data.frame = TRUE)
  W <- W[W$factor==factor & W$view==view,]
  
  # Scale values
  if (scale) W$value <- W$value/max(abs(W$value))
  
  # Parse the weights
  if (abs) W$value <- abs(W$value)
  
  # Filter the weights
  # if(nfeatures=="all") {
  #   nfeatures <- nrow(W)
  # } else {
  #   stopifnot(is(nfeatures, "numeric"))
  # }
  # W <- head(W[order(abs(W$value), decreasing=TRUE),], n=nfeatures)
    
  # Define groups for labelling
  W$group <- "0"
  
  # Define group of features to color according to the loading
  if(nfeatures>0) W$group[abs(W$value) >= sort(abs(W$value), decreasing = TRUE)[nfeatures]] <- "1"
  # if(!is.null(threshold)) W$group[abs(W$value) >= threshold] <- "1"
  
  # Define group of features to label manually
  if(!is.null(manual)) {
    if (is.null(color_manual)) {
      color_manual <- hcl(h = seq(15, 375, length = length(manual) + 1), l = 65, c = 100)[seq_along(manual)]
    } else {
      stopifnot(length(color_manual)==length(manual)) 
    }
    for (m in seq_along(manual)) {
      W$group[W$feature %in% manual[[m]]] <- as.character(m+1)
    }
  }
  
  # Sort by weight 
  W <- W[order(W$value),]
  W$feature <- factor(W$feature, levels=W$feature)
  
  # Define plot title
  # if(is.null(main)) main <- paste("Distribution of weigths of LF", factor, "in", view, "view")
  
  # Generate plot
  W$tmp <- as.character(W$group!="0")
  gg_W <- ggplot(W, aes_string(x="feature", y="value", col="group")) + 
    # scale_y_continuous(expand = c(0.01,0.01)) + scale_x_discrete(expand = c(0.01,0.01)) +
    geom_point(aes_string(size="tmp")) + labs(x="Rank position", y="Loading") +
    geom_hline(yintercept=0, color="black", linetype="dashed") +
    scale_x_discrete(breaks = NULL, expand=c(0.05,0.05)) +
    ggrepel::geom_text_repel(data = W[W$group!="0",], aes_string(label = "feature", col = "group"),
                             segment.alpha=0.1, segment.color="black",
                             segment.size=0.3, box.padding = unit(0.5, "lines"),
                             show.legend= FALSE)
  # Define size
  gg_W <- gg_W + scale_size_manual(values=c(0.5,2)) + guides(size=FALSE)
  
  # Define colors
  cols <- c("grey","black",color_manual)
  gg_W <- gg_W + scale_color_manual(values=cols) + guides(col=FALSE)
  
  # Add Theme  
  gg_W <- gg_W +
    theme(
      # panel.spacing = margin(5,5,5,5),
      # panel.border = element_rect(colour = "black", fill=NA, size=0.75),
      plot.title = element_text(size=rel(1.3), hjust=0.5),
      # axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_text(size=rel(1.3), color="black"),
      axis.title.y = element_text(size=rel(1.5), color="black"),
      axis.ticks.x = element_blank(),

      # white background and dark border
      panel.background = element_rect(fill = "white", colour = NA),
      panel.border     = element_rect(fill = NA, colour = "grey20"),
      # make gridlines dark, same contrast with white as in theme_grey
      panel.grid.major = element_line(colour = "grey92"),
      panel.grid.minor = element_line(colour = "grey92", size = rel(0.5)),
      # contour strips to match panel contour
      strip.background = element_rect(fill = "grey85", colour = "grey20")
    )
  
  return(gg_W)
}


#' @title Plot top weights
#' @name plotTopWeights
#' @description Plot top weights for a given latent factor in a given view.
#' @param object a trained \code{\link{MOFAmodel}} object.
#' @param view character vector with the view name, or numeric vector with the index of the view to use.
#' @param factor character vector with the factor name, or numeric vector with the index of the factor to use.
#' @param nfeatures number of top features to display.
#' Default is 10.
#' @param abs logical indicating whether to use the absolute value of the weights.
#' Default is TRUE.
#' @param sign can be 'positive', 'negative' or 'both' to show only positive,
#'  negative or all weigths, respectively.
#' Default is 'both'.
#' @param scale logical indicating whether to scale all loadings from 0 to 1.
#' Default is TRUE.
#' @details The weights, or the loadings, provide the mapping between the high-dimensional space (the genes) and the low-dimensional space (the factors). \cr
#' They define a score for each gene on each factor, such that genes with no association with the factor are expected to 
#' have values close to zero, whereas genes with strong association with the factor are expected to have large absolute values. \cr
#' The sign of the loading indicates the direction of the effect: A positive loading indicates that the feature is more active 
#' in the cells with positive factor values, while a negative loading indicates that the feature is more active in the cells with negative factor values.
#' @import ggplot2
#' @return Returns a \code{ggplot2} object
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotTopWeights(MOFA_CLL, view="Mutations", factor=1, nfeatures=3)
#' plotTopWeights(MOFA_CLL, view="Mutations", factor=1, nfeatures=3, sign = "positive")
#' plotTopWeights(MOFA_CLL, view="Mutations", factor=1, nfeatures=3, sign = "negative")
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotTopWeights(MOFA_scMT, view="RNA expression", factor=1)
plotTopWeights <- function(object, view, factor, nfeatures = 10, abs = TRUE, scale = TRUE, sign = "both") {
  
  # Sanity checks
  if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
  stopifnot(view %in% viewNames(object))
  # if(!is.null(manual_features)) { stopifnot(is(manual_features, "list"))
  # stopifnot(all(Reduce(intersect,manual_features) %in% featureNames(object)[[view]]))  }
  
  # Collect expectations  
  W <- getWeights(object, factors=factor, views=view, as.data.frame=TRUE)

  # Scale values by loading with highest (absolute) value
  if(scale) W$value <- W$value/max(abs(W$value))

  # store sign
  W <- W[W$value!=0,]
  W$sign <- ifelse(W$value>0, "+", "-")

 # select subset of only positive or negative loadings
  if (sign=="positive") { W <- W[W$value>0,] } else if (sign=="negative") { W <- W[W$value<0,] }

   # Absolute value
  if (abs) W$value <- abs(W$value)

  
  # Extract relevant features
  W <- W[with(W, order(-abs(value))), ]
  if (nfeatures>0) features <- head(W$feature,nfeatures) # Extract top hits
  # if (!is.null(manual_features)) 
  #    features <- W$feature[W$feature %in% manual_features] # Extract manual hits
  W <- W[W$feature %in% features,]
  
  # Sort according to loadings
  W <- W[with(W, order(-value, decreasing = TRUE)), ]
  W$feature <- factor(W$feature, levels=W$feature)
  W$start <- 0
  
  p <- ggplot(W, aes_string(x="feature", y="value")) +
    geom_point(size=2) +
    geom_segment(aes_string(yend="start", xend="feature"), size=0.75) +
    scale_colour_gradient(low="grey", high="black") +
    # scale_colour_manual(values=c("#F8766D","#00BFC4")) +
    # guides(colour = guide_legend(title.position="top", title.hjust = 0.5)) +
    coord_flip() +
    theme(
      axis.title.x = element_text(size=rel(1.5), color='black'),
      axis.title.y = element_blank(),
      axis.text.y = element_text(size=rel(1.2), hjust=1, color='black'),
      axis.text.x = element_text(size=rel(1.5), color='black'),
      axis.ticks.y = element_blank(),
      axis.ticks.x = element_line(),
      legend.position='top',
      # legend.title=element_text(size=rel(1.5), color="black"),
      legend.title=element_blank(),
      legend.text=element_text(size=rel(1.3), color="black"),
      legend.key=element_rect(fill='transparent'),
      panel.background = element_blank(),
      aspect.ratio = .7
      )
  
  if (sign=="negative") p <- p + scale_x_discrete(position = "top")

  # If absolute values are used, add the corresponding signs to the plot
  if (abs) p <- p +  ylim(0,max(W$value)+0.1)+ geom_text(label=W$sign,y=max(W$value)+0.1, size=10)

  if(abs & scale) p <-  p + ylab(paste("Absolute loading on factor", factor))  
  else if(abs & !scale) p <- p + ylab(paste("Absolute loading on factor", factor))
  else if(!abs & scale) p <- p + ylab(paste("Loading on factor", factor))
  else p <- p + ylab(paste("Loading on factor", factor))
  
  return(p)
  
}