R/plotNhoods.R

Defines functions plotNhoodExpressionDA plotNhoodGroups plotNhoodGraphDA plotNhoodGraph .valid_nhood plotNhoodSizeHist

Documented in plotNhoodExpressionDA plotNhoodGraph plotNhoodGraphDA plotNhoodGroups plotNhoodSizeHist

###########################
### MILO PLOTTING UTILS ###
###########################


#' Plot histogram of neighbourhood sizes
#'
#' This function plots the histogram of the number of cells belonging to
#' each neighbourhood
#'
#' @param milo A \code{\linkS4class{Milo}} object with a non-empty \code{nhoods}
#' slot.
#' @param bins number of bins for \code{geom_histogram}
#'
#' @return A \code{ggplot-class} object
#'
#' @author
#' Emma Dann
#'
#' @examples
#'
#' require(igraph)
#' require(SingleCellExperiment)
#' ux.1 <- matrix(rpois(12000, 5), ncol=400)
#' ux.2 <- matrix(rpois(12000, 4), ncol=400)
#' ux <- rbind(ux.1, ux.2)
#' vx <- log2(ux + 1)
#' pca <- prcomp(t(vx))
#'
#' sce <- SingleCellExperiment(assays=list(counts=ux, logcounts=vx),
#'                             reducedDims=SimpleList(PCA=pca$x))
#' colnames(sce) <- paste0("Cell", seq_len(ncol(sce)))
#' milo <- Milo(sce)
#' milo <- buildGraph(milo, k=20, d=10, transposed=TRUE)
#'
#' milo <- makeNhoods(milo, d=10, prop=0.1)
#' plotNhoodSizeHist(milo)
#'

#' @export
#' @rdname plotNhoodSizeHist
#' @importFrom ggplot2 ggplot geom_histogram xlab theme_classic
#' @importFrom igraph neighbors
#' @importFrom grDevices colorRampPalette
plotNhoodSizeHist <- function(milo, bins=50){
  if (! isTRUE(.valid_nhood(milo))){
    stop("Not a valid Milo object - nhoods are missing. Please run makeNhoods() first.")
  }
  df <- data.frame(nh_size=colSums(nhoods(milo)))

  ggplot(data=df, aes(nh_size)) + geom_histogram(bins=bins) +
    xlab("Neighbourhood size") +
    theme_classic(base_size = 16)
}


#' @importFrom igraph is_igraph
.valid_nhood <- function(milo){
  # check for a valid nhood slot
  n_neigh <- ncol(nhoods(milo))
  is_not_empty <- n_neigh > 0
  if (is_not_empty) {
    # is_graph_vx <- is(milo@nhoods[[sample(seq_len(n_neigh), 1)]], "igraph.vs")
    # is_numeric_vc <- is(milo@nhoods[[sample(seq_len(n_neigh), 1)]], "numeric")
    # if (isTRUE(is_igraph_vx) | isTRUE(is_numeric_vc)){
      TRUE
    # } else {
    #     FALSE
    #   }
  } else {
    FALSE
  }
}


### Plotting neighbourhood graph ###

#' Plot graph of neighbourhood
#'
#' Visualize graph of neighbourhoods
#'
#' @param x A \code{\linkS4class{Milo}} object
#' @param layout this can be (a) a character indicating the name of the \code{reducedDim} slot in the
#' \code{\linkS4class{Milo}} object to use for layout (default: 'UMAP') (b) an igraph layout object
#' @param colour_by this can be a data.frame of milo results or a character corresponding to a column in colData
#' @param subset.nhoods A logical, integer or character vector indicating a subset of nhoods to show in plot
#' (default: NULL, no subsetting). This is necessary if \code{testNhoods} was run using \code{subset.nhoods=...}.
#' @param size_range a numeric vector indicating the range of node sizes to use for plotting (to avoid overplotting
#' in the graph)
#' @param node_stroke a numeric indicating the desired thickness of the border around each node
#' @param ... arguments to pass to \code{ggraph}
#'
#' @return a \code{ggplot-class} object
#'
#' @author Emma Dann
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotNhoodGraph
#' @import igraph
#' @import ggraph
#' @importFrom SummarizedExperiment colData<-
#' @importFrom RColorBrewer brewer.pal
plotNhoodGraph <- function(x, layout="UMAP", colour_by=NA, subset.nhoods=NULL, size_range=c(0.5,3),
                           node_stroke= 0.3, ... ){
  ## Check for valid nhoodGraph object
  if(!.valid_graph(nhoodGraph(x))){
    stop("Not a valid Milo object - neighbourhood graph is missing. Please run buildNhoodGraph() first.")
  }
  if (is.character(layout)) {
    if (!layout %in% names(reducedDims(x))) {
      stop(layout, "isn't in readucedDim(x) - choose a different layout")
    }
  }
  nh_graph <- nhoodGraph(x)

  ## Subset
  if (!is.null(subset.nhoods)) {
    nh_graph <- igraph::induced_subgraph(nh_graph, vids = which(as.numeric(V(nh_graph)$name) %in% unlist(nhoodIndex(x)[subset.nhoods])))
  }


  ## Order vertex ids by size (so big nhoods are plotted first)
  nh_graph <- permute(nh_graph, order(vertex_attr(nh_graph)$size, decreasing=TRUE))

  ## Define layout
  if (is.character(layout)) {
    redDim <- layout
    layout <- reducedDim(x, redDim)[as.numeric(vertex_attr(nh_graph)$name),]
    # make sure this is a matrix!
    if(!any(class(layout) %in% c("matrix"))){
        warning("Coercing layout to matrix format")
        layout <- as(layout, "matrix")
    }
  }

  ## Define node color
  if (!is.na(colour_by)) {
    if (colour_by %in% colnames(colData(x))) {
        if(is.factor(colData(x)[, colour_by])){
            col_levels <- levels(colData(x)[, colour_by])
            col_vals <- as.character(colData(x)[as.numeric(vertex_attr(nh_graph)$name), colour_by])
            col_vals <- factor(col_vals, levels=col_levels)
        } else{
            col_vals <- colData(x)[as.numeric(vertex_attr(nh_graph)$name), colour_by]
        }

      if(!is.factor(col_vals)){
          if(!is.numeric(col_vals)) {
              col_vals <- as.character(col_vals)
          }
      }
      V(nh_graph)$colour_by <- col_vals
    } else {
      stop(colour_by, "is not a column in colData(x)")
    }
  } else {
    V(nh_graph)$colour_by <- V(nh_graph)$size
    colour_by <- "Nhood size"
  }

  if(colour_by %in% c("logFC")){
    plot.g <- simplify(nh_graph)

    pl <- ggraph(simplify(nh_graph), layout = layout) +
      geom_edge_link0(aes(width = weight), edge_colour = "grey66", edge_alpha=0.2) +
      geom_node_point(aes(fill = colour_by, size = size), shape=21, stroke=node_stroke) +
      scale_size(range =size_range, name="Nhood size") +
      scale_edge_width(range = c(0.2,3), name="overlap size") +
      theme_classic(base_size=14) +
      theme(axis.line = element_blank(), axis.text = element_blank(),
            axis.ticks = element_blank(), axis.title = element_blank())
    # theme_graph()

  } else{
    pl <- ggraph(simplify(nh_graph), layout = layout) +
      geom_edge_link0(aes(width = weight), edge_colour = "grey66", edge_alpha=0.2) +
      geom_node_point(aes(fill = colour_by, size = size), shape=21, stroke=node_stroke) +
      scale_size(range = size_range, name="Nhood size") +
      scale_edge_width(range = c(0.2,3), name="overlap size") +
      theme_classic(base_size=14) +
      theme(axis.line = element_blank(), axis.text = element_blank(),
            axis.ticks = element_blank(), axis.title = element_blank())
      # theme_graph()
  }

  if (is.numeric(V(nh_graph)$colour_by)) {
    pl <- pl + scale_fill_gradient2(name=colour_by)
  } else {
      if(is.factor(V(nh_graph)$colour_by)){
          mycolors <- colorRampPalette(brewer.pal(11, "Spectral"))(length(levels(V(nh_graph)$colour_by)))
          } else{
              mycolors <- colorRampPalette(brewer.pal(11, "Spectral"))(length(unique(V(nh_graph)$colour_by)))
          }
      pl <- pl + scale_fill_manual(values=mycolors, name=colour_by, na.value="white")
  }
  return(pl)
  }

#' Plot Milo results on graph of neighbourhood
#'
#' Visualize log-FC estimated with differential nhood abundance testing
#' on embedding of original single-cell dataset.
#'
#' @param x A \code{\linkS4class{Milo}} object
#' @param milo_res a data.frame of milo results
#' @param alpha significance level for Spatial FDR (default: 0.05)
#' @param res_column which column of \code{milo_res} object to use for color (default: logFC)
#' @param ... arguments to pass to \code{plotNhoodGraph}
#'
#' @return a \code{ggplot} object
#'
#' @author Emma Dann
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotNhoodGraphDA
#' @import igraph
plotNhoodGraphDA <- function(x, milo_res, alpha=0.05, res_column = "logFC", ... ){
  if(!.valid_graph(nhoodGraph(x))){
    stop("Not a valid Milo object - neighbourhood graph is missing. Please run buildNhoodGraph() first.")
  }
  if (is.character(layout)) {
    if (!layout %in% names(reducedDims(x))) {
      stop(layout, "is not in readucedDim(x) - choose a different layout")
    }
  }

  ## Add milo results to colData
  signif_res <- milo_res
  signif_res[signif_res$SpatialFDR > alpha,res_column] <- 0
  colData(x)[res_column] <- NA

  # this needs to handle nhood subsetting.
  if(any(names(list(...)) %in% c("subset.nhoods"))){
      subset.nhoods <- list(...)$subset.nhoods
      sub.indices <- nhoodIndex(x)[subset.nhoods]
      colData(x)[unlist(sub.indices[signif_res$Nhood]), res_column] <- signif_res[,res_column]
  } else{
      colData(x)[unlist(nhoodIndex(x)[signif_res$Nhood]),res_column] <- signif_res[,res_column]
  }


  ## Plot logFC
  plotNhoodGraph(x, colour_by = res_column, ... )
}

#' Plot graph of neighbourhoods coloring by nhoodGroups
#'
#' Visualize grouping of neighbourhoods obtained with \code{groupNhoods}
#'
#' @param x A \code{\linkS4class{Milo}} object
#' @param milo_res a data.frame of milo results containing the \code{nhoodGroup} column
#' @param show_groups a character vector indicating which groups to plot
#' all other neighbourhoods will be gray
#' @param ... arguments to pass to \code{plotNhoodGraph}
#'
#' @return a \code{ggplot} object
#'
#' @author Emma Dann
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotNhoodGroups
#' @import igraph
plotNhoodGroups <- function(x, milo_res, show_groups=NULL, ... ){
  if(!.valid_graph(nhoodGraph(x))){
    stop("Not a valid Milo object - neighbourhood graph is missing. Please run buildNhoodGraph() first.")
  }
  if (is.character(layout)) {
    if (!layout %in% names(reducedDims(x))) {
      stop(layout, "is not in reducedDim(x) - choose a different layout")
    }
  }

  if (!"NhoodGroup" %in% colnames(milo_res)) {
    stop("'NhoodGroup' columns is missing from milo_res. Please run groupNhoods() or define neighbourhood groupings otherwise.")
  }

  ## Add groups to colData
  # signif_res <- milo_res
  # signif_res[signif_res$SpatialFDR > alpha,res_column] <- 0
  if (!is.null(show_groups)) {
    plot_groups <- show_groups
  } else {
    plot_groups <- unique(milo_res$NhoodGroup)
  }
  colData(x)["NhoodGroup"] <- NA
  groups_res <- milo_res[milo_res$NhoodGroup %in% plot_groups,]
  colData(x)[unlist(nhoodIndex(x)[groups_res$Nhood]),"NhoodGroup"] <- groups_res$NhoodGroup

  ## Plot logFC
  # allow override of colour_by aesthetic
  if(length(list(...))){
      if(any(names(list(...)) %in% c("colour_by"))){
          pl <- plotNhoodGraph(x, ... )
      } else{
          pl <- plotNhoodGraph(x, colour_by = "NhoodGroup", ... )
          }
  } else{
      pl <- plotNhoodGraph(x, colour_by = "NhoodGroup", ... )
  }

  return(pl)
}

#' Visualize gene expression in neighbourhoods
#'
#' Plots the average gene expression in neighbourhoods, sorted by DA fold-change
#'
#' @param x A \code{\linkS4class{Milo}} object
#' @param da.res a data.frame of DA testing results
#' @param features a character vector of features to plot (they must be in rownames(x))
#' @param alpha significance level for Spatial FDR (default: 0.1)
#' @param subset.nhoods A logical, integer or character vector indicating a subset of nhoods to show in plot
#' (default: NULL, no subsetting)
#' @param cluster_features logical indicating whether features should be clustered with hierarchical clustering.
#' If FALSE then the order in \code{features} is maintained (default: FALSE)
#' @param assay A character scalar that describes the assay slot to use for calculating neighbourhood expression.
#' (default: logcounts)
#' Of note: neighbourhood expression will be computed only if the requested features are not in the \code{nhoodExpression} slot
#' of the milo object. If you wish to plot average neighbourhood expression from a different assay, you should run
#' \code{calcNhoodExpression(x)} with the desired assay.
#' @param scale_to_1 A logical scalar to re-scale gene expression values between 0 and 1 for visualisation.
#' @param show_rownames A logical scalar whether to plot rownames or not. Generally useful to set this to
#' \code{show_rownames=FALSE} when plotting many genes.
#' @param highlight_features A character vector of feature names that should be highlighted on the right side of
#' the heatmap. Generally useful in conjunction to \code{show_rownames=FALSE}, if you are interested in only a few
#' features
#' @return a \code{ggplot} object
#'
#' @author Emma Dann
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotNhoodExpressionDA
#' @import ggplot2
#' @import patchwork
#' @importFrom dplyr mutate left_join filter percent_rank first group_by summarise
#' @importFrom ggrepel geom_text_repel
#' @importFrom tidyr pivot_longer
#' @importFrom stats hclust
#' @importFrom tibble rownames_to_column
#' @importFrom stringr str_replace
plotNhoodExpressionDA <- function(x, da.res, features, alpha=0.1,
                                  subset.nhoods=NULL, cluster_features=FALSE, assay="logcounts",
                                  scale_to_1 = FALSE,
                                  show_rownames=TRUE,
                                  highlight_features = NULL){
  if (length(features) <= 0 | is.null(features)) {
    stop("features is empty")
  }
  ## Check if features are in rownames(x)
  if (!all(features %in% rownames(x))) {
    stop("Some features are not in rownames(x)")
  }
  ## Check if nhood expression exists
  if (dim(nhoodExpression(x))[2] == 1){
    warning("Nothing in nhoodExpression(x): computing for requested features...")
    x <- calcNhoodExpression(x, assay = assay, subset.row = features)
  }
  ## Check if all features are in nhoodExpression
  if (!all(features %in% rownames(nhoodExpression(x)))) {
    warning("Not all features in nhoodExpression(x): recomputing for requested features...")
    x <- calcNhoodExpression(x, assay = assay, subset.row = features)
  }

  expr_mat <- nhoodExpression(x)[features, ]
  colnames(expr_mat) <- seq_len(ncol(nhoods(x)))

  ## Get nhood expression matrix
  if (!is.null(subset.nhoods)) {
      expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
  }

  if (!isFALSE(scale_to_1)) {
      expr_mat <- t(apply(expr_mat, 1, function(X) (X - min(X))/(max(X)- min(X))))
      # force NAs to 0?
      if(sum(is.na(expr_mat)) > 0){
          warning("NA values found - resetting to 0")
          expr_mat[is.na(expr_mat)] <- 0
      }
  }

  rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame

  pl_df <- data.frame(t(expr_mat)) %>%
    rownames_to_column("Nhood") %>%
    mutate(Nhood=as.double(Nhood)) %>%
    left_join(da.res, by="Nhood") %>%
    mutate(logFC_rank=percent_rank(logFC))

  ## Top plot: nhoods ranked by DA log FC
  pl_top <- pl_df %>%
    mutate(is_signif = ifelse(SpatialFDR < alpha, paste0("SpatialFDR < ", alpha), NA)) %>%
    ggplot(aes(logFC_rank, logFC)) +
    geom_hline(yintercept = 0, linetype=2) +
    geom_point(size=0.2, color="grey") +
    geom_point(data=.%>% filter(!is.na(is_signif)), aes(color=is_signif), size=1) +
    theme_bw(base_size=16) +
    ylab("DA logFC") +
    scale_color_manual(values="red", name="") +
    scale_x_continuous(expand = c(0.01, 0)) +
    theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())

  ## Bottom plot: gene expression heatmap
  if (isTRUE(cluster_features)) {
      row.order <- hclust(dist(expr_mat))$order # clustering
      ordered_features <- rownames(expr_mat)[row.order]
  } else {
    ordered_features <- rownames(expr_mat)
  }

  # this code assumes that colnames do not begin with numeric values
  # add 'X' to feature names with numeric first characters
  rownames(expr_mat) <- str_replace(rownames(expr_mat), pattern="(^[0-9]+)", replacement="X\\1")

  pl_df <- pl_df %>%
    pivot_longer(cols=rownames(expr_mat), names_to='feature', values_to="avg_expr") %>%
    mutate(feature=factor(feature, levels=ordered_features))

  if (!is.null(highlight_features)) {
    if (!all(highlight_features %in% pl_df$feature)){
      missing <- highlight_features[which(!highlight_features %in% pl_df$feature)]
      warning('Some elements of highlight_features are not in features and will not be highlighted. \nMissing features: ',
              paste(missing, collapse = ', ') )
    }
    pl_df <- pl_df %>%
      mutate(label=ifelse(feature %in% highlight_features, as.character(feature), NA))
  }

  pl_bottom <- pl_df %>%
    ggplot(aes(logFC_rank, feature, fill=avg_expr)) +
    geom_tile() +
    scale_fill_viridis_c(option="magma", name="Avg.Expr.") +
    xlab("Neighbourhoods") + ylab("Features") +
    scale_x_continuous(expand = c(0.01, 0)) +
    theme_classic(base_size = 16) +
    coord_cartesian(clip="off") +
    theme(axis.text.x = element_blank(), axis.line.x = element_blank(), axis.ticks.x = element_blank(),
          axis.line.y = element_blank(), axis.ticks.y = element_blank(),
          panel.spacing = margin(2, 2, 2, 2, "cm"),
          legend.margin=margin(0,0,0,0),
          legend.box.margin=margin(10,10,10,10)
          )

  if (!is.null(highlight_features)) {
    pl_bottom <- pl_bottom +
      geom_text_repel(data=. %>%
                        filter(!is.na(label)) %>%
                        group_by(label) %>%
                        summarise(logFC_rank=max(logFC_rank), avg_expr=mean(avg_expr), feature=first(feature)),
                      aes(label=label, x=logFC_rank),
                      size=4,
                      xlim = c(max(pl_df$logFC_rank) + 0.01, max(pl_df$logFC_rank) + 0.02),
                      min.segment.length = 0,
                      max.overlaps=Inf,
                      seed=42
      )

  }

  if(isFALSE(show_rownames)){
      pl_bottom <- pl_bottom +
          theme(axis.text.y=element_blank())
  }

  ## Assemble plot
  (pl_top / pl_bottom) +
    plot_layout(heights = c(1,4), guides = "collect") &
    theme(legend.justification=c(0, 1),
          legend.margin = margin(0,0,0,50))
}


#' Visualize gene expression in neighbourhood groups
#'
#' Plots the average gene expression in neighbourhood groups
#'
#' @param x A \code{\linkS4class{Milo}} object
#' @param da.res a data.frame of DA testing results
#' @param features a character vector of features to plot (they must be in rownames(x))
#' @param subset.nhoods A logical, integer or character vector indicating a subset of nhoods to show in plot
#' (default: NULL, no subsetting)
#' @param cluster_features logical indicating whether features should be clustered with hierarchical clustering.
#' If FALSE then the order in \code{features} is maintained (default: FALSE)
#' @param assay A character scalar that describes the assay slot to use for calculating neighbourhood expression.
#' (default: logcounts)
#' Of note: neighbourhood expression will be computed only if the requested features are not in the \code{nhoodExpression} slot
#' of the milo object. If you wish to plot average neighbourhood expression from a different assay, you should run
#' \code{calcNhoodExpression(x)} with the desired assay.
#' @param scale_to_1 A logical scalar to re-scale gene expression values between 0 and 1 for visualisation.
#' @param show_rownames A logical scalar whether to plot rownames or not. Generally useful to set this to
#' \code{show_rownames=FALSE} when plotting many genes.
#' @param highlight_features A character vector of feature names that should be highlighted on the right side of
#' the heatmap. Generally useful in conjunction to \code{show_rownames=FALSE}, if you are interested in only a few
#' features
#' @param grid.space a character setting the \code{space} parameter for \code{facet.grid} (\code{'fixed'} for equally sized facets,
#' \code{'free'} to adapt the size of facent to number of neighbourhoods in group)
#'
#' @return a \code{ggplot} object
#'
#' @author Emma Dann
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotNhoodExpressionDA
#' @import ggplot2
#' @importFrom dplyr mutate left_join filter first group_by summarise ungroup
#' @importFrom ggrepel geom_text_repel
#' @importFrom tidyr pivot_longer
#' @importFrom stats hclust
#' @importFrom tibble rownames_to_column
#' @importFrom stringr str_replace
plotNhoodExpressionGroups <- function(x, da.res, features, alpha=0.1,
                                      subset.nhoods=NULL, cluster_features=FALSE, assay="logcounts",
                                      scale_to_1 = FALSE,
                                      show_rownames=TRUE,
                                      highlight_features = NULL,
                                      grid.space="free"){
  if (length(features) <= 0 | is.null(features)) {
    stop("features is empty")
  }
  ## Check if features are in rownames(x)
  if (!all(features %in% rownames(x))) {
    stop("Some features are not in rownames(x)")
  }
  ## Check if nhood expression exists
  if (dim(nhoodExpression(x))[2] == 1){
    warning("Nothing in nhoodExpression(x): computing for requested features...")
    x <- calcNhoodExpression(x, assay = assay, subset.row = features)
  }
  ## Check if all features are in nhoodExpression
  if (!all(features %in% rownames(nhoodExpression(x)))) {
    warning("Not all features in nhoodExpression(x): recomputing for requested features...")
    x <- calcNhoodExpression(x, assay = assay, subset.row = features)
  }

  expr_mat <- nhoodExpression(x)[features, ]
  colnames(expr_mat) <- seq_len(ncol(nhoods(x)))

  ## Get nhood expression matrix
  if (!is.null(subset.nhoods)) {
    expr_mat <- expr_mat[,subset.nhoods, drop=FALSE]
  }

  if (!isFALSE(scale_to_1)) {
    expr_mat <- t(apply(expr_mat, 1, function(X) (X - min(X))/(max(X)- min(X))))
    # force NAs to 0?
    if(sum(is.na(expr_mat)) > 0){
      warning("NA values found - resetting to 0")
      expr_mat[is.na(expr_mat)] <- 0
    }
  }

  rownames(expr_mat) <- sub(pattern = "-", replacement = ".", rownames(expr_mat)) ## To avoid problems when converting to data.frame

  pl_df <- data.frame(t(expr_mat)) %>%
    rownames_to_column("Nhood") %>%
    mutate(Nhood=as.double(Nhood)) %>%
    left_join(da.res, by="Nhood") %>%
    group_by(NhoodGroup) %>%
    mutate(logFC_rank=rank(logFC, ties.method="random")) %>%
    ungroup()

  ## plot: gene expression heatmap
  if (isTRUE(cluster_features)) {
    row.order <- hclust(dist(expr_mat))$order # clustering
    ordered_features <- rownames(expr_mat)[row.order]
  } else {
    ordered_features <- rownames(expr_mat)
  }

  # this code assumes that colnames do not begin with numeric values
  # add 'X' to feature names with numeric first characters
  rownames(expr_mat) <- str_replace(rownames(expr_mat), pattern="(^[0-9]+)", replacement="X\\1")

  pl_df <- pl_df %>%
    pivot_longer(cols=rownames(expr_mat), names_to='feature', values_to="avg_expr") %>%
    mutate(feature=factor(feature, levels=ordered_features))

  if (!is.null(highlight_features)) {
    if (!all(highlight_features %in% pl_df$feature)){
      missing <- highlight_features[which(!highlight_features %in% pl_df$feature)]
      warning('Some elements of highlight_features are not in features and will not be highlighted. \nMissing features: ', paste(missing, collapse = ', ') )
    }
    pl_df <- pl_df %>%
      mutate(label=ifelse(feature %in% highlight_features, as.character(feature), NA))
  }

  pl_bottom <- pl_df %>%
    ggplot(aes(logFC_rank, feature, fill=avg_expr)) +
    geom_tile() +
    scale_fill_viridis_c(option="magma", name="Avg.Expr.") +
    xlab("Neighbourhoods") + ylab("Features") +
    scale_x_continuous(expand = c(0.01, 0)) +
    theme_classic(base_size = 16) +
    coord_cartesian(clip="off") +
    facet_grid(.~NhoodGroup, labeller = "label_both", scales="free", space=grid.space) +
    theme(axis.text.x = element_blank(), axis.line.x = element_blank(), axis.ticks.x = element_blank(),
          axis.line.y = element_blank(), axis.ticks.y = element_blank(),
          # panel.spacing = margin(2, 2, 2, 2, "cm"),
          legend.margin=margin(0,0,0,0),
          legend.box.margin=margin(10,10,10,10)
    )

  if (!is.null(highlight_features)) {
    pl_bottom <- pl_bottom +
      geom_text_repel(data=. %>%
                        filter(!is.na(label)) %>%
                        group_by(label) %>%
                        summarise(logFC_rank=max(logFC_rank), avg_expr=mean(avg_expr), feature=first(feature)),
                      aes(label=label, x=logFC_rank),
                      size=4,
                      xlim = c(max(pl_df$logFC_rank) + 0.01, max(pl_df$logFC_rank) + 0.02),
                      min.segment.length = 0,
                      seed=42
      )

  }

  if(isFALSE(show_rownames)){
    pl_bottom <- pl_bottom +
      theme(axis.text.y=element_blank())
  }

  pl_bottom
}


#' Visualize DA results as a beeswarm plot
#'
#' This function constructs a beeswarm plot using the ggplot engine to visualise the distribution of
#' log fold changes across neighbourhood annotations.
#' @param da.res a data.frame of DA testing results
#' @param group.by a character scalar determining which column of \code{da.res} to use for grouping.
#' This can be a column added to the DA testing results using the `annotateNhoods` function.
#' If \code{da.res[,group.by]} is a character or a numeric, the function will coerce it to a factor (see details)
#' (default: NULL, no grouping)
#' @param alpha significance level for Spatial FDR (default: 0.1)
#' @param subset.nhoods A logical, integer or character vector indicating a subset of nhoods to show in plot
#' (default: NULL, no subsetting)
#'
#' @return a \code{ggplot} object
#'
#' @details The group.by variable will be coerced to a factor. If you want the variables in group.by to be
#' in a given order make sure you set the column to a factor with the levels in the right order before running the
#' function.
#'
#' @author Emma Dann
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotDAbeeswarm
#' @import ggplot2
#' @importFrom dplyr mutate filter arrange
#' @importFrom ggbeeswarm geom_quasirandom
plotDAbeeswarm <- function(da.res, group.by=NULL, alpha=0.1, subset.nhoods=NULL){
  if (!is.null(group.by)) {
    if (!group.by %in% colnames(da.res)) {
      stop(group.by, " is not a column in da.res. Have you forgot to run annotateNhoods(x, da.res, ", group.by,")?")
    }
    if (is.numeric(da.res[,group.by])) {
      # stop(group.by, " is a numeric variable. Please bin to use for grouping.")
    }
    da.res <- mutate(da.res, group_by = da.res[,group.by])
  } else {
    da.res <- mutate(da.res, group_by = "g1")
  }

  if (!is.factor(da.res[,"group_by"])) {
    message("Converting group_by to factor...")
    da.res <- mutate(da.res, group_by = factor(group_by, levels=unique(group_by)))
    # anno_vec <- factor(anno_vec, levels=unique(anno_vec))
  }

  if (!is.null(subset.nhoods)) {
    da.res <- da.res[subset.nhoods,]
  }

  # Get position with ggbeeswarm
  beeswarm_pos <- ggplot_build(
    da.res %>%
      mutate(is_signif = ifelse(SpatialFDR < alpha, 1, 0)) %>%
      arrange(group_by) %>%
      ggplot(aes(group_by, logFC)) +
      geom_quasirandom()
  )

  pos_x <- beeswarm_pos$data[[1]]$x
  pos_y <- beeswarm_pos$data[[1]]$y

  n_groups <- unique(da.res$group_by) %>% length()

  da.res %>%
    mutate(is_signif = ifelse(SpatialFDR < alpha, 1, 0)) %>%
    mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
    arrange(group_by) %>%
    mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
    mutate(pos_x = pos_x, pos_y=pos_y) %>%
    ggplot(aes(pos_x, pos_y, color=logFC_color)) +
    scale_color_gradient2() +
    guides(color="none") +
    xlab(group.by) + ylab("Log Fold Change") +
    scale_x_continuous(
      breaks = seq(1,n_groups),
      labels = setNames(levels(da.res$group_by), seq(1,n_groups))
      ) +
    geom_point() +
    coord_flip() +
    theme_bw(base_size=22) +
    theme(strip.text.y =  element_text(angle=0))

}


#' Visualize DA results as an MAplot
#'
#' Make an MAplot to visualise the relationship between DA log fold changes and neighbourhood abundance. This
#' is a useful way to diagnose issues with the DA testing, such as large compositional biases and/or issues
#' relating to large imbalances in numbers of cells between condition labels/levels.
#' @param da.res A data.frame of DA testing results
#' @param null.mean A numeric scalar determining the expected value of the log fold change under the null
#' hypothesis. \code{default=0}.
#' @param alpha A numeric scalar that represents the Spatial FDR threshold for statistical significance.
#'
#' @return a \code{ggplot} object
#'
#' @details MA plots provide a useful means to evaluate the distribution of log fold changes after differential
#' abundance testing. In particular, they can be used to diagnose global shifts that occur in the presence of
#' confounding between the number of cells acquired and the experimental variable of interest. The expected null
#' value for the log FC distribution (grey dashed line), along with the mean observed log fold change for non-DA
#' neighbourhoods (purple dashed line) are plotted for reference. The deviation between these two lines can give
#' an indication of biases in the results, such as in the presence of a single strong region of DA leading to an
#' increase in false positive DA neighbourhoods in the opposite direction.
#'
#' @author Mike Morgan
#'
#' @examples
#' NULL
#'
#' @export
#' @rdname plotNhoodMA
#' @import ggplot2
#' @importFrom cowplot theme_cowplot
plotNhoodMA <- function(da.res, alpha=0.05, null.mean=0){
  if(isFALSE(any(class(da.res) %in% c("tibble", "data.frame")))){
    stop("Input `da.res` not recognised - please input the results data.frame from DA testing.")
  }

  if(!is.numeric(alpha)){
    stop("alpha is not numeric - please provide a numeric value for the Spatial FDR threshold")
  }

  if(!is.numeric(null.mean)){
    stop("typeof null.mean not recoginised, please provide a numeric value")
  }

  if(isFALSE(!all(colnames(da.res) %in% c("SpatialFDR", "logCPM", "logFC")))){
    stop("Please provide a results data.frame containing the logCPM, logFC and SpatialFDR results")
  }

  sig.cols <- c("black", "red")
  names(sig.cols) <- c(FALSE, TRUE)
  max.lfc <- max(abs(da.res$logFC))
  max.eps <- max.lfc * 0.1

  emp.null <- mean(da.res$logFC[da.res$SpatialFDR >= alpha])
  min.x <- min(da.res$logCPM)
  minx.eps <- min.x * 0.01
  max.x <- max(da.res$logCPM)
  maxx.eps <- max.x * 0.01

  da.res$Sig <- da.res$SpatialFDR < alpha
  ma.p <- ggplot(da.res, aes(x=logCPM, y=logFC, colour=Sig)) +
    geom_hline(yintercept=null.mean, lty=2, colour='grey80') +
    geom_hline(yintercept=emp.null, lty=2, colour='purple') +
    geom_point() +
    theme_cowplot() +
    scale_colour_manual(values=sig.cols) +
    scale_y_continuous(limits=c(-max.lfc - max.eps, max.lfc + max.eps)) +
    scale_x_continuous(limits=c(min.x-minx.eps, max.x + maxx.eps)) +
    labs(x="Mean log scaled counts", y="Log fold-change") +
    guides(colour=guide_legend(title=paste0("SpatialFDR < ", alpha))) +
    NULL

  return(ma.p)
}


#' Plot the number of cells in a neighbourhood per sample and condition
#'
#' @param x A \code{\linkS4class{Milo}} object with a non-empty \code{nhoodCounts}
#' slot.
#' @param subset.nhoods A logical, integer or character vector indicating the rows of \code{nhoodCounts(x)} to use for
#' plotting. If you use a logical vector, make sure the length matches \code{nrow(nhoodCounts(x))}.
#' @param design.df A \code{data.frame} which matches samples to a condition of interest.
#' The row names should correspond to the samples. You can use the same \code{design.df}
#' that you already used in the \code{testNhoods} function.
#' @param condition String specifying the condition of interest Has to be a column in the \code{design}.
#' @param n_col Number of columns in the output \code{ggplot}.
#'
#' @return A \code{ggplot-class} object
#'
#' @author
#' Nick Hirschmüller
#'
#' @examples
#'
#' require(SingleCellExperiment)
#' ux.1 <- matrix(rpois(12000, 5), ncol=300)
#' ux.2 <- matrix(rpois(12000, 4), ncol=300)
#' ux <- rbind(ux.1, ux.2)
#' vx <- log2(ux + 1)
#' pca <- prcomp(t(vx))
#'
#' sce <- SingleCellExperiment(assays=list(counts=ux, logcounts=vx),
#'                             reducedDims=SimpleList(PCA=pca$x))
#' milo <- Milo(sce)
#' milo <- buildGraph(milo, k=20, d=10, transposed=TRUE)
#' milo <- makeNhoods(milo, k=20, d=10, prop=0.3)
#' milo <- calcNhoodDistance(milo, d=10)
#'
#' cond <- sample(c("A","B","C"),300,replace=TRUE)
#'
#' meta.df <- data.frame(Condition=cond, Replicate=c(rep("R1", 100), rep("R2", 100), rep("R3", 100)))
#' meta.df$SampID <- paste(meta.df$Condition, meta.df$Replicate, sep="_")
#' milo <- countCells(milo, meta.data=meta.df, samples="SampID")
#'
#' design.mtx <- data.frame("Condition"=c(rep("A", 3), rep("B", 3), rep("C",3)),
#'                          "Replicate"=rep(c("R1", "R2", "R3"), 3))
#' design.mtx$SampID <- paste(design.mtx$Condition, design.mtx$Replicate, sep="_")
#' rownames(design.mtx) <- design.mtx$SampID
#'
#' plotNhoodCounts(x = milo,
#'                 subset.nhoods = c(1,2),
#'                 design.df = design.mtx,
#'                 condition = "Condition")
#'
#' @export
#' @rdname plotNhoodCounts
#' @importFrom ggplot2 ggplot geom_point stat_summary facet_wrap ylab
#' @importFrom tidyr pivot_longer
#' @importFrom tibble rownames_to_column has_rownames
#' @importFrom dplyr left_join
plotNhoodCounts <- function(x, subset.nhoods, design.df, condition, n_col=3){
  if (!is(x, "Milo")) {
    stop("Unrecognised input type - must be of class Milo")
  }
  if (ncol(nhoods(x)) == 1 & nrow(nhoods(x)) == 1) {
    stop("No neighbourhoods found. Please run makeNhoods() first.")
  }
  if (ncol(nhoodCounts(x)) == 1 & nrow(nhoodCounts(x)) == 1) {
    stop("Neighbourhood counts missing - please run countCells() first")
  }
  if (is(subset.nhoods, "logical")){
    if (length(subset.nhoods) != nrow(nhoodCounts(x))){
      stop("Length of the logical vector has to match number of rows in nhoodCounts(x)")
    }
  } else if (!all(subset.nhoods %in% rownames(nhoodCounts(x)))) {
      stop("Specified subset.nhoods do not exist - ",
      "these should either be an integer or character vector corresponding to row names in nhoodCounts(x) ",
      "or a logical vector with length nrow(nhoodCounts(x)).")
  }
  if (!is(design.df,"data.frame") | !has_rownames(design.df)){
    stop("The design.df has to be of type data.frame with rownames that correspond to the samples.")
  }
  if (!condition %in% colnames(design.df)){
    stop("Condition of interest has to be a column in the design matrix")
  }

  nhood.counts.df <- data.frame(as.matrix(nhoodCounts(x)[subset.nhoods, , drop=FALSE]))
  nhood.counts.df <- rownames_to_column(nhood.counts.df, "subset.nhoods.id")
  nhood.counts.df.long <- pivot_longer(nhood.counts.df,
                                       cols=-1, # pivot all columns into longer format, except the first one.
                                       names_to = "experiment",
                                       values_to = "values")

  tmp.desgin <- rownames_to_column(design.df, "experiment")[,c("experiment", condition)]
  colnames(tmp.desgin) <- c("experiment", "cond")

  nhood.counts.df.long <- left_join(nhood.counts.df.long,
                                    tmp.desgin,
                                    by="experiment")
  nhood.counts.df.long$subset.nhoods.id <- paste("Nhood:", nhood.counts.df.long$subset.nhoods.id)

  p <- ggplot(nhood.counts.df.long, aes(x=cond, y=values)) +
      geom_point()+
    stat_summary(fun="mean", geom="crossbar",
                 mapping=aes(ymin=after_stat(y), ymax=after_stat(y)), width=0.22,
                 position=position_dodge(), show.legend = FALSE, color="red")+
    facet_wrap(~subset.nhoods.id, ncol = n_col)+
    labs(x=condition, y="# cells in neighbourhood") +
      NULL

  return(p)
}
MikeDMorgan/miloR documentation built on Feb. 29, 2024, 6:20 p.m.