R/visualize.R

#' @title geneHeatmap
#' @description Heatmap plot for selected genes
#' @param obj Seurat object
#' @param genes selected genes.
#' @param label stamp for output file name
#' @param annotate features to display on top of cells
#' @param minScale minimum value to scale the matrix
#' @param maxScale maximum value to scale the matrix
#' @param cluster_rows clustering by rows
#' @param cluster_cols clustering by cols
#' @param cutree_rows cut the rows into specified groups
#' @param cutree_cols cut the cols into specified groups
#' @param clustering_distance_rows method to measure distance
#' @param clustering_method clustering method
#' @param show_rownames show row names
#' @param show_colnames show column names
#' @param fontsize_row size of row names
#' @examples
#' \dontrun{
#' geneHeatmap(obj, genes, label, annotate)
#' }
#' @export
#' @import pheatmap
#' @import Seurat
geneHeatmap <- function(
  obj,
  genes,
  label,
  annotate,
  minScale = -2.5,
  maxScale = 2.5,
  cluster_rows = T,
  cluster_cols = F,
  cutree_rows = NA,
  cutree_cols = NA,
  clustering_distance_rows = "euclidean",
  clustering_method = "ward.D2",
  show_rownames = T,
  show_colnames= F,
  fontsize_row = 5
){
  sub.data <- as.matrix(obj@scale.data[genes, order(obj@ident)])
  if(length(genes) == 1){
    sub.data <- t(sub.data)
    row.names(sub.data) <- genes
    cluster_rows <- F
    cutree_rows <- NA
  }
  sub.obj <- ScaleData(obj, display.progress =F)
  sub.data[ sub.data < minScale] <- minScale
  sub.data[ sub.data > maxScale] <- maxScale
  annotate.df <- sapply(annotate, function(x){
    if( x == "ident"){
      as.character(slot(sub.obj, x))
    } else {
      as.character(sub.obj@meta.data[[x]])
    }
  })
  annotate.df <- as.data.frame(annotate.df, row.names=row.names(sub.obj@meta.data),stringsAsFactors=F)[order(sub.obj@ident),]
  gaps_col <- match(unique(annotate.df[[1]]), annotate.df[[1]]) - 1
  gaps_col <- gaps_col[-1]
  pdf(paste0("./heatmap.", label, ".pdf"))
  pheatmap(
    sub.data,
    color=gplots::colorpanel(n=100, low="#FF00FF",mid="#000000", high="#FFFF00"),
    cluster_rows=cluster_rows, cluster_cols=cluster_cols,
    cutree_rows = cutree_rows, cutree_cols = cutree_cols,
    clustering_distance_rows = clustering_distance_rows,
    clustering_method = clustering_method,
    show_rownames = show_rownames, show_colnames = show_colnames,
    fontsize_row = fontsize_row,
    annotation_col = annotate.df,
    gaps_col = gaps_col
  )
  dev.off()
}


#' @title volcanoPlot
#' @description volcanoPlot plot for genes between two groups
#' @param DEGs differentially expressed gene matrix obtained from 'FindMarkers'.
#' @param adjP cutoff of adjusted p value to define DEGs
#' @param fc fold change to define DEGs
#' @param ymax maximum -log10(adjP) to show on y axis, default: 5.
#' @param showSymbols show gene symbols for DEGs
#' @param label stamp for output file name, default: date.
#' @param width figure width, default: 7
#' @param heigth figure height, default: 5
#' @examples
#' \dontrun{
#' volcanoPlot(DEGs)
#' }
#' @export
#' @import ggrepel
#' @import ggplot2

volcanoPlot <- function(DEGs, adjP=0.05, fc=log(2), ymax=5, showSymbols=FALSE, label=suppressWarnings(Sys.Date()), width=7, height=7){
  up.num <- sum(DEGs$p_val_adj <= adjP & DEGs$avg_logFC >= fc)
  down.num <- sum(DEGs$p_val_adj <= adjP & DEGs$avg_logFC <= -fc)
  DEGs$UpDown <- "Stable"
  DEGs$UpDown[DEGs$p_val_adj <= adjP & DEGs$avg_logFC >= fc] <- "Up"
  DEGs$UpDown[DEGs$p_val_adj <= adjP & DEGs$avg_logFC <= -fc] <- "Down"
  DEGs$p_val_adj <- -log10(DEGs$p_val_adj)
  theme.publication <- theme(panel.background = element_rect(fill = "white", colour = "black"),
                             axis.title=element_text(size=rel(1.5)), plot.title=element_text(hjust=0.5, size=rel(2)))
  p <- ggplot(DEGs, aes(x=avg_logFC, y=p_val_adj)) +
    geom_point(data=subset(DEGs, UpDown == "Stable"), pch=21, size=1, aes(fill=factor(UpDown)), color="grey") +
    geom_point(data=subset(DEGs, UpDown != "Stable"), pch=21, aes(fill=factor(UpDown)), color="grey") +
    scale_fill_manual( values=c("Up" = "red", "Down" = "blue", "Stable" = "grey"),
                       breaks=c("Up", "Down", "Stable"),
                       labels=c(paste(up.num, "Up"), paste(down.num, "Down"), "Not Sig")) +
    xlab("Expression fold change") + ylab("-log10(adjusted p value)") +
    labs(title=label, fill=paste(sum(up.num, down.num), "significant DEGs")) +
    geom_vline(xintercept=c(-fc, fc), color="grey", linetype="dotted") +
    geom_hline(yintercept=-log10(adjP), color="grey", linetype="dotted")
  if(isTRUE(showSymbols)){
    p <- p + geom_text_repel(data = subset(DEGs, UpDown != "Stable"),
                             aes(label=gene), size=5, box.padding = 0.25, point.padding = 0.3)
  }
  p + theme.publication
  ggsave(paste0(label, ".volcano.pdf"), width=width, height=height)
}


#' @title barPlot
#' @description barplot of gene expression values across different groups of cells
#' @param obj Seurat object
#' @param genes selected genes.
#' @param groupBy specify attributes in meta.data to group cells
#' @param label stamp for output file name
#' @param maxColumn maximum column number to show legend
#' @param width figure width, default: 7
#' @param heigth figure height, default: 5
#' @examples
#' \dontrun{
#' barPlot(obj, genes, groupBy, label)
#' }
#' @export

barPlot <- function(obj, genes, groupBy, label, maxColumn=6, width=7, height=5){
  genes <- genes[genes %in% row.names(obj@data)]
  gene.num <- length(genes)
  cells <- list()
  for(i in levels(unique(obj@meta.data[[groupBy]]))){
    cells[[i]] <- which(obj@meta.data[[groupBy]] %in% i)
  }
  this.data <- obj@data[genes, as.numeric(unlist(cells))]
  cell.colors <- colorPick(cells)
  # plot
  pdf(file=paste0(label, ".barplot.pdf"), width=width, height=height)
  par(mar=c(1,1,1,2), oma=c(2,1,2,3))
  layout(matrix(c(1, rep(2, gene.num), 3, seq(4, length.out=gene.num)), ncol = 2), width=c(1.5,8.5))

  # panel 1, null
  plot(x=0,y=0, xlim=c(0,1), type = "n", axes = F, xlab = "", ylab = "")
  # panel 2, ylab
  plot(x=0,y=0, xlim=c(0,1), ylim=c(0,1), type = "n", axes = F, xlab = "", ylab = "", yaxs = "i")
  text(x=1, y=0.5, adj=c(0.5,1), srt=90, "Relative expression", xpd=T, cex=1.5)
  # panel 3, figure legend
  plot(x=0,y=0, xlim=c(0,1), type = "n", axes = F, xlab = "", ylab = "")
  legend.ncol <- length(cells)
  if (legend.ncol > maxColumn){ legend.ncol <- maxColumn}
  legend("bottom", legend = names(cells), col = unique(cell.colors), bty = "n", lwd=2, xpd = T, ncol=legend.ncol)
  # main figure
  if(is.null(nrow(this.data))){
    barplot(height = this.data, names.arg = F, col = cell.colors, border = cell.colors, lwd=2, las=1)
    mtext(side=3, text = genes)
  } else {
    for(j in 1:gene.num){
      barplot(height = this.data[j,], names.arg = F, col = cell.colors, border = cell.colors, lwd=2, las=1)
      mtext(side=3, text = row.names(this.data)[j])
    }
  }
  invisible(dev.off())
}

#' @title boxPlot
#' @description boxplot of gene expression values across different groups of cells
#' @param obj Seurat object
#' @param genes selected genes.
#' @param groupBy specify attributes in meta.data to group cells
#' @param label stamp for output file name
#' @param maxColumn maximum column number to show legend
#' @param genePerRow number of genes displayed in each row, only when forEach is TRUE.
#' @param forEach show boxplot for each gene. default: FALSE, as a gene set.
#' @param width figure width, default: 7
#' @param heigth figure height, default: 5
#' @examples
#' \dontrun{
#' barPlot(obj, genes, groupBy, label)
#' }
#' @export

boxPlot <- function(obj, genes, groupBy, label, maxColumn=6, genePerRow=3, forEach=F, width=7, height=5){
  genes <- genes[genes %in% row.names(obj@data)]
  gene.num <- length(genes)
  cells <- list()
  grps.names <- levels(obj@meta.data[[groupBy]])
  for(i in grps.names){
    cells[[i]] <- which(obj@meta.data[[groupBy]] %in% i)
  }
  this.data <- obj@data[genes, as.numeric(unlist(cells))]
  if(!isTRUE(forEach) && gene.num > 1){
    this.data <- Matrix::colSums(this.data)
    gene.num <- 1
  }
  cell.colors <- colorPick(cells)
  # groups and colors
  grps <- unlist(sapply(1:length(cells), function(x){
    rep(names(cells)[x], length(cells[[x]]))
  }))
  grps.col <- unique(cell.colors)
  # plot
  pdf(file=paste0(label, ".boxplot.pdf"), width=width, height=height)
  par(mar=c(1,1,1,2), oma=c(2,1,2,3))
  layout(matrix(c(rep(1, gene.num), seq(2, length.out=gene.num)), ncol = 2), widths =c(1.5,8.5))
  
  # panel 1, ylab
  plot(x=0,y=0, xlim=c(0,1), ylim=c(0,1), type = "n", axes = F, xlab = "", ylab = "", yaxs = "i")
  text(x=1, y=0.5, adj=c(0.5,1), srt=90, "Relative expression", xpd=T, cex=1.5)
  
  # main figure
  if(is.null(nrow(this.data))){
    this.gene.data <- data.frame(data=this.data, grps=grps)
    this.gene.data$grps <- factor(this.gene.data$grps, levels=grps.names)
    boxplot(data~grps, data=this.gene.data, col = grps.col, border = grps.col,
            las=1, outline=F, show.names=T, frame.plot=F, medcol="white", boxwex=0.5)
    mtext(side=3, text = label)
  } else {
    for(j in 1:gene.num){
      this.gene.data <- data.frame(data=this.data[j,], grps=grps)
      this.gene.data$grps <- factor(this.gene.data$grps, levels=grps.names)
      boxplot(data~grps, data=this.gene.data, col = grps.col, border = grps.col,
              las=1, outline=F, show.names=ifelse(j==gene.num, T, F), frame.plot=F, medcol="white", boxwex=0.5)
      mtext(side=3, text = row.names(this.data)[j])
    }
  }
  invisible(dev.off())
}


#' @title colorPick
#' @description pick colors for list of cells
#' @param cell index of list of cells
#' @examples
#' \dontrun{
#' colorPick(cell)
#' }
#' @import RColorBrewer
#' @export

colorPick <- function(cells){
  cols <- colorRampPalette(brewer.pal(n=9, name = "Set1"))(length(cells))
  col.cells <- c()
  for( i in 1:length(cells)){
    col.cells <- c(col.cells, rep(cols[i], length(cells[[i]])))
  }
  return(col.cells)
}
zhupinglab/Aodav documentation built on June 10, 2019, 2:31 a.m.