R/plot.R
In ssMutPA: Single-Sample Mutation-Based Pathway Analysis

Documented in dotplot get_heatmap mountain_plot Oncoplot

#' @title Plotting a heatmap with subtype labels.
#' @description The function `get_heatmap` is used to plot a heatmap with subtype labels.
#' @param Path_ES Single-sample mutation-based pathway enrichment score profiles.The file can be generated by the function `get_RWR_ES`.
#' @param Path_name The names of the pathways that you want to show in the heatmap.The Path_name must be included in the row names of the Path_ES .
#' @param samp_class A vector containing information about the subtype labels.The vector can be generated by the function `get_samp_class`.
#' @param scale character indicating if the values should be centered and scaled in either the row direction or the column direction, or none. Corresponding values are "row", "column" and "none".
#' @param cluster_rows Boolean values determining if rows should be clustered or hclust object.
#' @param cluster_cols Boolean values determining if columns should be clustered or hclust object.
#' @param show_rownames Boolean specifying if row names are be shown.
#' @param show_colnames Boolean specifying if column names are be shown.
#' @param fontsize base font size for the plot.
#' @param annotation_legend Boolean value showing if the legend for annotation tracks should be drawn .
#' @param annotation_names_row Boolean value showing if the names for row annotation tracks should be drawn.
#' @param annotation_names_col Boolean value showing if the names for column annotation tracks should be drawn.
#' @importFrom grDevices colorRampPalette
#' @importFrom pheatmap pheatmap
#' @return A heatmap
#' @export
#' @examples
#' #load the data
#' data(Path_ES,sample_class,Path_Name)
#' #perform the function `get_heatmap`.
#' get_heatmap(Path_ES,Path_name=Path_Name,samp_class=sample_class)
get_heatmap<-function(Path_ES,Path_name,samp_class,scale = "row",cluster_rows = TRUE,cluster_cols = FALSE,show_rownames = TRUE,
                      show_colnames = FALSE,fontsize = 8,annotation_legend = TRUE,annotation_names_row = TRUE,annotation_names_col = TRUE){
  Path_ES<-Path_ES[Path_name,]
  inter<-intersect(names(samp_class),colnames(Path_ES))
  samp_class<-as.data.frame(samp_class[inter])
  colnames(samp_class)<-"sample_class"
  Path_ES<-Path_ES[,inter]
  heat_data<-c()
  for (i in c(unique(samp_class[,1]))) {
    heat_data<-cbind(heat_data,Path_ES[,rownames(samp_class)[which(samp_class[,1]%in%i)]])
  }
  samp_class[,1]<-as.character(samp_class[,1])
  Num_class<-length(unique(samp_class[,1]))
  if(Num_class==2){
    ann_colors = list(
      sample_class = c("1" = "#00468b", "2" = "#ED0000")
    )
  } else if(Num_class==3){
    ann_colors = list(
      sample_class = c("1" = "#00468b", "2" = "#ED0000", "3" = "#42B540")
    )
  }else{
    sample_class <- colorRampPalette(c("#00468b","white","#ED0000"))(Num_class)
    names(sample_class)<-unique(samp_class[,1])
    ann_colors = list(
      sample_class
    )
  }
  pheatmap(heat_data,
           color = colorRampPalette(c("darkblue","blue", "white","red","darkred"))(50),
           annotation_colors =  ann_colors,
           scale = scale,
           cluster_rows = cluster_rows,
           cluster_cols = cluster_cols,
           legend = TRUE,
           show_rownames = show_rownames,
           show_colnames = show_colnames,
           fontsize = fontsize,
           annotation_col = samp_class,
           annotation_legend = annotation_legend,
           annotation_names_row = annotation_names_row,
           annotation_names_col = annotation_names_col
  )
}


#' @title Plotting the density ridges plot.
#' @description The function `mountain_plot` is used to draw a graph to reflect the distribution of the data.
#' @param data A pathway activity score matrix,which row names represent the pathways and the column names are samples.
#' @param sample_class A vector containing subtype labels of the samples.
#' @param Path_name The names of the pathways that you want to show in the graph.The 'Path_name' must be included in the row names of the data.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 scale_y_discrete
#' @importFrom ggplot2 scale_x_continuous
#' @importFrom ggplot2 scale_fill_manual
#' @importFrom ggplot2 scale_color_manual
#' @importFrom ggplot2 scale_discrete_manual
#' @importFrom ggplot2 guides
#' @importFrom ggplot2 guide_legend
#' @importFrom ggplot2 ggtitle
#' @importFrom ggridges theme_ridges
#' @importFrom ggridges geom_density_ridges
#' @importFrom ggridges position_points_jitter
#' @return Density ridges plot
#' @export
#' @examples
#' #load the data
#' data(Path_ES,sample_class)
#' #perform the function `mountain_plot`.
#' mountain_plot(data=Path_ES,sample_class=sample_class,Path_name=rownames(Path_ES)[c(12,20,74)])
mountain_plot<-function(data,sample_class,Path_name){
  data<-data[Path_name,]
  inter<-intersect(colnames(data),names(sample_class))
  if(length(inter)==0){
    message("The data does not have a corresponding class label!")
  }
  data<-data[,inter]
  sample_class<-sample_class[inter]
  plot_data<-c()
  for (i in 1:nrow(data)) {
    mount_data<-as.data.frame(cbind(Pathway=rep(rownames(data)[i],ncol(data)),
                                    activity_score=data[i,],
                                    class=sample_class))
    plot_data<-rbind(plot_data,mount_data)
  }
  plot_data$activity_score<-as.numeric(plot_data$activity_score)
  if(length(unique(plot_data$class))==2){
    ggplot(plot_data, aes(x=plot_data$activity_score, y=plot_data$Pathway, color=plot_data$class, point_color=plot_data$class, fill=plot_data$class)) +
      geom_density_ridges(
        jittered_points=TRUE, scale = .95, rel_min_height = .01,
        point_shape = "|", point_size = 1, size = 0.05,
        position = position_points_jitter(height = 0)
      ) +
      scale_y_discrete(expand = c(.03, 0)) +
      scale_x_continuous(expand = c(0, 0), name = "Enrichment score") +
      scale_fill_manual(values = c("#00468B50", "#ED000050"), labels = c("Class 1", "Class 2")) +
      scale_color_manual(values = c("#00468B", "#ED0000"), guide = "none") +
      scale_discrete_manual("point_color", values = c("#00468B", "#ED0000"), guide = "none") +
      guides(fill = guide_legend(
        override.aes = list(
          fill = c("#00468B50", "#ED000050"),
          color = NA, point_color = NA))
      ) +
      ggtitle("The plot of pathways")
  }else{
    ggplot(plot_data, aes(x=plot_data$activity_score, y=plot_data$Pathway, color=plot_data$class, point_color=plot_data$class, fill=plot_data$class)) +
      geom_density_ridges(
        jittered_points=TRUE, scale = .95, rel_min_height = .01,
        point_shape = "|", point_size = 1, size = 0.05,
        position = position_points_jitter(height = 0)
      ) +
      scale_y_discrete(expand = c(.03, 0)) +
      scale_x_continuous(expand = c(0, 0), name = "Enrichment score") +
      scale_fill_manual(values = colorRampPalette(c("#00468b","lightblue"))(length(unique(plot_data$class)))) +
      scale_color_manual(values = colorRampPalette(c("#00468b","lightblue"))(length(unique(plot_data$class))), guide = "none") +
      scale_discrete_manual("point_color", values =colorRampPalette(c("#00468b","lightblue"))(length(unique(plot_data$class))), guide = "none") +
      guides(fill = guide_legend(
        override.aes = list(
          fill = colorRampPalette(c("#00468b","lightblue"))(length(unique(plot_data$class))),
          color = NA, point_color = NA))
      ) +
      ggtitle("The plot of pathways")
  }

}



#' @title Plotting the Dot plot.
#' @description The function is used to draw a graph to reflect the univariate HRs and P-values of the pathways in different cancer types.
#' @param data A pathway activity score matrix, which rows represent the pathways and the columns are samples.
#' @param low_col,high_col Colours for low and high ends of the gradient.
#' @param cut_point The threshold of HRs,when HR is greater than the cut_point,HR is assigned cut_point.
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 geom_tile
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 theme_bw
#' @importFrom ggplot2 scale_fill_gradient2
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 labs
#' @return A Dot plot
#' @export
#' @examples
#' #load the data
#' data(dot_data)
#' #perform the function `dotplot`.
#' dotplot(dot_data)
dotplot<-function(data,low_col="#6ADD26",high_col="#AB2513",cut_point=5){
  data<- cbind(data,-log10(data$p.value))
  colnames(data)[5]<-"-log10Pval"
  data[,4][which(data[,4]>cut_point)]<-cut_point
  data[,4][which(data[,4]< -cut_point)]<- -cut_point
  ggplot(data = data) +
    labs(x="Cancer_type",y="Pathways")+
    geom_tile(data = data,aes(x = data$cancer_type,y = data$pathway,fill = data$HR),size = 2,color = "white") +
    geom_point(aes(x = data$cancer_type,y = data$pathway,size = data$`-log10Pval`),color = "#2F4858")  +
    theme_bw() +
    scale_fill_gradient2(low = low_col,high = high_col,midpoint = 1,na.value ="#FFFFFF") +
    theme(axis.title = element_text(size = 20),
          axis.text = element_text(size = 15),
          axis.text.x = element_text(angle = 45,vjust = 0.5,hjust = 0.5))
}




#' @title Drawing a waterfall plot of a particular pathway.
#' @description Load the data in MAF format and draw a waterfall plot.
#' @param maf A data of MAF format.
#' @param samp_class A vector containing subtype labels of the samples.
#' @param sur A matrix containing the samples' survival time and survival status.
#' @param mut_status A binary mutations matrix.The file can be generated by the function `get_mut_status`.
#' @param pathway A list containing pathway information .
#' @param pathway_name The names of the pathways that you want to visualize.For example "JAK-STAT signaling pathway".
#' @param isTCGA Is input MAF file from TCGA source? If TRUE uses only first 12 characters from Tumor_Sample_Barcode.
#' @param top How many top genes to be drawn,genes are arranged from high to low depending on the frequency of mutations. defaults to 20.
#' @param clinicalFeatures Columns names from 'clinical.data' slot of MAF to be drawn in the plot.
#' @param class_col The color of sample class .
#' @param event_col The color of survival status .
#' @param sortByAnnotation Logical sort oncomatrix (samples) by provided 'clinicalFeatures'. Sorts based on first 'clinicalFeatures'. Defaults to TRUE. column-sort.
#' @param gene_mar Margin width for gene names.
#' @param removeNonMutated Logical. If TRUE removes samples with no mutations in the GenePathwayOncoplots for better visualization. Default FALSE.
#' @param drawRowBar Logical. Plots righ barplot for each gene. Default TRUE.
#' @param drawColBar Logical plots top barplot for each sample. Default TRUE.
#' @param leftBarData Data for leftside barplot. Must be a data.frame with two columns containing gene names and values. Default 'NULL'.
#' @param leftBarLims Limits for 'leftBarData'. Default 'NULL'.
#' @param rightBarData Data for rightside barplot. Must be a data.frame with two columns containing to gene names and values. Default 'NULL' which draws distibution by variant classification. This option is applicable when only 'drawRowBar' is TRUE.
#' @param rightBarLims Limits for 'rightBarData'. Default 'NULL'.
#' @param topBarData Default 'NULL' which draws absolute number of mutation load for each sample. Can be overridden by choosing one clinical indicator(Numeric) or by providing a two column data.frame contaning sample names and values for each sample. This option is applicable when only 'drawColBar' is TRUE.
#' @param logColBar Plot top bar plot on log10 scale. Default FALSE.
#' @param draw_titv Logical Includes TiTv plot. Default FALSE
#' @param showTumorSampleBarcodes Logical to include sample names.
#' @param fill Logical. If TRUE draws genes and samples as blank grids even when they are not altered.
#' @param showTitle Default TRUE.
#' @param titleText Custom title. Default 'NULL'.
#' @param vc_cols named vector of colors for each Variant_Classification.
#' @importFrom maftools subsetMaf
#' @importFrom maftools oncoplot
#' @importFrom RColorBrewer brewer.pal
#' @importFrom stats na.omit
#' @return A waterfall plot
#' @export
#' @examples
#' #load the data
#' mut_path <- system.file("extdata","maffile.txt",package = "ssMutPA")
#' maf<-maftools::read.maf(mut_path ,isTCGA = FALSE)
#' pathway_path <- system.file("extdata","kegg_323_gmt.Rdata",package = "ssMutPA")
#' load(pathway_path)
#' data(samp_class_onco,mut_onco,sur_onco)
#' samples <- names(samp_class_onco)
#' samp_class_onco <- paste0("class_",samp_class_onco)
#' names(samp_class_onco) <- samples
#' sur_onco$event <- ifelse(sur_onco$event%in%1,"Dead","Alive")
#' col <- c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3")
#' ##draw a waterfall plot
#' #win.graph()
#' Oncoplot(maf,samp_class_onco,sur_onco,mut_onco,kegg_323_gmt,"IL-17 signaling pathway",vc_cols=col)
Oncoplot<-function(maf,samp_class,sur,mut_status,pathway,pathway_name,isTCGA=FALSE,top=20,clinicalFeatures = c("sample_group","event"),
                   class_col=c("#00468B","#ED0000"),event_col=c("#B3DE69","#BC80BD"),sortByAnnotation = TRUE,gene_mar=7,
                   removeNonMutated= FALSE,drawRowBar= TRUE,drawColBar= TRUE,leftBarData= NULL,leftBarLims= NULL,
                   rightBarData= NULL,rightBarLims= NULL,topBarData= NULL,logColBar= FALSE,draw_titv= FALSE,
                   showTumorSampleBarcodes= FALSE,fill= TRUE,showTitle = TRUE,titleText = NULL,
                   vc_cols = NULL){

  a<-apply(mut_status,1,function(x){length(which(x!=0))/length(x)})
  pathway_list<-split(pathway[,2],pathway[,1])
  path_mutrate<-list()
  for (i in 1:length(pathway_list)) {
    path_mutrate[[i]]<-a[na.omit(match(pathway_list[[i]],names(a)))]
  }
  names(path_mutrate)<-names(pathway_list)
  path_clust<-as.data.frame(cbind(sample=names(samp_class),
                                  clust=samp_class))
  rownames(path_clust)<-path_clust[,1]
  sur_event<-as.data.frame(cbind(rownames(sur),sur$event),row.names =rownames(sur) )
  all_sample<-as.data.frame(maf@clinical.data)
  rownames(all_sample)<-all_sample[,1]
  inter<-intersect(intersect(all_sample[,1],rownames(path_clust)),rownames(sur_event))
  clinical<-as.data.frame(cbind(all_sample[inter,],sample_group=path_clust[inter,2],event=sur_event[inter,2]))
  colnames(clinical)[1]<-"Tumor_Sample_Barcode"
  rownames(clinical)<-clinical[,1]
  loc<-match(maf@clinical.data$Tumor_Sample_Barcode,clinical$Tumor_Sample_Barcode)
  maf@clinical.data$sample_group<-clinical$sample_group[loc]
  maf@clinical.data$event<-clinical$event
  maf<-subsetMaf(maf,tsb = clinical[,1])
  col<-class_col
  names(col)<-as.character(unique(samp_class))
  col_event<-event_col
  names(col_event)<-as.character(unique(sur$event))
  fabcolors<-list(sample_group=col,event=col_event)
  path_gene<-names(sort(path_mutrate[[pathway_name]],decreasing = T)[1:top])
  #win.graph(width = 400,height = 400)
  vc_cols <- vc_cols
  names(vc_cols) <- levels(maf@data$Variant_Classification)
  oncoplot(maf,genes=path_gene,clinicalFeatures = c("sample_group","event"),sortByAnnotation =sortByAnnotation,annotationColor = fabcolors,
           removeNonMutated = removeNonMutated,gene_mar=gene_mar,bgCol = "#F4F4F4",colors =vc_cols)
}