R/utils_groupComparisonPlots.R

Defines functions .plot.model.volcano .plotVolcano .plot.model.heatmap .plotHeatmap .format.model.plots .check.plotting.data

#' Check input data and parameters
#' @noRd
.check.plotting.data = function(data, type, sig, FCcutoff, logBase.pvalue, 
                                 ylimUp, ylimDown, xlimUp, x.axis.size, 
                                 y.axis.size, dot.size, text.size, text.angle, 
                                 legend.size, ProteinName,colorkey, numProtein, 
                                 width, height, which.Comparison, which.Protein,
                                 address){
  
  ## Column check
  assertChoice(toupper(type), c("HEATMAP", "VOLCANOPLOT"),
               .var.name = "Type") 
  assertNumeric(sig, .var.name = "Sig")
  if (!is.logical(FCcutoff)){
    assertNumeric(FCcutoff, .var.name = "FCcutoff")
  }
  
  assertChoice(logBase.pvalue, c(2, 10),
               .var.name = "Logbase Pvalue") 
  
  if (!is.logical(ylimUp)){
    assertNumeric(ylimUp, .var.name = "ylimUp")
  }
  if (!is.logical(ylimDown)){
    assertNumeric(ylimDown, .var.name = "ylimDown")
  }
  if (!is.logical(xlimUp)){
    assertNumeric(xlimUp, .var.name = "xlimUp")
  }
  
  ## Check plotting size vars
  assertNumeric(x.axis.size, .var.name = ("x.axis.size"))
  assertNumeric(y.axis.size, .var.name = ("y.axis.size"))
  assertNumeric(dot.size, .var.name = ("dot.size"))
  assertNumeric(text.size, .var.name = ("text.size"))
  assertNumeric(text.angle, .var.name = ("text.angle"))
  assertNumeric(legend.size, .var.name = ("legend.size"))
  
  assertLogical(colorkey, .var.name = ("colorkey"))
  
  assertNumeric(width, .var.name = ("width"))
  assertNumeric(height, .var.name = ("height"))
  
  ## Check data columns
  min.cols = c("Protein", "Label", "log2FC", "SE", "DF", "pvalue","adj.pvalue")
  
  if (!is.null(data[["PTM.Model"]])){
    missing = setdiff(min.cols, colnames(data[["PTM.Model"]]))
    if (length(missing) > 0){
      msg = paste("Missing columns in the PTM input:",
                  paste(missing, collapse = " "))
      stop(msg)
    }
  }
  if (!is.null(data[["PROTEIN.Model"]])){
    missing = setdiff(min.cols, colnames(data[["PROTEIN.Model"]]))
    if (length(missing) > 0){
      msg = paste("Missing columns in the Protein input:",
                   paste(missing, collapse = " "))
      stop(msg)
    }
  }
  if (!is.null(data[["ADJUSTED.Model"]])){
    missing = setdiff(min.cols, colnames(data[["ADJUSTED.Model"]]))
    if (length(missing) > 0){
      msg = paste("Missing columns in the Adjusted PTM input:",
                   paste(missing, collapse = " "))
      stop(msg)
    }
  }
                
}

#' Format models into plotting format
#' @noRd
.format.model.plots = function(data, which.Comparison, which.Protein){
  
  ptm.model = data[['PTM.Model']]
  ptm.model = as.data.table(ptm.model)
  protein.model = data[['PROTEIN.Model']]
  protein.model = as.data.table(protein.model)
  adjusted.model = data[['ADJUSTED.Model']]
  adjusted.model = as.data.table(adjusted.model)
  
  ptm.model$Protein = factor(ptm.model$Protein)
  ptm.model$Label = factor(ptm.model$Label)
  
  if (nrow(protein.model) > 0){
    protein.model$Protein = factor(protein.model$Protein)
    protein.model$Label = factor(protein.model$Label)
    adjusted.model$Protein = factor(adjusted.model$Protein)
    adjusted.model$Label = factor(adjusted.model$Label)
  }
  
  if (which.Comparison[[1]] != "all") {
    ## check which.comparison is name of comparison
    if (is.character(which.Comparison)) {
      
      temp.name = which.Comparison
      ## message if name of comparison is wrong.
      if (length(setdiff(temp.name, unique(ptm.model$Label))) > 0) {
        
        ## TODO: Logging        
        # processout = rbind(processout, 
        #                     paste0("Please check labels of comparions. ", 
        #                            "Result does not have this comparison. - ", 
        #                            toString(temp.name)))
        # write.table(processout, file=finalfile, row.names=FALSE)
        
        stop(paste0("Please check labels of comparisons. ", 
                    "Result does not have this comparison. - ", 
                    toString(temp.name)))
      }
    }
    ## check which.comparison is order number of comparison
    if (is.numeric(which.Comparison)) {
      
      temp.name = levels(ptm.model$Label)[which.Comparison]
      
      ## message if name of comparison is wrong.
      if (length(levels(ptm.model$Label))<max(which.Comparison)) {
        stop(paste0("Please check your selection of comparisons. There are ",
                    length(levels(ptm.model$Label)), 
                    " comparisons in this result."))
      }
    }  
    
    ## use only assigned comparisons
    ptm.model = ptm.model[which(ptm.model$Label %in% temp.name), ]
    if (nrow(protein.model) > 0){
      protein.model = protein.model[which(
        protein.model$Label %in% temp.name), ]
      adjusted.model = adjusted.model[which(
        adjusted.model$Label %in% temp.name),]
    }

  }
  
  if (which.Protein[[1]] != "all") {
    ## check which.comparison is name of comparison
    if (is.character(which.Protein)) {
      
      temp.name = which.Protein
      ## message if name of comparison is wrong.
      if (length(setdiff(temp.name, unique(ptm.model$Protein))) > 0) {
        
        ## TODO: Logging        
        # processout = rbind(processout, 
        #                     paste0("Please check labels of comparions. ", 
        #                            "Result does not have this comparison. - ", 
        #                            toString(temp.name)))
        # write.table(processout, file=finalfile, row.names=FALSE)
        
        stop(paste0("Please check labels of proteins ", 
                    "Result does not have all these proteins. - ", 
                    toString(temp.name)))
      }
    }
    ## check which.comparison is order number of comparison
    if (is.numeric(which.Protein)) {
      
      temp.name = levels(ptm.model$Protein)[which.Protein]
      
      ## message if name of comparison is wrong.
      if (length(levels(ptm.model$Protein)) < max(which.Protein)) {
        stop(paste0("Please check your selection of proteins. There are ",
                    length(levels(ptm.model$Protein)), 
                    " comparisons in this result."))
      }
    }
    
    ## use only assigned comparisons
    ptm.model = ptm.model[which(ptm.model$Protein %in% temp.name), ]
    if (nrow(protein.model) > 0){
      adjusted.model = adjusted.model[
        which(adjusted.model$Protein %in% temp.name), ]
      protein.model = protein.model[which(protein.model$Protein %in% unique(
        adjusted.model$GlobalProtein)), ]
    }
  }
  
  ptm.model$Protein = factor(ptm.model$Protein)
  ptm.model$Label = factor(ptm.model$Label)
  
  if (nrow(protein.model) > 0){
    protein.model$Protein = factor(protein.model$Protein)
    protein.model$Label = factor(protein.model$Label)
    adjusted.model$Protein = factor(adjusted.model$Protein)
    adjusted.model$Label = factor(adjusted.model$Label)
  }
  
  return(list('PTM.Model' = ptm.model, 'PROTEIN.Model' = protein.model, 
              'ADJUSTED.Model' = adjusted.model))
}

#' Wrapper for plotting heatmap for every model provided
#' @noRd
.plotHeatmap = function(data, sig, FCcutoff, logBase.pvalue, ylimUp, ylimDown, 
                     text.angle, x.axis.size, y.axis.size, dot.size, colorkey, 
                     numProtein, width, height, address){
  ## If there are the file with the same name, 
  ## add next numbering at the end of file name
  if (address != FALSE) {
    allfiles = list.files()
    
    num = 0
    filenaming = paste0(address, "Heatmap")
    finalfile = paste0(address, "Heatmap.pdf")
    
    while (is.element(finalfile, allfiles)) {
      num = num + 1
      finalfile = paste0(paste(filenaming, num, sep="-"), ".pdf")
    }	
    
    pdf(finalfile, width=width, height=height)
  }

  .plot.model.heatmap(data[['PTM.Model']], sig, FCcutoff, logBase.pvalue, 
                      ylimUp, ylimDown, x.axis.size, y.axis.size, text.angle,
                      colorkey, numProtein,
                      "Unadjusted PTM")
  
  if (nrow(data[['PROTEIN.Model']]) > 0){
    .plot.model.heatmap(data[['PROTEIN.Model']], sig, FCcutoff, logBase.pvalue, 
                        ylimUp, ylimDown, x.axis.size, y.axis.size, text.angle,
                        colorkey, numProtein,
                        "Protein")
    .plot.model.heatmap(data[['ADJUSTED.Model']], sig, FCcutoff, logBase.pvalue, 
                        ylimUp, ylimDown, x.axis.size, y.axis.size, text.angle,
                        colorkey, numProtein,
                        "Adjusted PTM")
  }
  
  if (address != FALSE) {
    dev.off()
  }
}

#' Plots individual model heatmap
#' @noRd
.plot.model.heatmap = function(data, sig, FCcutoff, logBase.pvalue, ylimUp,
                                 ylimDown, x.axis.size, y.axis.size, text.angle,
                                 colorkey, numProtein, model){
  
  Label = Protein = sign_adj_pval = NULL
  
  if (logBase.pvalue == 2) {
    y.limUp = 30
  } else if (logBase.pvalue == 10) {
    y.limUp = 10
  }
  
  if (is.numeric(ylimUp)) {
    y.limUp = ylimUp 
  }
  
  
  ## if FCcutoff is assigned, make p-value insignificant.
  if (is.numeric(FCcutoff)) {
    if (colnames(data)[3] == "log2FC") {
      data$adj.pvalue[data[, 3] < log2(FCcutoff) & data[, 3] > (
        -log2(FCcutoff))] = 1
    }
    if (colnames(data)[3] == "log10FC") {
      data$adj.pvalue[data[, 3] < log10(FCcutoff) & data[, 3] > (
        -log10(FCcutoff))] = 1
    }
  }

  ## based on p-value
  if (logBase.pvalue == 2) {
      temp =  -log2(data$adj.pvalue) * sign(data[, 3])
  } else if (logBase.pvalue == 10) {
      temp =  -log10(data$adj.pvalue) * sign(data[, 3])
  }
  
  data$sign_adj_pval = temp
  obj = data[, c("Protein", "Label", "sign_adj_pval")]
  
  ## maximum number of proteins per heatmap
  unique_prot = unique(obj$Protein)
  totalpro = length(unique_prot)
  numheatmap = totalpro %/% numProtein + 1
  
  ## draw heatmap
  ## loop for numProtein
  for (j in seq_len(numheatmap)) {
    
    plot_prot = unique_prot[((j - 1) * numProtein + 1):(j * numProtein)]
    temp_obj = obj[obj$Protein %in% plot_prot]
    
    limits = c(-1,1)*max(abs(temp_obj$sign_adj_pval[
      is.finite(temp_obj$sign_adj_pval)]))
    
    temp_heatmap = ggplot(temp_obj, aes(Label, Protein, fill = sign_adj_pval)
                           ) + geom_tile() + scale_fill_distiller(
        palette = "RdBu", name = "(sign) Adj pvalue", 
        limits = limits) + labs(
        title = paste0(model, " - Page #", as.character(j)), y = model, 
        x = "Comparison") +
      theme_msstats(type = "COMPARISONPLOT", x.axis.size, y.axis.size, 13, 
                    element_rect(fill = "gray95"),
                    element_text(colour = c("#00B0F6"), size = 14),
                    "bottom", text_angle = text.angle)
    print(temp_heatmap)
  } 
}

#' Wrapper for plotting volcano plot for every model provided
#' @noRd
.plotVolcano = function(data, sig, FCcutoff, logBase.pvalue, ylimUp, ylimDown, 
                     xlimUp, x.axis.size, y.axis.size, dot.size, text.size, 
                     legend.size, ProteinName, colorkey, numProtein,
                     width, height, address, plot_name_list){
  ## If there are the file with the same name
  ## add next numbering at the end of file name
  if (address != FALSE) {
    allfiles = list.files()
    
    num = 0
    filenaming = paste0(address, "VolcanoPlot")
    finalfile = paste0(address, "VolcanoPlot.pdf")
    
    while (is.element(finalfile, allfiles)) {
      num = num + 1
      finalfile = paste0(paste(filenaming, num, sep="-"), ".pdf")
    }	
    
    pdf(finalfile, width=width, height=height)
  }

  .plot.model.volcano(data[['PTM.Model']], sig, FCcutoff, logBase.pvalue, 
                      ylimUp, ylimDown, xlimUp, x.axis.size, y.axis.size, 
                      dot.size, text.size, legend.size, ProteinName,
                      plot_name_list[[1]])
  
  if (nrow(data[['PROTEIN.Model']])) {
    .plot.model.volcano(data[['PROTEIN.Model']], sig, FCcutoff, logBase.pvalue, 
                        ylimUp, ylimDown, xlimUp, x.axis.size, y.axis.size, 
                        dot.size, text.size, legend.size, ProteinName,
                        plot_name_list[[2]])
    .plot.model.volcano(data[['ADJUSTED.Model']], sig, FCcutoff, logBase.pvalue,
                        ylimUp, ylimDown, xlimUp, x.axis.size, y.axis.size, 
                        dot.size, text.size, legend.size, ProteinName,
                        plot_name_list[[3]])
  }
  
  if (address != FALSE) {
    dev.off()
  }
}

#' Plots individual model volcano plot
#' @noRd
.plot.model.volcano = function(data, sig, FCcutoff, logBase.pvalue, ylimUp,
                                ylimDown, xlimUp, x.axis.size, y.axis.size, 
                                dot.size, text.size, legend.size, ProteinName,
                                model){
  log2FC = Protein = NULL
  
  if (logBase.pvalue == 2) {
    y.limUp  = 30
  } else if (logBase.pvalue == 10) {
    y.limUp  = 10
  }
  
  if (is.numeric(ylimUp)) {
    y.limUp = ylimUp 
  }
  
  ## remove the result, NA and Inf
  data = data[!is.na(data$adj.pvalue), ]
  data = data[is.finite(data$log2FC)]
  
  ## group for coloring dots
  if (!FCcutoff) {  
    data[data$adj.pvalue >= sig, "colgroup"] = "black"
    data[which(data$adj.pvalue < sig & data[, 3] > 0), "colgroup"] = "red"
    data[which(data$adj.pvalue < sig & data[, 3] < 0), "colgroup"] = "blue" 
  }
  
  if (is.numeric(FCcutoff)) {
    data$colgroup = "black"
    
    if (colnames(data)[3] == "log2FC") {
      data[which(data$adj.pvalue < sig & data[, 3] > log2(FCcutoff)), 
           "colgroup"] = "red"
      data[which(data$adj.pvalue < sig & data[, 3] < (-log2(FCcutoff))), 
           "colgroup"] = "blue"
    }
    
    if (colnames(data)[3] == "log10FC") {
      data = data[is.finite(data$log10FC)]
      data[which(data$adj.pvalue < sig & data[, 3] > log10(FCcutoff)), 
           "colgroup"] = "red"
      data[which(data$adj.pvalue < sig & data[, 3] < (-log10(FCcutoff))), 
           "colgroup"] = "blue"
    }
  }
  
  data$colgroup = factor(data$colgroup, levels=c("black", "blue", "red"))
  
  ## for multiple volcano plots, 
  for (i in seq_len(nlevels(data$Label))) {
    
    sub = data[data$Label == levels(data$Label)[i], ]
    
    if (logBase.pvalue == 2) {
      sub$adj.pvalue[sub$adj.pvalue < 2^(-y.limUp)] = 2^(-y.limUp)
    } else if (logBase.pvalue == 10) {
      sub$adj.pvalue[sub$adj.pvalue < 10^(-y.limUp)] = 10^(-y.limUp)
    }
    
    sub = as.data.table(sub)
    
    if (logBase.pvalue == 2) {
      y.limup = ceiling(max(-log2(sub[!is.na(sub$adj.pvalue), "adj.pvalue"])))
      if (y.limup < (-log2(sig))) {
        y.limup = (-log2(sig) + 1) ## for too small y.lim
      }
    } else if (logBase.pvalue == 10) {
      y.limup = ceiling(max(-log10(sub[!is.na(sub$adj.pvalue), "adj.pvalue"])))
      if (y.limup < (-log10(sig))) {
        y.limup = (-log10(sig) + 1) ## for too small y.lim
      }
    }
    
    ## ylimDown
    y.limdown = 0 ## default is zero
    if (is.numeric(ylimDown)) {
      y.limdown = ylimDown
    }
    
    ## x.lim
    x.lim = ceiling(max(abs(sub[is.finite(log2FC), log2FC])))
    
    if (x.lim < 3) {
      x.lim = 3
    }
    if (is.numeric(xlimUp)) {
      x.lim = xlimUp
    }
    ## for assigning x in ggplot2
    subtemp = sub
    colnames(subtemp)[3] = "logFC"
    
    if (logBase.pvalue == 2) {
      subtemp$logadjp = (-log2(subtemp$adj.pvalue))
    } else if (logBase.pvalue == 10) {
      subtemp$logadjp = (-log10(subtemp$adj.pvalue))
    }
    
    ## for x limit for inf or -inf
    subtemp$newlogFC = subtemp$logFC
    subtemp[!is.na(subtemp$issue) &
              subtemp$issue == "oneConditionMissing" & 
              subtemp$logFC == Inf, "newlogFC"] = (x.lim - 0.2)
    subtemp[!is.na(subtemp$issue) & 
              subtemp$issue == "oneConditionMissing" & 
              subtemp$logFC == (-Inf), "newlogFC"] = (x.lim - 0.2) * (-1)
    
    ## add (*) in Protein name for Inf or -Inf
    subtemp$Protein = as.character(subtemp$Protein)
    subtemp[!is.na(subtemp$issue) & 
              subtemp$issue == "oneConditionMissing", "Protein"] = 
      paste0("*", subtemp[!is.na(subtemp$issue) & 
                            subtemp$issue == "oneConditionMissing", "Protein"])
    
    if (logBase.pvalue == 2) {
      log_title = 'Log2'
    } else if (logBase.pvalue == 10) {
      log_title = 'Log10'
    }
    
    ptemp = ggplot(aes_string(x='logFC', y='logadjp',
                               color='colgroup',
                               label='Protein'),
                    data=subtemp) +
      geom_point(size=dot.size) +
      scale_colour_manual(values=c("gray65", "blue", "red"), 
                          limits=c("black", "blue", "red"), 
                          breaks=c("black", "blue", "red"), 
                          labels=c("No regulation", "Down-regulated", 
                                   "Up-regulated")) +
      scale_y_continuous(paste0('-', log_title, ' (adjusted p-value)'), 
                         limits=c(y.limdown, y.limup)) + 
      scale_x_continuous(paste0(log_title, ' fold change'), 
                         limits=c(-x.lim, x.lim)) +
      labs(title= paste0(model, ": ", unique(sub$Label)))
    
    ## add protein name
    if (ProteinName) {
      if (length(unique(subtemp$colgroup)) == 1 & 
          any(unique(subtemp$colgroup) == 'black')) {
        message(paste0("The volcano plot for ", unique(subtemp$Label), 
      " does not show the protein names because none of them are significant."))
      } else {
        ptemp = ptemp +
          geom_text_repel(data=subtemp[subtemp$colgroup != "black", ],
                          aes(label=Protein),
                          size=text.size,
                          col='black')
      }
    }
    
    ## For legend of linetype for cutoffs
    ## first assign line type
    ltypes = c("type1"="twodash", "type2"="dotted")
    
    ## cutoff lines, FDR only
    if (!FCcutoff) {
      if (logBase.pvalue == 2){
        sigcut = data.table("Protein"='sigline', 
                             "logFC"=seq(-x.lim, x.lim, length.out=20),
                             "logadjp"=(-log2(sig)),
                             "line"='twodash')
      } else if (logBase.pvalue == 10) {
        sigcut = data.table("Protein"='sigline', 
                             "logFC"=seq(-x.lim, x.lim, length.out=20),
                             "logadjp"=(-log10(sig)),
                             "line"='twodash')
      }
      
      pfinal = ptemp +
        geom_line(data=sigcut,
                  aes_string(x='logFC', y='logadjp', linetype='line'),
                  colour="darkgrey",
                  size=0.6,
                  show.legend=TRUE) +
        scale_linetype_manual(values=c('twodash'=6),
                              labels=c(paste0("Adj p-value cutoff (", sig, ")"))
                              ) +
        guides(colour=guide_legend(override.aes=list(linetype=0)),
               linetype=guide_legend())
    }
    if (is.numeric(FCcutoff)) {
      if (logBase.pvalue == 2) {
        ## three different lines
        sigcut = data.table("Protein"='sigline', 
                             "logFC"=seq(-x.lim, x.lim, length.out=10), 
                             "logadjp"=(-log2(sig)),
                             "line"='twodash')
        FCcutpos = data.table("Protein"='sigline', 
                               "logFC"=log2(FCcutoff), 
                               "logadjp"=seq(y.limdown, y.limup, length.out=10), 
                               "line"='dotted')
        FCcutneg = data.table("Protein"='sigline', 
                               "logFC"=(-log2(FCcutoff)), 
                               "logadjp"=seq(y.limdown, y.limup, length.out=10), 
                               "line"='dotted')
      } else if (logBase.pvalue == 10) {
        ## three different lines
        sigcut = data.table("Protein"='sigline', 
                             "logFC"=seq(-x.lim, x.lim, length.out=10), 
                             "logadjp"=(-log10(sig)),
                             "line"='twodash')
        FCcutpos = data.table("Protein"='sigline', 
                               "logFC"=log10(FCcutoff), 
                               "logadjp"=seq(y.limdown, y.limup, length.out=10), 
                               "line"='dotted')
        FCcutneg = data.table("Protein"='sigline', 
                               "logFC"=(-log10(FCcutoff)), 
                               "logadjp"=seq(y.limdown, y.limup, length.out=10), 
                               "line"='dotted')
      }
      ## three lines, with order color first and then assign linetype manual
      pfinal = ptemp +
        geom_line(data=sigcut, 
                  aes_string(x='logFC', y='logadjp', linetype='line'),
                  colour="darkgrey",
                  size=0.6,
                  show.legend=TRUE) +
        geom_line(data=FCcutpos,
                  aes_string(x='logFC', y='logadjp', linetype='line'),
                  colour="darkgrey",
                  size=0.6,
                  show.legend=TRUE) +
        geom_line(data=FCcutneg,
                  aes_string(x='logFC', y='logadjp', linetype='line'),
                  colour="darkgrey",
                  size=0.6) +
        scale_linetype_manual(values=c('dotted'=3, 'twodash'=6),
                              labels=c(paste0("Fold change cutoff (", FCcutoff, 
                                              ")"),
                                       paste0("Adj p-value cutoff (", sig, ")"))
                              ) +
        guides(colour=guide_legend(override.aes=list(linetype=0)),
               linetype=guide_legend())
    }
    
    pfinal = pfinal +
      theme_msstats(type = "VOLCANO", x.axis.size, y.axis.size, legend.size, 
                    element_rect(fill = "gray95"),
                    element_text(colour = c("#00B0F6"), size = 14),
                    "bottom", text_angle = text.angle, 
                    legend.title = element_blank())
      # theme(
      #   panel.background = element_rect(fill='white', colour="black"),
      #   panel.grid.minor = element_blank(),
      #   axis.text.x = element_text(size=x.axis.size, colour="black"),
      #   axis.text.y = element_text(size=y.axis.size, colour="black"),
      #   axis.ticks = element_line(colour="black"),
      #   axis.title.x = element_text(size=x.axis.size+5, vjust=-0.4),
      #   axis.title.y = element_text(size=y.axis.size+5, vjust=0.3),
      #   title = element_text(size=x.axis.size+8, vjust=1.5),
      #   legend.position="bottom",
      #   legend.key = element_rect(fill='white', colour='white'),
      #   legend.text = element_text(size=legend.size),
      #   legend.title = element_blank()
      # )
    
    print(pfinal)
  }
}
Vitek-Lab/MSstatsPTM documentation built on May 1, 2024, 8:25 a.m.