R/groupComparisonPTM.R

Defines functions groupComparisonPTM

Documented in groupComparisonPTM

#' Model PTM and/or protein data and make adjustments if needed
#'
#' Takes summarized PTM and protein data from proteinSummarization. 
#' If protein data is unavailable, PTM data only can be passed into the 
#' function. Including protein data allows for adjusting PTM Fold Change by the 
#' change in protein abundance without modification.
#' MSstatsContrastMatrix
#' @export
#' @importFrom data.table data.table as.data.table `:=`
#' @importFrom MSstats groupComparison MSstatsContrastMatrix
#' @importFrom MSstatsTMT groupComparisonTMT
#' @importFrom MSstatsConvert MSstatsLogsSettings MSstatsSaveSessionInfo
#' @importFrom stats p.adjust xtabs
#' @importFrom stringr str_match
#' 
#' @param data list of summarized datasets. Output of MSstatsPTM summarization 
#' function \code{\link[MSstatsPTM]{dataSummarizationPTM}}  or 
#' \code{\link[MSstatsPTM]{dataSummarizationPTM_TMT}} depending on acquisition
#' type.
#' @param data.type string indicating experimental acquisition type. "TMT" is 
#' used for TMT labeled experiments. For all other experiments (Label Free/ DDA/ 
#' DIA) use "LabelFree".
#' @param contrast.matrix comparison between conditions of interests. Default 
#' models full pairwise comparison between all conditions
#' @param moderated For TMT experiments only. TRUE will moderate t statistic; 
#' FALSE (default) uses ordinary t statistic. Default is FALSE.
#' @param adj.method For TMT experiemnts only. Adjusted method for multiple 
#' comparison. "BH" is default. "BH" is used for all other experiment types
#' @param log_base For non-TMT experiments only. The base of the logarithm used 
#' in summarization.
#' @param use_log_file logical. If TRUE, information about data processing
#' will be saved to a file.
#' @param append logical. If TRUE, information about data processing will be 
#' added to an existing log file.
#' @param verbose logical. If TRUE, information about data processing will be 
#' printed to the console.
#' @param log_file_path character. Path to a file to which information about 
#' data processing will be saved. 
#' If not provided, such a file will be created automatically.
#' If `append = TRUE`, has to be a valid path to a file.
#' @param base start of the file name.
#' @return list of modeling results. Includes PTM, PROTEIN, and ADJUSTED
#'         data.tables with their corresponding model results.
#'         
#' @examples 
#' 
#' model.lf.msstatsptm <- groupComparisonPTM(summary.data, 
#'                                      data.type = "LabelFree",
#'                                      verbose = FALSE)
groupComparisonPTM <- function(data, data.type,
                               contrast.matrix = "pairwise",
                               moderated = FALSE, 
                               adj.method = "BH",
                               log_base = 2,
                               use_log_file = TRUE, 
                               append = FALSE,
                               verbose = TRUE, 
                               log_file_path = NULL,
                               base = "MSstatsPTM_log_") {
  
  ## Start log  
  if (is.null(log_file_path) & use_log_file == TRUE){
    time_now <- Sys.time()
    path <- paste0(base, gsub("[ :\\-]", "_", time_now), 
                  ".log")
    file.create(path)
  } else {path <- log_file_path}
  
  if (data.type == 'TMT'){
    pkg <- "MSstatsTMT"
    option_log <- "MSstatsTMTLog"
  } else {
    pkg <- "MSstats"
    option_log <- "MSstatsLog"
  }
  
  MSstatsLogsSettings(use_log_file, append,
                      verbose, log_file_path = path, 
                      pkg_name = pkg)
  
  getOption(option_log)("INFO", "Starting parameter and data checks..")
  
  Label = Site = NULL
  
  data.ptm <- data[["PTM"]]
  data.protein <- data[["PROTEIN"]]
  
  ## Check for missing variables in PTM
  ## Determine if PTM should be adjusted for protein level.
  if (!is.null(data.protein)){
    adj.protein <- TRUE
  } else{
    adj.protein <- FALSE
  }
  
  ## Create pairwise matrix for label free
  if (contrast.matrix[1] == "pairwise" & data.type == 'LabelFree'){
    getOption(option_log)("INFO", "Building pairwise matrix.")
    labels <- unique(data.ptm$ProteinLevelData$GROUP)
    contrast.matrix <- MSstatsContrastMatrix('pairwise', labels)
  }
  
  ## PTM Modeling
  message("Starting PTM modeling...")
  if (data.type == 'TMT'){
    getOption(option_log)("INFO", "Starting TMT PTM Model")
    ptm_model <- groupComparisonTMT(data.ptm, contrast.matrix = contrast.matrix,
                                    moderated = moderated, 
                                    adj.method = adj.method,
                                    use_log_file = use_log_file,append = append,
                                    verbose = verbose, log_file_path = path)
    ptm_model <- ptm_model$ComparisonResult
    
  } else if (data.type == 'LabelFree') {
    getOption(option_log)("INFO", "Starting non-TMT PTM Model")
    ptm_model_full <- groupComparison(contrast.matrix,
                                      data.ptm, TRUE, log_base, 
                                      use_log_file, append, verbose, 
                                      log_file_path = path)
    ptm_model <- ptm_model_full$ComparisonResult
  }
  
  models <- list('PTM.Model' = ptm_model)
  
  if (adj.protein) {
    
    ## Protein Modeling
    message("Starting Protein modeling...")
    if (data.type == 'TMT'){
      getOption(option_log)("INFO", "Starting TMT Protein Model")
      protein_model <- groupComparisonTMT(data.protein, 
                                          contrast.matrix = contrast.matrix,
                                          moderated = moderated, 
                                          adj.method = adj.method,
                                          use_log_file = use_log_file,
                                          append = append,
                                          verbose = verbose, 
                                          log_file_path = path)
      protein_model <- protein_model$ComparisonResult
    } else if (data.type == 'LabelFree') {
      getOption(option_log)("INFO", "Starting non-TMT Protein Model")
      protein_model_full <- groupComparison(contrast.matrix, 
                                            data.protein,
                                            TRUE, log_base, use_log_file, 
                                            append, verbose, 
                                            log_file_path = path)
      protein_model <- protein_model_full$ComparisonResult
    }
    
    ptm_model <- as.data.table(ptm_model)
    protein_model <- as.data.table(protein_model)
    
    message("Starting adjustment...")
    getOption(option_log)("INFO", "Starting Protein Adjustment")
    ptm_model_site_sep <- ptm_model
    
    ## extract global protein name
    ptm_model_site_sep <- .extractProtein(ptm_model_site_sep, protein_model)
    getOption(option_log)("INFO", "Rcpp function extracted protein info")
    ## adjustProteinLevel function can only compare one label at a time
    comparisons <- unique(ptm_model_site_sep[, Label])
    
    adjusted_model_list <- list()
    for (i in seq_len(length(comparisons))) {
      getOption(option_log)("INFO", paste0("Adjusting for Comparison - ", 
                                             as.character(i)))
      temp_adjusted_model <- .applyPtmAdjustment(comparisons[[i]],
                                                   ptm_model_site_sep,
                                                   protein_model)
      adjusted_model_list[[i]] <- temp_adjusted_model
    }
    
    adjusted_models <- rbindlist(adjusted_model_list)
    
    adjusted_models$GlobalProtein <- adjusted_models$Protein
    adjusted_models$Protein <- adjusted_models$Site
    adjusted_models[, Site := NULL]
    
    getOption(option_log)("INFO", "Adjustment complete, returning models.")
    models <- list('PTM.Model' = ptm_model, 'PROTEIN.Model' = protein_model,
                   'ADJUSTED.Model' = adjusted_models)
  }

  return(models)

}
devonjkohler/MSstatsPTM documentation built on Dec. 19, 2021, 11:01 p.m.