R/quantitative_analysis.R

Defines functions mygsub detect_fractions get_experiment_from_col_data get_channel_from_col_data tmt_quant_analysis lfq_quant_analysis

Documented in lfq_quant_analysis

#' Run Pairwise Protein Quantification on a level of LFQ Maxquant output
#'
#' @param dt A maxquant txt output text file (eg: proteinGroups.txt)
#' @param des An experiment design file.
#' @param id_var A column in dt identifying the unit (eg: majority.protein.id)
#' @param output_folder An output folder to store produced files
#' @param quant_fn A quantitative results file name
#' @param dt_int_fn An intensity results file name
#' @param conditions_dict A table with original and R-safe condition names
#' @param imputStDev The Standard Deviation parameter for MNAR Imputation
#' @param imputePosition The Position parameter for MNAR Imputation
#' @return a list containing the quantitative results first and intentisty results second.
#' @examples
#' \dontrun{
#' #producing results per modified peptide id
#' mod_pep_list <- lfq_quant_analysis(mod_pept, expdes, id_var = "id", output_folder,
#'  "modificationSpecificPeptides_quant.txt",
#'  "modificationSpecificPeptides_quant_intensities.txt",
#'  conditions_dict=conditions_dict,
#'  imputeStDev = imputeStDev,
#'  imputePosition = imputePosition)
#'
#' mod_pept <- mod_pep_list[[1]]
#' mod_pept_int <- mod_pep_list[[2]]
#' }
#' @export lfq_quant_analysis
lfq_quant_analysis <- function(dt, des, id_var, output_folder, quant_fn, dt_int_fn,
                               conditions_dict=conditions_dict,
                               imputeStDev=imputeStDev,
                               imputePosition=imputePosition) {
  # DEBUG
  # dt <- copy(prot)
  # des <- copy(expdes)
  # id_var = "id"
  # output_folder
  # quant_fn <- "proteinGroups_quant.txt"
  # dt_int_fn <- "proteinGroups_quant_intensities.txt"
  
  # quant_fn <- "modificationSpecificPeptides_quant.txt"
  # dt_int_fn <- "modificationSpecificPeptides_quant_intensities.txt"
  
  if (length(grep("^lfq intensity ", colnames(dt))) > 0) {
    dt_int <- melt.data.table(
      dt,
      id.vars = id_var,
      measure.vars = grep("lfq intensity ", colnames(dt), value = T),
      value.name = "intensity",
      variable.name = "mqExperiment"
    )
    dt_int[, mqExperiment := str_replace(mqExperiment, "lfq intensity ", "")]
    
  } else {
    dt_int <- melt.data.table(
      dt,
      id.vars = id_var,
      measure.vars = grep("intensity ", colnames(dt), value = T),
      value.name = "intensity",
      variable.name = "mqExperiment"
    )
    dt_int[, mqExperiment := str_replace(mqExperiment, "intensity ", "")]
  }
  
  dt_int <- merge(dt_int, des, by = "mqExperiment", all = T)
  dt_int[, Imputed := 0L]
  dt_int[intensity == 0, Imputed := 1L]
  dt_int[, log2NInt := 0.0]
  dt_int[intensity > 0 , log2NInt := log2(intensity)]
  
  # imputation
  dt_int <- impute_lfq(
    myQuantDT = dt_int,
    id_type = "id",
    int_type = "log2NInt",
    imputeStDev,
    imputePosition
  )
  dt_int[, run_id := str_c(experiment, Replicate, sep = ".")]
  
  
  results <- limma_stats_fun(
    ID_type = id_var,
    int_type = "log2NInt",
    condition_col_name = "experiment",
    run_id_col_name = "run_id",
    rep_col_name = "Replicate",
    funDT = dt_int
  )
  
  dt_quant <- results$stats 
  conditionComparisonMapping <- results$pairwise.comp
  
  dt_quant[, eval(id_var) := as.integer(get(id_var))]
  
  dt <- merge(dt, dt_quant, by = id_var, all.x = T)
  
  # recover original condition names
  dt <- condition_name_decode_quantitative_data(dt = dt, dict = conditions_dict)
  dt_int <- condition_name_decode_intensity_data(dt = dt_int, dict = conditions_dict)
  
  
  setnames(dt_int, "experiment", "condition")
  fwrite(dt, file.path(output_folder, quant_fn), row.names = FALSE, sep = "\t", quote = FALSE, showProgress = T, verbose = T, nThread = detectCores() - 1)
  fwrite(dt_int, file.path(output_folder, dt_int_fn), row.names = FALSE, sep = "\t", quote = FALSE, showProgress = T, verbose = T, nThread = detectCores() - 1)
  
  return(list(
    dt, dt_int, conditionComparisonMapping
  ))
}


#' @export tmt_quant_analysis
tmt_quant_analysis <- function(dt, des, id_var = "id",
                               imputeStDev=0.3,
                               imputePosition=1.8) {
  
  has_fractions <- detect_fractions(des)
  colnames(des) <- tolower(colnames(des))
  print(colnames(des))
  
  measure_vars = get_tmt_intensity_columns(dt, des)
  
  # melt data
  dt_int <- melt.data.table(
    dt,
    id.vars = id_var,
    measure.vars = measure_vars,
    value.name = "intensity",
    variable.name = "experimental_measure" # this is a row in the exp design
  )
  stopifnot("intensity" %in% colnames(dt_int))

  #print(grep(measure_regex_channel, colnames(dt), value = T))
  dt_int <- as.data.table(dt_int) # make sure it's a data table

  #get ready for imputation, log intensity column
  dt_int[, Imputed := 0L]
  dt_int[intensity == 0, Imputed := 1L]
  dt_int[, log2NInt := 0.0]
  dt_int[intensity > 0 , log2NInt := log2(intensity)]

  # extract info from columns
  dt_int[, experiment := mygsub("reporter intensity .?.?.? ?corrected [0-9]+? (.*)", experimental_measure)]
  dt_int[, experiment := tolower(experiment)]
  dt_int[, reporter_channel := mygsub("reporter intensity .?.?.? ?corrected ([0-9]+?) .*", experimental_measure)]

  # check all measure vars are in exp design 
  stopifnot(length(measure_vars)==nrow(unique(dt_int[,c("experimental_measure", "reporter_channel", "experiment")])))
  
  # perform median subtraction for each reporter_channel group
  # reporter channel + condition level median subtraction
  dt_int[Imputed == F, ExpChannelMedian := median(log2NInt), by = list(reporter_channel,experiment)]
  dt_int[Imputed == F, log2NIntNorm := log2NInt - median(log2NInt), by = list(reporter_channel,experiment)]

  # imputation
  dt_int <- impute_lfq(
    myQuantDT = dt_int,
    id_type = id_var,
    int_type = "log2NIntNorm",
    f_imputeStDev = imputeStDev,
    f_imputePosition = imputePosition
  )
  
  
  cat(paste("nrows dt_int:", nrow(dt_int)))
  
  # merge channel info with experiment design
  des$experiment <- tolower(des$experiment)
  dt_int <- merge(dt_int, des, by = c("experiment","reporter_channel"), all.x = T)
  cat(paste("nrows dt_int:", nrow(dt_int)))
  # create run id's
  dt_int[, run_id := str_c(condition,experiment, replicate, sep = ".")]
  
  #call limma stats
  results <- limma_stats_fun(
    ID_type = id_var,
    int_type = "log2NIntNorm",
    condition_col_name = "condition",
    run_id_col_name = "run_id",
    rep_col_name = "replicate",
    funDT = dt_int
  )
  dt_quant <- results$stats 
  conditionComparisonMapping <- results$pairwise.comp
  
  dt$id <- as.character(dt$id)
  
  # check for duplicated column names
  dt <- merge(dt, dt_quant, by = id_var , all.x = T)
  
  
  return(list(dt, dt_int, conditionComparisonMapping))
}


get_channel_from_col_data <- function(data){
  tmp = gsub("reporter.intensity.corrected.","", data)
  channels = gsub(" .*","",tmp)
  return(channels)
}

get_experiment_from_col_data <- function(data){
  tmp = gsub("reporter.intensity.corrected.","", data)
  channels = gsub(".* ","",tmp)
  return(channels)
}

detect_fractions <- function(des){
  if (("fractions" %in% tolower(colnames(des)))){
    if (length(unique(des$fraction)) == 1){
      return(FALSE)
    } else {
      return(TRUE)
    }
  } else {
    return (FALSE)
  }
}


# return the regex capture if it is there, otherwise return an empty string
mygsub <- function( pattern, x){
  ans <- ifelse(grepl(pattern, x), 
                gsub(pattern, "\\1", x), 
                "")
  return(ans)
}
MassDynamics/lfq_processing documentation built on May 4, 2023, 11:20 p.m.