R/importFromMaxQuant.R

# Due to: http://stackoverflow.com/questions/24501245/data-table-throws-object-not-found-error
.datatable.aware=TRUE

#' Import peptide profiles from an MaxQuant DDA data analysis search result
#' table.
#' @import data.table
#' @param file.name Quantitative MS data result file. Defaults to 'peptides.txt'
#' @param annotation.table experimentalDesignTemplate table from MaxQuant,
#'     containing columns 'Name', 'Experiment' and 'Fraction'.
#'     Fraction should contain the fraction_number.
#' @param quanttype Type of quant to extract, defaults to 'Intensity ' and can
#'     be replaced by other prefixes as given in the MaxQuant result table
#'     header. E.g. '^Intensity H ' or '^Intensity L ' can be used to
#'     differentiate channels of SILAC-label data.
#' @return An object of class Traces (peptide level) containing a 'traces.wide',
#'     and 'ids' table that can be processed with the herein contained functions.
#'         
#' @export
importFromMaxQuant <- function(file.name='peptides.txt', quanttype='^Intensity ',
                               annotation.table='experimentalDesignTemplate.txt') {
  # Read data & annotation table & clean up
  ##################################################
  # data  <- data.table::fread(file.name, sep='\t', header=TRUE) #produces funny numbers for one of 88 columns (?)
  data <- as.data.table(read.csv(file.name, sep ='\t', header=TRUE,
                                 check.names=FALSE))
  message('reading results file ...')
  annotation <- data.table::fread(annotation.table)
  # Retain only those peptides with a 'sp|*' fasta entry in Proteins column
  data.s <- data[grep('sp\\|', data$Proteins)]
  
  # Select Intensity columns of proteotypic peptides 
  ##################################################
  quantcolumns <- grep(quanttype, names(data.s))
  labelcolumns <- which(names(data.s) %in% c('Sequence', 'Proteins'))
  
  data.s.traces <- data.s[`Unique (Proteins)` == 'yes', quantcolumns,
                          with=FALSE]
  data.s.labels <- data.s[`Unique (Proteins)` == 'yes', labelcolumns,
                          with=FALSE]
  # Extract Uniprot id (protein_id) and Uniprot entry name (protein_name) from fasta_id
  data.s.labels[, protein_id := sapply(as.character(data.s.labels$Proteins),
                                       function(x) { 
                                           strsplit(x, split='\\|')[[1]][2]
                                       })]
  data.s.labels[, protein_name := sapply(as.character(data.s.labels$Proteins),
                                         function(x){
                                             strsplit(x, split='\\|')[[1]][3]
                                         })]
  
  # Replace column headers by fraction number and order ascending
  nruns <- length(names(data.s.traces))
  column_headers_experiment <- gsub(quanttype, '', names(data.s.traces))
  column_headers_fraction <-
      sapply(column_headers_experiment,
             function(x){
                 annotation[Experiment %in% x, 'Fraction', with=FALSE]
             })
  column_headers_fraction <-
      sapply(column_headers_fraction, function(x){ x[[1]] })
  names(data.s.traces) <- sprintf('%02d', column_headers_fraction)
  data.s.traces <- data.s.traces[, order(as.numeric(names(data.s.traces))),
                                 with=FALSE]
  
  # Assemble and output result 'Traces' object
  #################################################
  traces.wide <- data.table(protein_id=data.s.labels$protein_id,
                            peptide_id=data.s.labels$Sequence, data.s.traces)
  setorder(traces.wide, protein_id)
  # traces.long <- melt(traces.wide,
  #                     id.vars=c('protein_id','peptide_id'),
  #                     value.name='Intensity', 
  #                     variable.name='fraction_number')
  labels <- data.s.labels[,c(2, 4, 3, 1), with=FALSE]
  setnames(labels, 'Sequence', 'peptide_id')
  setnames(labels, 'Proteins', 'fasta_id')
  setorder(labels, protein_id)
  result <- list(traces.wide=traces.wide, ids=labels)
  class(result) <- 'Traces'

  return(result)
}
hafenr/SECprofiler documentation built on May 17, 2019, 2:25 p.m.