R/msstats_processing.R

Defines functions preprocess.MSstats process.MSstats gen.CompMatrix compare.MSstats gen.CompMatrix

Documented in compare.MSstats gen.CompMatrix preprocess.MSstats process.MSstats

# MSstats processing ------------------------------------------------------
#' Preprocess Skyline files for MSstats analysis
#'
#' Combines feature intensities and annotation to yield appropriate MSstats input
#'
#' @param input.csv csv file created by MSstats Skyline report
#' @param annot.df data frame with 'Condition', 'Run', and 'BioReplicate' columns
#'
#' @return dataframe suitable for MSstats processing
#'
#' @examples
#' rd.msstats <- preprocess.MSstats(paste0(proj.dir,'Data/MSstats_Input.csv'), annot)
#'
#' @export
preprocess.MSstats <- function(input.csv, annot.df) {
  # Takes a Skyline-generated MSstats input CSV, corrects it, and adds annotations
  # annot.df should have "Condition", "Run", and "BioReplicate" columns
  # Output is appropriate to feed into process.MSstats()

  if(!require(tidyverse)) library(tidyverse)

  rd <- read_csv(input.csv)
  rd %>%
    rename('Run' = `File Name`) %>%
    separate(Run, into=c('Run', 'd'), sep='\\.') -> rd.input

  names(rd.input) <- make.names(names(rd.input), unique=TRUE)

  # Annotate with Condition and BioReplicate
  rd.msstats <- SkylinetoMSstatsFormat(input = rd.input,
                                       annotation = annot.df,
                                       filter_with_Qvalue = FALSE)

  return(rd.msstats)
}

#' Process feature intensities with MSstats default parameters
#'
#' Normalizes and processes MRM data using default/recommended parameters for MSstats.
#' Saves result to disk and reloads on subsequent knits unless recompute is True
#'
#' @param raw.features MSstats feature data. Can be generated by preprocess.MSstats()
#' @param file.path Path to save the resulting normalized data object
#' @param recompute Boolean on whether to recompute normalization. Default is False
#'
#' @return MSstats object containing normalized feature data
#'
#' @examples
#' rd.proposed <- process.MSstats(rd.msstats, filepath=paste0(proj.dir,'Analysis/MSstats.rds'), recompute = F)
#'
#' @export
process.MSstats <- function(raw.features, filepath='MSstats.rds', recompute=F) {
  # Either process raw features with MSstats to normalize
  # or just reload from disk to save time when compiling Rmarkdown documents

  processed.data <- filepath
  if(!file.exists(processed.data) | recompute == T){
    rd.proposed <- dataProcess(raw = raw.features,
                               normalization = 'equalizeMedians',
                               summaryMethod = 'TMP',
                               censoredInt = "0",
                               cutoffCensored = 'minFeature',
                               MBimpute = TRUE,
                               maxQuantileforCensored = 0.999)
    saveRDS(rd.proposed, file=filepath)
  } else {rd.proposed <- readRDS(processed.data)}
  return(rd.proposed)
}

#' Generate a comparison matrix
#'
#' Given a matrix of desired comparisons and original group levels, generate the comparison matrix for MSstats
#'
#' @param mat A matrix with desired condition comparisons, 2 or 4 columns
#' @param lev Condition levels from the processed data
#'
#' @return An MSstats comparison matrix
#'
#' @examples
#' # Set up a matrix with desired comparisons
#' # For a double difference, the conditions in positions 1 & 4 will be set to 1
#' # and conditions in positions 2 & 3 will be set to -1, so these are like
#' # (Trt.Trted - Trt.Baseline) - (Veh.Trted -  Veh.Baseline)
#'
#' el <- matrix(c('B.3','B.1','A.3','A.1',
#'                'B.4','B.1','A.4','A.1',
#'                'C.3','C.1','A.3','A.1',
#'                'C.4','C.1','A.4','A.1',
#'                'C.3','C.1','B.3','B.1',
#'                'C.4','C.1','B.4','B.1',
#'                'D.3','D.1','E.3','E.1',
#'                'D.4','D.1','E.4','E.1',
#'                'Y.3','Y.1','Z.3','Z.1',
#'                'Y.4','Y.1','Z.4','Z.1'),
#'              ncol=4, byrow = T)
#'
#' orig.levels <- levels(rd.proposed$ProcessedData$GROUP_ORIGINAL)
#'
#' # Generate MSstats comparison matrix from the graph and original groups
#' comparisons <- gen.CompMatrix(el, orig.levels)
#'
#' @export
gen.CompMatrix <- function(mat, lev) {
  # Given a matrix of desired comparisons and original group levels,
  # Generate the comparison matrix for MSstats

  # This code currently feels very inefficient with too many ifs and for-loops,
  # but I think it does work at least!

  stopifnot(ncol(mat) == 2 | ncol(mat) == 4)

  # Generate a vector with the final flags for comparison directionality
  if (ncol(mat) == 4) {
    flags <- c(1,-1,-1,1)
  } else if (ncol(mat) == 2) {
    flags <- c(1,-1)
  } else {
    print("This should never display")
  }

  # Generate an "empty" vector with appropriate dimensions
  comparisons <- matrix(0, nrow=nrow(mat), ncol=length(lev))

  # Generate an empty vector of rownames
  rnames <- rep_len('',nrow(mat))

  # Iterate through input matrix, set flags and rownames in the output matrix
  for (i in 1:nrow(mat)) {
    thiscomp <- mat[i,]

    # Set row name
    if (ncol(mat) == 4) {
      rname <- paste0('(',thiscomp[1],'-',thiscomp[2],')','-',
                      '(',thiscomp[3],'-',thiscomp[4],')')
    } else {
      rname <- paste(thiscomp[1],thiscomp[2],sep='-')
    }
    rnames[i] <- rname

    # Determine where in the output row to set the flags
    lev.indices <- match(thiscomp, lev)

    for (k in 1:length(lev.indices)) {
      if (is.na(lev.indices[k])) {
        print(paste('Error: Comparison condition',thiscomp[k],'not found in original levels'))
      } else {
        j <- lev.indices[k]
        comparisons[i,j] <- flags[k]
      }
    }
  }

  rownames(comparisons) <- rnames
  return(comparisons)
}


#' Perform an MSstats comparison
#'
#' Performs MSstats differential analysis. Saves result to disk and reloads unless recompute is True
#'
#' @param processed.features Object with normalized data. Can be produced with process.MSstats()
#' @param comparison.matrix A comparison matrix as described in MSstats user guide.
#' @param file.path Path to save the resulting comparison data object
#' @param recompute Boolean on whether to recompute comparisons. Default is False
#'
#' @return An object containing comparison results
#'
#' @examples
#' pd.comparisons <- compare.MSstats(rd.proposed, comparisons, recompute=F)
#'
#' @export
compare.MSstats <- function(processed.features, comparison.matrix,
                            filepath='MSstats_comparisons.rds', recompute=F) {
  # Given a set of normalized/processed features from process.MSstats()
  # and a matrix of comparisons, perform MSstats group comparisons or
  # reload from disk if already cached

  comparison.data <- filepath
  if(!file.exists(comparison.data) | recompute == T){
    pd.comparisons <- groupComparison(contrast.matrix = comparison.matrix, data=processed.features)
    saveRDS(pd.comparisons, file=comparison.data)
  } else {pd.comparisons <- readRDS(comparison.data)}

  return(pd.comparisons)
}

gen.CompMatrix <- function(mat, lev) {
  # Given a matrix of desired comparisons and original group levels,
  # Generate the comparison matrix for MSstats
  
  # This code currently feels very inefficient with too many ifs and for-loops,
  # but I think it does work at least!
  
  stopifnot(ncol(mat) == 2 | ncol(mat) == 4)
  
  # Generate a vector with the final flags for comparison directionality
  if (ncol(mat) == 4) {
    flags <- c(1,-1,-1,1)
  } else if (ncol(mat) == 2) {
    flags <- c(1,-1)
  } else {
    print("This should never display")
  }
  
  # Generate an "empty" vector with appropriate dimensions
  comparisons <- matrix(0, nrow=nrow(mat), ncol=length(lev))
  
  # Generate an empty vector of rownames
  rnames <- rep_len('',nrow(mat))
  
  # Iterate through input matrix, set flags and rownames in the output matrix
  for (i in 1:nrow(mat)) {
    thiscomp <- mat[i,]
    
    # Set row name
    if (ncol(mat) == 4) {
      rname <- paste0('(',thiscomp[1],'-',thiscomp[2],')','-',
                        '(',thiscomp[3],'-',thiscomp[4],')')
    } else {
      rname <- paste(thiscomp[1],thiscomp[2],sep='-')
    }
    rnames[i] <- rname
    
    # Determine where in the output row to set the flags
    lev.indices <- match(thiscomp, lev)
    
    for (k in 1:length(lev.indices)) {
      if (is.na(lev.indices[k])) {
        print(paste('Error: Comparison condition',thiscomp[k],'not found in original levels'))
      } else {
        j <- lev.indices[k]
        comparisons[i,j] <- flags[k]
      }
    }
  }
  
  rownames(comparisons) <- rnames
  return(comparisons)
}
jwinget/mrmr documentation built on Nov. 4, 2019, 3:28 p.m.