# MSstats processing ------------------------------------------------------
#' Preprocess Skyline files for MSstats analysis
#'
#' Combines feature intensities and annotation to yield appropriate MSstats input
#'
#' @param input.csv csv file created by MSstats Skyline report
#' @param annot.df data frame with 'Condition', 'Run', and 'BioReplicate' columns
#'
#' @return dataframe suitable for MSstats processing
#'
#' @examples
#' rd.msstats <- preprocess.MSstats(paste0(proj.dir,'Data/MSstats_Input.csv'), annot)
#'
#' @export
preprocess.MSstats <- function(input.csv, annot.df) {
# Takes a Skyline-generated MSstats input CSV, corrects it, and adds annotations
# annot.df should have "Condition", "Run", and "BioReplicate" columns
# Output is appropriate to feed into process.MSstats()
if(!require(tidyverse)) library(tidyverse)
rd <- read_csv(input.csv)
rd %>%
rename('Run' = `File Name`) %>%
separate(Run, into=c('Run', 'd'), sep='\\.') -> rd.input
names(rd.input) <- make.names(names(rd.input), unique=TRUE)
# Annotate with Condition and BioReplicate
rd.msstats <- SkylinetoMSstatsFormat(input = rd.input,
annotation = annot.df,
filter_with_Qvalue = FALSE)
return(rd.msstats)
}
#' Process feature intensities with MSstats default parameters
#'
#' Normalizes and processes MRM data using default/recommended parameters for MSstats.
#' Saves result to disk and reloads on subsequent knits unless recompute is True
#'
#' @param raw.features MSstats feature data. Can be generated by preprocess.MSstats()
#' @param file.path Path to save the resulting normalized data object
#' @param recompute Boolean on whether to recompute normalization. Default is False
#'
#' @return MSstats object containing normalized feature data
#'
#' @examples
#' rd.proposed <- process.MSstats(rd.msstats, filepath=paste0(proj.dir,'Analysis/MSstats.rds'), recompute = F)
#'
#' @export
process.MSstats <- function(raw.features, filepath='MSstats.rds', recompute=F) {
# Either process raw features with MSstats to normalize
# or just reload from disk to save time when compiling Rmarkdown documents
processed.data <- filepath
if(!file.exists(processed.data) | recompute == T){
rd.proposed <- dataProcess(raw = raw.features,
normalization = 'equalizeMedians',
summaryMethod = 'TMP',
censoredInt = "0",
cutoffCensored = 'minFeature',
MBimpute = TRUE,
maxQuantileforCensored = 0.999)
saveRDS(rd.proposed, file=filepath)
} else {rd.proposed <- readRDS(processed.data)}
return(rd.proposed)
}
#' Generate a comparison matrix
#'
#' Given a matrix of desired comparisons and original group levels, generate the comparison matrix for MSstats
#'
#' @param mat A matrix with desired condition comparisons, 2 or 4 columns
#' @param lev Condition levels from the processed data
#'
#' @return An MSstats comparison matrix
#'
#' @examples
#' # Set up a matrix with desired comparisons
#' # For a double difference, the conditions in positions 1 & 4 will be set to 1
#' # and conditions in positions 2 & 3 will be set to -1, so these are like
#' # (Trt.Trted - Trt.Baseline) - (Veh.Trted - Veh.Baseline)
#'
#' el <- matrix(c('B.3','B.1','A.3','A.1',
#' 'B.4','B.1','A.4','A.1',
#' 'C.3','C.1','A.3','A.1',
#' 'C.4','C.1','A.4','A.1',
#' 'C.3','C.1','B.3','B.1',
#' 'C.4','C.1','B.4','B.1',
#' 'D.3','D.1','E.3','E.1',
#' 'D.4','D.1','E.4','E.1',
#' 'Y.3','Y.1','Z.3','Z.1',
#' 'Y.4','Y.1','Z.4','Z.1'),
#' ncol=4, byrow = T)
#'
#' orig.levels <- levels(rd.proposed$ProcessedData$GROUP_ORIGINAL)
#'
#' # Generate MSstats comparison matrix from the graph and original groups
#' comparisons <- gen.CompMatrix(el, orig.levels)
#'
#' @export
gen.CompMatrix <- function(mat, lev) {
# Given a matrix of desired comparisons and original group levels,
# Generate the comparison matrix for MSstats
# This code currently feels very inefficient with too many ifs and for-loops,
# but I think it does work at least!
stopifnot(ncol(mat) == 2 | ncol(mat) == 4)
# Generate a vector with the final flags for comparison directionality
if (ncol(mat) == 4) {
flags <- c(1,-1,-1,1)
} else if (ncol(mat) == 2) {
flags <- c(1,-1)
} else {
print("This should never display")
}
# Generate an "empty" vector with appropriate dimensions
comparisons <- matrix(0, nrow=nrow(mat), ncol=length(lev))
# Generate an empty vector of rownames
rnames <- rep_len('',nrow(mat))
# Iterate through input matrix, set flags and rownames in the output matrix
for (i in 1:nrow(mat)) {
thiscomp <- mat[i,]
# Set row name
if (ncol(mat) == 4) {
rname <- paste0('(',thiscomp[1],'-',thiscomp[2],')','-',
'(',thiscomp[3],'-',thiscomp[4],')')
} else {
rname <- paste(thiscomp[1],thiscomp[2],sep='-')
}
rnames[i] <- rname
# Determine where in the output row to set the flags
lev.indices <- match(thiscomp, lev)
for (k in 1:length(lev.indices)) {
if (is.na(lev.indices[k])) {
print(paste('Error: Comparison condition',thiscomp[k],'not found in original levels'))
} else {
j <- lev.indices[k]
comparisons[i,j] <- flags[k]
}
}
}
rownames(comparisons) <- rnames
return(comparisons)
}
#' Perform an MSstats comparison
#'
#' Performs MSstats differential analysis. Saves result to disk and reloads unless recompute is True
#'
#' @param processed.features Object with normalized data. Can be produced with process.MSstats()
#' @param comparison.matrix A comparison matrix as described in MSstats user guide.
#' @param file.path Path to save the resulting comparison data object
#' @param recompute Boolean on whether to recompute comparisons. Default is False
#'
#' @return An object containing comparison results
#'
#' @examples
#' pd.comparisons <- compare.MSstats(rd.proposed, comparisons, recompute=F)
#'
#' @export
compare.MSstats <- function(processed.features, comparison.matrix,
filepath='MSstats_comparisons.rds', recompute=F) {
# Given a set of normalized/processed features from process.MSstats()
# and a matrix of comparisons, perform MSstats group comparisons or
# reload from disk if already cached
comparison.data <- filepath
if(!file.exists(comparison.data) | recompute == T){
pd.comparisons <- groupComparison(contrast.matrix = comparison.matrix, data=processed.features)
saveRDS(pd.comparisons, file=comparison.data)
} else {pd.comparisons <- readRDS(comparison.data)}
return(pd.comparisons)
}
gen.CompMatrix <- function(mat, lev) {
# Given a matrix of desired comparisons and original group levels,
# Generate the comparison matrix for MSstats
# This code currently feels very inefficient with too many ifs and for-loops,
# but I think it does work at least!
stopifnot(ncol(mat) == 2 | ncol(mat) == 4)
# Generate a vector with the final flags for comparison directionality
if (ncol(mat) == 4) {
flags <- c(1,-1,-1,1)
} else if (ncol(mat) == 2) {
flags <- c(1,-1)
} else {
print("This should never display")
}
# Generate an "empty" vector with appropriate dimensions
comparisons <- matrix(0, nrow=nrow(mat), ncol=length(lev))
# Generate an empty vector of rownames
rnames <- rep_len('',nrow(mat))
# Iterate through input matrix, set flags and rownames in the output matrix
for (i in 1:nrow(mat)) {
thiscomp <- mat[i,]
# Set row name
if (ncol(mat) == 4) {
rname <- paste0('(',thiscomp[1],'-',thiscomp[2],')','-',
'(',thiscomp[3],'-',thiscomp[4],')')
} else {
rname <- paste(thiscomp[1],thiscomp[2],sep='-')
}
rnames[i] <- rname
# Determine where in the output row to set the flags
lev.indices <- match(thiscomp, lev)
for (k in 1:length(lev.indices)) {
if (is.na(lev.indices[k])) {
print(paste('Error: Comparison condition',thiscomp[k],'not found in original levels'))
} else {
j <- lev.indices[k]
comparisons[i,j] <- flags[k]
}
}
}
rownames(comparisons) <- rnames
return(comparisons)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.