R/IRSnorm.R

Defines functions IRSnorm checkArg_IRSnorm

Documented in IRSnorm

# Argument check function
checkArg_IRSnorm <- function(MSnSetObj, IRSname, groupingColumn){
  assert_that(is_MSnSet(MSnSetObj))
  assert_that(is_validIRSname(IRSname,MSnSetObj))
  assert_that(is.string(groupingColumn))
  assert_that(is_validMetadataColumn(groupingColumn, MSnSetObj))
}

#' Batch Correction by Internal Reference Scale
#'
#' Performs batch correction on multiple runs using an Internal Reference Scale
#' sample.
#'
#' The Internal Reference Scale sample (IRS) should ideally be representative of
#' the entire proteome detectable across all sample in the experiment, e.g. a
#' pooled sample made up of aliquots of protein from all samples. The IRS is
#' then run and measured in each TMT experiment. The normalization procedure
#' makes measurements of the IRS from different TMT batches exactly the same,
#' and puts all of the reporter ions on the same "intensity scale". The argument
#' 'IRSname' is used to define the name of the Reference group within the
#' SampleGroup column. The argument "groupingColumn" takes one of the column of
#' pData(MSnSetObj) to define separate batches to correct; the default variable
#' name is "Plex".
#'
#' @param MSnSetObj MSnSet; an object of class MSnSet
#' @param IRSname character; name of the Reference group within the SampleGroup
#'   column
#' @param groupingColumn character; the pData(MSnSetObj) column name used to
#'   define batches; default="Plex"
#' @return An object of class \code{MSnSet} (see \code{\link{MSnSet-class}})
#' @examples
#'
#' data(human_anno)
#' data(ER_ARID1A_KO_MCF7)
#' MSnset_SET1 <- convertToMSnset(ER_ARID1A_KO_MCF7$intensities_Set1,
#'                                metadata=ER_ARID1A_KO_MCF7$metadata_Set1,
#'                                indExpData=c(7:15),
#'                                Sequences=2,
#'                                Accessions=6)
#' MSnset_SET2 <- convertToMSnset(ER_ARID1A_KO_MCF7$intensities_Set2,
#'                                metadata=ER_ARID1A_KO_MCF7$metadata_Set2,
#'                                indExpData=c(7:15),
#'                                Sequences=2,
#'                                Accessions=6)
#' MSnset_SET3 <- convertToMSnset(ER_ARID1A_KO_MCF7$intensities_Set3,
#'                                metadata=ER_ARID1A_KO_MCF7$metadata_Set3,
#'                                indExpData=c(7:15),
#'                                Sequences=2,
#'                                Accessions=6)
#' MSnset_SET4 <- convertToMSnset(ER_ARID1A_KO_MCF7$intensities_Set4,
#'                                metadata=ER_ARID1A_KO_MCF7$metadata_Set4,
#'                                indExpData=c(7:14),
#'                                Sequences=2,
#'                                Accessions=6)
#' MSnset_SET5 <- convertToMSnset(ER_ARID1A_KO_MCF7$intensities_Set5,
#'                                metadata=ER_ARID1A_KO_MCF7$metadata_Set5,
#'                                indExpData=c(7:15),
#'                                Sequences=2,
#'                                Accessions=6)
#' MSnset_SET1_norm <- normalizeScaling(MSnset_SET1, median)
#' MSnset_SET2_norm <- normalizeScaling(MSnset_SET2, median)
#' MSnset_SET3_norm <- normalizeScaling(MSnset_SET3, median)
#' MSnset_SET4_norm <- normalizeScaling(MSnset_SET4, median)
#' MSnset_SET5_norm <- normalizeScaling(MSnset_SET5, median)
#' MSnset_SET1_Pnorm <- summarizeIntensities(MSnset_SET1_norm, sum, human_anno)
#' MSnset_SET2_Pnorm <- summarizeIntensities(MSnset_SET2_norm, sum, human_anno)
#' MSnset_SET3_Pnorm <- summarizeIntensities(MSnset_SET3_norm, sum, human_anno)
#' MSnset_SET4_Pnorm <- summarizeIntensities(MSnset_SET4_norm, sum, human_anno)
#' MSnset_SET5_Pnorm <- summarizeIntensities(MSnset_SET5_norm, sum, human_anno)
#' MSnset_SET1_Pnorm <- updateSampleNames(updateFvarLabels(MSnset_SET1_Pnorm))
#' MSnset_SET2_Pnorm <- updateSampleNames(updateFvarLabels(MSnset_SET2_Pnorm))
#' MSnset_SET3_Pnorm <- updateSampleNames(updateFvarLabels(MSnset_SET3_Pnorm))
#' MSnset_SET4_Pnorm <- updateSampleNames(updateFvarLabels(MSnset_SET4_Pnorm))
#' MSnset_SET5_Pnorm <- updateSampleNames(updateFvarLabels(MSnset_SET5_Pnorm))
#' MSnset_comb <- MSnbase::combine(MSnset_SET1_Pnorm,
#'                                 MSnset_SET2_Pnorm,
#'                                 MSnset_SET3_Pnorm,
#'                                 MSnset_SET4_Pnorm,
#'                                 MSnset_SET5_Pnorm)
#' tokeep <- complete.cases(fData(MSnset_comb))
#' MSnset_comb <- MSnset_comb[tokeep,]
#' sampleNames(MSnset_comb) <- pData(MSnset_comb)$SampleName
#' fData(MSnset_comb) <- fData(MSnset_comb)[,c(2,3,6)]
#' colnames(fData(MSnset_comb)) <- c("Sequences", "Modifications", "Accessions")
#' MSnset_comb_corr <- IRSnorm(MSnset_comb, IRSname="Ref", groupingColumn="Run")
#'
#' @import MSnbase
#' @importFrom BiocGenerics lapply
#' @importFrom Biobase pData exprs exprs<-
#'
#' @export IRSnorm


IRSnorm <- function(MSnSetObj, IRSname="RefPool", groupingColumn="Plex") {
  checkArg_IRSnorm(MSnSetObj, IRSname, groupingColumn)
  
  Ref_Set <- MSnSetObj[,pData(MSnSetObj)$SampleGroup==IRSname]
  allgrps <- split(Ref_Set, groupingColumn)
  grpMean <- BiocGenerics::lapply(allgrps, function(x) {rowMeans(exprs(x))})
  irs_data <- matrix(unlist(grpMean), ncol = length(grpMean), byrow = FALSE)
  irs_data_geomean <- apply(irs_data, 1, function(x) {exp(mean(log(x)))})
  irs_factors <- irs_data_geomean/irs_data
  colnames(irs_factors) <- names(grpMean)
  
  for (i in names(grpMean)){
    plexCols <- which(pData(MSnSetObj)[,groupingColumn]==i)
    exprs(MSnSetObj)[,plexCols] <- exprs(MSnSetObj)[,plexCols] * irs_factors[,i]
  }
  return(MSnSetObj)
}
crukci-bioinformatics/qPLEXanalyzer documentation built on Oct. 23, 2023, 2:27 a.m.