R/iPQF.R
In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics

Documented in iPQF

##' Feature-based weighting of peptides for protein ratio estimation
##' (called by iPQF main function))
##'
##' @param pos list of proteins with corresponding peptide spectra
##' position assignment (in object)
##' @param ma ratio matrix/intensity matrix
##' @param features data.frame of peptide features
##' @return A matrix with estimated protein ratios (rows are proteins,
##' columns are samples).
##' @author Martina Fisher
##' @noRd
iPQF.method  <- function(pos, mat, features, feature.weight) {
    ## calculated protein ratios based on feature ranking
    feat.trend <- matrix(data = NA_real_, nrow = length(pos),
                         ncol = dim(mat)[2], byrow = FALSE)
    rownames(feat.trend) <- names(pos)
    #weight.list <- vector("list", length(pos))   ## peptide weights
    #avrank.list <- vector("list", length(pos))   ## peptide feature average ranks
    #rankmat.list <- vector("list", length(pos))  ## feature rank matrix

    for (i in 1:length(pos)) {
        Mat <- mat[pos[[i]], ]
        ## Ranking: smaller rank is better (~more reliable peptide)
        ## combined redundant-unique-dist vector
        ru.pep <- features$ru.dist[pos[[i]]]
        ru.rank <- rank(ru.pep)
        charge.rank <- rank(features$charge[pos[[i]]])
        ll.rank <- rank(features$seq.l[pos[[i]]])
        sc.rank <- rank(-features$score[pos[[i]]])
        mo.rank <- rank(features$mod.stat[pos[[i]]])
        mass.rank <- rank(features$prec.mass[pos[[i]]])
        int.rank <- rank(-features$mean.ionInt[pos[[i]]])

        ## ranking matrix
        rank.mat <- cbind(ru.rank, charge.rank,
                          ll.rank, sc.rank,
                          mo.rank, mass.rank,
                          int.rank)
        #rankmat.list[[i]] <- rank.mat

        ## feature weighting  (based on correlation study in manuscript)
        rank.mat2 <- (t(apply(rank.mat, 1,
                              function(x) feature.weight * x)))

        ## normalizing: divide by peptide number (rank zw 0-1)
        rmat.n <- apply(rank.mat2, 2, function(x) x/length(which(!is.na(x))))

        ## sum ranks for each peptide:
        pep.sumrank <- rowSums(rmat.n, na.rm = TRUE)

        ## divide by number of features: "average rank"
        av.rank <- pep.sumrank/sum(c(1:7)^2)     ## worst av.rank = 1
        av.rank <- 1 - av.rank  ## reverse: best = 1 "weight" = high = reliable peptide
        av.rank <- av.rank^2
        #avrank.list[[i]] <- av.rank

        ## Approach: Feature-Weighting
        weight <- av.rank
        weight <- weight / sum(weight)
        trend <- apply(Mat, 2, function(x) weighted.mean(x, w=weight))

        feat.trend[i,] <- trend
        #weight.list[[i]] <- weight

    }
    return(feat.trend)
}


#####  Internal - FUNCTIONS    < called within iPQF main function !>
## to build internal objects required for iPQF.method

## Ratio Matrix Construction Function
# Define Ratio Calculation: all Channels to individual channel, or sum of all channels
ratio.mat <- function(mat, method) {
    if (method == "sum") {
        ## equivalent to normalise(., "sum")
        ## ration.mat <- mat/rowSums(mat)
        ratio.mat <- t(apply(mat, 1, function(x) x/sum(x)))
    } else {
        base.channel <- match(method, colnames(mat))
        ## ratio.mat <- mat/mat[, base.channel]
        ratio.mat <- t(apply(mat, 1,
                             function(x) x /x[base.channel]))
  }
  return(ratio.mat)
}


## example call:
## exprs(object) <- ratio.mat(exprs(object), method="sum")
## ratio.mat(head(mat), method="X114_ions")

## Function: 'Build uniques.all'
## list elements= proteins
## entries per list.element: discrete numbers presenting spectra
## assigned to the specific protein with same numbers for identical
## sequences
uniques.list <- function(pos, sequence) {
    pep.all <- lapply(pos, function(i) sequence[i])
    un.pep.all <- lapply(pos, function(i) unique(sequence[i]))
    uniques.all <- lapply(1:length(pos),
                         function(x) match(pep.all[[x]], un.pep.all[[x]]))
    names(uniques.all) <- names(pos)
    return(uniques.all)
}



## Function: 'Redundant peptides' (multiply measured sequence):
## pos.r: position of peptides with redundantly measured status
## pos.rd: Mean Distance of each peptide to other peptides within a redundant group  (follows pos.r structure)
redundant.dist <- function(pos, uniques.all, mat) {
    uni.tab <- lapply(uniques.all, table)    ## list: name=individual sequence, value= ## appearance of same sequence
    anz.uni <- lapply(uni.tab,length)        ## number of different sequences in a protein profile

    red.name <- lapply(uni.tab, function(x) which(x>1))        ## which protein has redundant sequences?
    red.prot <- which(lapply(red.name, length)>0)              ## list index (protein profiles) with redundant sequences

    ## redundant group of peptides:
    pos.r <- vector("list", length(pos))                ## pos.r: position of redundant peptide spectra
    pos.rd <- vector("list", length(pos))           ## distance of redundant peptides
    for (i in red.prot) {
        nn <- names(red.name[[i]])
        a <- dist <- c()
        for (k in nn) {
            posr <- which(uniques.all[[i]] == k)
            a <- c(a, pos[[i]][posr])
            Mat <- mat[pos[[i]][posr], ]
            red.vec <- vector("numeric", length(posr))
            for (j in 1:length(posr)){
                if (length(posr) == 2)
                    red.vec[j] <-  sqrt(sum((Mat[-j,]-Mat[j,])^2))
                if (length(posr) > 2)
                    red.vec[j] <- mean(apply(Mat[-j,], 1,
                                             function(x) sqrt(sum((x-Mat[j,])^2))))
            }
            dist <- c(dist, red.vec)
        }
        pos.r[[i]] <- a
        pos.rd[[i]] <- dist
    }
    return(list(pos.r, pos.rd))
}



## Function: Unique peptide spectrum match (sequence only measured once)
## pos.u: position of peptides with uniquely measured status
## pos.ud: mean distance of a 'unique' peptide to other 'unique' sequences assigned to the protein (follows pos.u structure)

uni.measured.dist <- function(pos, uniques.all, mat) {
    uni.tab <- lapply(uniques.all, table)
    uni.name <- lapply(uni.tab, function(x) which(x==1)) ## which protein has single measured peptides?
    uni.prot <- which(lapply(uni.name, length) > 0)      ## index protein profiles with single measured Peptides

    num.unis <- unlist(lapply(uni.name, length))  ## number single uniques for each protein
    table(num.unis)

    pos.u <- vector("list", length(pos))
    pos.ud <- vector("list", length(pos))

    for (i in uni.prot) {
        uni.p <- match(names(uni.name[[i]]), uniques.all[[i]])
        pos.u[[i]] <- pos[[i]][uni.p]
        Mat <- mat[pos.u[[i]], ]
        uni.vec <- vector("numeric", length(pos.u[[i]]))
        for (j in 1:length(uni.vec)){
            if (length(uni.vec) == 1) {
                Mat <- mat[pos[[i]], ]
                k <- match(pos.u[[i]], pos[[i]]) ## for singles: mean distance to all other peptides of the protein
                uni.vec[j] <- mean(apply(Mat[-k,], 1,
                                         function(x) sqrt(sum((x-Mat[k,])^2))))
            }
            if (length(uni.vec) == 2)
                uni.vec[j] <-  sqrt(sum((Mat[-j,]-Mat[j,])^2))
            if (length(uni.vec) > 2)
                uni.vec[j] <- mean(apply(Mat[-j,], 1, function(x) sqrt(sum((x-Mat[j,])^2))))
        }
        pos.ud[[i]] <- uni.vec
    }
    return(list(pos.u, pos.ud))
}


##' The iPQF spectra-to-protein summarisation method integrates
##' peptide spectra characteristics and quantitative values for protein
##' quantitation estimation. Spectra features, such as charge state,
##' sequence length, identification score and others, contain valuable
##' information concerning quantification accuracy. The iPQF algorithm
##' assigns weights to spectra according to their overall feature reliability
##' and computes a weighted mean to estimate protein quantities.
##' See also \code{\link{combineFeatures}} for a more
##' general overview of feature aggregation and examples.
##'
##' @title iPQF: iTRAQ (and TMT) Protein Quantification based on
##'     Features
##' @param object An instance of class \code{MSnSet} containing
##'     absolute ion intensities.
##' @param groupBy Vector defining spectra to protein
##'     matching. Generally, this is a feature variable such as
##'     \code{fData(object)$accession}.
##' @param low.support.filter A \code{logical} specifying if proteins
##'     being supported by only 1-2 peptide spectra should be filtered
##'     out. Default is \code{FALSE}.
##' @param ratio.calc Either \code{"none"} (don't calculate any
##'     ratios), \code{"sum"} (default), or a specific channel (one of
##'     \code{sampleNames(object)}) defining how to calculate relative
##'     peptides intensities.
##' @param feature.weight Vector \code{"numeric"} giving weight to the
##'     different features. Default is the squared order of the
##'     features redundant -unique-distance metric, charge state, ion
##'     intensity, sequence length, identification score, modification
##'     state, and mass based on a robustness analysis.
##' @param method.combine A \code{logical} defining whether to further
##'     use median polish to combine features.
##' @return A \code{matrix} with estimated protein ratios.
##' @author Martina Fischer
##' @references iPQF: a new peptide-to-protein summarization method
##'     using peptide spectra characteristics to improve protein
##'     quantification. Fischer M, Renard BY.  Bioinformatics. 2016
##'     Apr 1;32(7):1040-7. doi:10.1093/bioinformatics/btv675. Epub
##'     2015 Nov 20. PubMed PMID:26589272.
##' @examples
##' data(msnset2)
##' head(exprs(msnset2))
##' prot <- combineFeatures(msnset2,
##'                         groupBy = fData(msnset2)$accession,
##'                         method = "iPQF")
##' head(exprs(prot))
iPQF <- function(object, groupBy,
                 low.support.filter = FALSE,
                 ratio.calc = "sum",
                 method.combine = FALSE,
                 feature.weight = c(7,6,4,3,2,1,5)^2
                 ) {

    if (!inherits(object,"MSnSet"))
        stop("'object' is required to be of class MSnSet")
    ## Check NA/Zero values still in data set?
    rm.pos <- apply(exprs(object), 2,
                    function(x) which(is.na(x) | x==0))
    rm.rows <- unique(unlist(rm.pos))
    if (length(rm.rows) > 0)
        stop("Remove NA/Zero Intensities in you data",
             "before peptide summarization. ",
             length(rm.rows),
             " spectr[a/um] should be removed.")

    ## Check mzTab standard names are provdied?
    mzTab.names <- c("sequence", "accession",
                     "charge", "modifications",
                     "mass_to_charge",
                     "search_engine_score")
    wrong.colnames <- which( mzTab.names %in% colnames(fData(object)) == FALSE)
    if (length(wrong.colnames) > 0) {
        stop(" In FeatureData the following column names, according to the mzTab standard, are required: ",
             "\n", paste(mzTab.names[wrong.colnames], collapse = ", ")) }

    ## Extract individual features
    sequence <- as.character(fData(object)$sequence)
    accession <- as.character(fData(object)$accession)

    charge <- as.integer(as.vector(fData(object)$charge))
    seq.l  <- sapply(sequence, nchar)
    prec.mass   <- as.numeric(as.vector(fData(object)$mass_to_charge)) * charge

    ses.na <- which(is.na(fData(object)$search_engine_score))
    score  <- as.numeric(as.vector(fData(object)$search_engine_score))
    if (length(which(is.na(score))) > length(ses.na))
        stop("Search engine scores are expected as column of numeric values only.")

    mod.stat.org <- as.character(fData(object)$modifications)
    mod.stat <- ifelse(mod.stat.org == "null" | mod.stat.org == "0" , 0, 1) ## FIXME

    ## Should the above not be NULL and 0? These are probably badly
    ## imported values from mzTab.

    ion.mat <- exprs(object)               ## absolute ion intensity
    mean.ionInt <- apply(ion.mat, 1, mean) ## mean absolute ion intensity

    ## Calculate Ratio Matrix
    if (ratio.calc != "none") {
        mat <- ratio.mat(exprs(object), method=ratio.calc)  ## relative intensities
    }
    else mat <- ion.mat  ## absolute intensities

    ## Protein-Peptide Spectra position assignment
    ## Build list 'pos.pep': Protein ID (accession name)- Peptide spectra assignment (position in object)

    ## group.by character vector! groupBy = feature pep reihenfolge
    uni.ids <- levels(as.factor(groupBy))
    pos.all <- sapply(uni.ids, function(y) which(groupBy==y))
    names(pos.all) <- uni.ids

    ## remove low-supported proteins
    singles <- which(unlist(lapply(pos.all, length)) < 3) ## proteins supported by only 1-2 peptide spectra

    if (length(singles) == length(pos.all)) {
        warning("No proteins with > 2 peptide spectra - keeping everything.")
        pos.pep <- pos.all
    } else {
        pos.pep <- pos.all[-singles] ## proteins supported by >2 peptide spectra
    }

    ## Redundantly measured peptide spectrum status in a protein profile (uniques.all):
    uniques.all <- uniques.list(pos.pep, sequence)

    ## Redundant peptides
    red.result <- redundant.dist(pos.pep, uniques.all, mat)
    pos.r <- red.result[[1]]
    pos.rd <- red.result[[2]]
    ## Single measured peptides
    uni.result <- uni.measured.dist(pos.pep, uniques.all, mat)
    pos.u <- uni.result[[1]]
    pos.ud <- uni.result[[2]]

    ## Combined distance vector:
    ru.dist <- rep(NA, nrow(object))
    ru.dist[unlist(pos.r)] <- unlist(pos.rd)
    ru.dist[unlist(pos.u)] <- unlist(pos.ud)


    ## Data.frame of selected peptide features:
    features <- data.frame(charge, seq.l, prec.mass,
                           score, mod.stat, mean.ionInt, ru.dist)


    ## Protein Quantification - Peptide Summarization
    iPQF.result  <- iPQF.method(pos.pep, mat, features, feature.weight=feature.weight)

    if (!low.support.filter) {
        single.prots <- unique(accession[unlist(pos.all[singles])])
        msg <- paste0("The following ", length(single.prots), " proteins are only supported by 1 or 2 peptide spectra,\n",
                      "hence, protein quantification is not reliable and can only be calculated\n",
                      "by the 'mean' in these cases, corresponding protein accessions are:\n  ",
                      paste(single.prots, collapse = ", "))
        message(msg)
        single.quant <- lapply(pos.all[singles],
                               function(i) {
                                   if (length(i) == 2) apply(mat[i,],2,mean) else mat[i,]
                               })
        single.quant <- do.call(rbind, single.quant)
        quant.result <- rbind(iPQF.result, single.quant)
    } else quant.result <- iPQF.result

    quant.result <- quant.result[match(uni.ids, rownames(quant.result)), ]
    ## Method combination: iPQF with MedianPolish
    if (method.combine) {
        MP.quant <- lapply(1:length(pos.all),
                          function(i) {
                              medpol <- medpolish(mat[pos.all[[i]],],
                                                  trace.iter = FALSE)
                              if (length(pos.all[[i]]) == 1)
                                  result <- medpol$overall + medpol$row
                              else
                                  result <- medpol$overall + medpol$col
                              return(result)
                          } )
        MP.quant <- do.call(rbind, MP.quant)
        quant.result <- t(sapply(1:length(pos.all),
                                 function(k) apply(rbind(MP.quant[k,], quant.result[k,]), 2, mean)))
        rownames(quant.result) <- names(pos.all)
    }
    return(quant.result)
}
Any scripts or data that you put into this service are public.
MSnbase documentation built on Jan. 23, 2021, 2 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MSnbase
Base Functions and Classes for Mass Spectrometry and Proteomics

R/iPQF.R
In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics

Defines functions iPQF uni.measured.dist redundant.dist uniques.list ratio.mat iPQF.method

Documented in iPQF

Try the MSnbase package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MSnbase Base Functions and Classes for Mass Spectrometry and Proteomics

R/iPQF.R In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics

Defines functions iPQF uni.measured.dist redundant.dist uniques.list ratio.mat iPQF.method

Documented in iPQF

Try the MSnbase package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MSnbase
Base Functions and Classes for Mass Spectrometry and Proteomics

R/iPQF.R
In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics