#' Rolling-Up Features Abundance Measurements
#'
#' A primary purpose of this function is to roll-up peptide
#' measurements to protein level. Ultimately it can be used
#' for rolling-up or combining any features according to a grouping
#' factor.
#'
#' @param msnset msnset (or most likely eset subclass) object
#' @param rollBy character. A column name in pData(msnset)
#' @param rollFun function for roll-up. "-" for log-transformed data,
#' "/" for normal scale
#' @param verbose logical. Controls real-time output.
#'
#' @return MSnSet object
#' @importFrom Biobase exprs fData
#' @importFrom outliers grubbs.test outlier
#' @export rrollup
#'
#' @examples
#' data(srm_msnset)
#' dim(msnset)
#' msnset2 <- rrollup(msnset, rollBy="Gene", rollFun="-", verbose=TRUE)
#' dim(msnset2)
rrollup <- function(msnset, rollBy, rollFun, verbose=TRUE){
summarisedFeatures <- list()
unique_rollBy <- unique(fData(msnset)[[rollBy]])
for (i in 1:length(unique_rollBy)) {
# Subset msnset to each rollBy group
msnset_sub <- msnset[fData(msnset)[[rollBy]] == unique_rollBy[i], ,
drop = FALSE]
summarisedFeatures[[i]] <- rrollup_a_feature_set(msnset_sub, rollBy,
rollFun, verbose)
}
exprs.new <- do.call(rbind, summarisedFeatures)
rownames(exprs.new) <- unique_rollBy
if(nrow(unique(fData(msnset))) == nrow(exprs.new)){
fData.new <- unique(fData(msnset))
}else{
fData.new <- data.frame(rollBy = rownames(exprs.new),
stringsAsFactors = FALSE)
colnames(fData.new) <- rollBy
rownames(fData.new) <- fData.new[, rollBy]
}
msnset.new <- MSnSet(exprs = as.matrix(exprs.new),
fData = fData.new,
pData = pData(msnset))
return(msnset.new)
}
rrollup_a_feature_set <- function(mat, rollBy, rollFun, verbose){
# Get the group name (protein name)
rollBy_name <- unique(fData(mat)[[rollBy]])
# We don't want a named vector if nrow(exprs(mat)) == 1
# We need a data frame
mat <- exprs(mat) %>% as.data.frame()
if (nrow(mat) > 1) {
# Calculate reference index
refIdx <- which.max(rowSums(!is.na(mat)))
# Count number of times each row overlaps with reference
overlap_count <-
apply(mat, 1,
function(x)
sum(!is.na(as.numeric(x) == as.numeric(mat[refIdx, ]))))
overlap_count <- as.numeric(overlap_count)
# Subset to rows that overlap more than once with reference
mat <- mat[overlap_count != 1, , drop = F]
# If rows were removed and verbose = TRUE, output a message
# that includes the group name.
if (any(overlap_count == 1) & verbose) {
message(paste0("Some rows associated with group ",
rollBy_name, " have been removed."))
}
# Recalculate index using subset mat
refIdx <- which.max(rowSums(!is.na(mat)))
mat.ratios <- sweep(mat, 2, as.numeric(mat[refIdx,]), FUN = rollFun)
scale.factors <- apply(mat.ratios, 1, median, na.rm = TRUE)
mat <- sweep(mat, 1, scale.factors, FUN = rollFun)
#
# for each sample, check if any of the feature (peptide)
# abundance measurements
# representing the set (protein) are not in line with other.
maxVals <- apply(mat, 1, max, na.rm = TRUE)
# If all values in a row are NA, the max is -Inf
# Change infinite values to NA
maxVals[is.infinite(maxVals)] <- NA
if(nrow(mat) > 2){
while(grubbs.test(maxVals)$p.value < 0.05 &
sum(!is.na(maxVals)) > 2){
i <- which(maxVals == outlier(maxVals))
maxVals[i] <- NA
}
}
# protein abundance estimates
mat <- mat[!is.na(maxVals), , drop = FALSE]
protProfile <- apply(mat, 2, median, na.rm=TRUE)
} else {
# The output should be a named vector,
# not a data frame with 1 row.
protProfile <- mat[1, ]
}
return(protProfile)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.