# --------------------------------------------------------------------------------------------
# Date: January 28, 2021
# Author: Kristen Yeh, Sophie Castel
# Title: Normalization Method - Quantile
# --------------------------------------------------------------------------------------------
# Quantile normalization consists of two steps:
# 1. Create a mapping between ranks and values. For rank 1, find the n values,
# one per spectrum, that are the smallest value on the spectrum, and save their
# averages. Similarly for rank 2 and the second smallest values, and on up to
# the n largest values, one per spectrum.
# 2. For each spectrum, replace the actual values with these averages.
# Basically, index each intensity value in each spectrum then sort
# by intensity (value). Probably need to truncate. Average spectrum for the sorted spectra
# is calculated. The averages are then inserted into the sorted spectra.
# The spectra are then reverted to their unsorted order using the index.
norm_quantile <- function(dat, mass_dat, spectra_cols){
full_mz <- Intensity <- Spectrum <- NULL
# ---------------------
# LOGICAL CHECKS
# ---------------------
stopifnot(
is.character(mass_dat),
is.character(spectra_cols),
mass_dat %in% colnames(dat),
all(spectra_cols %in% colnames(dat))
)
if(length(spectra_cols) < 2){
stop("Only one spectrum input. Please enter two spectra.")
}
mz <- dat[[mass_dat]]
spectra <- lapply(spectra_cols, function(x){dat[x]})
i <- do.call(what = data.frame, args = c(spectra))
i <- data.frame(mz, i)
colnames(i) <- c("full_mz", spectra_cols)
i_melt <- reshape2::melt(i, id.vars = "full_mz")
colnames(i_melt) <- c("full_mz", "Spectrum", "Intensity")
i_melt <- dplyr::filter(i_melt, Intensity != 0)
i_melt <- i_melt %>% dplyr::group_by(Spectrum) %>% dplyr::mutate(id = dplyr::row_number())
ordr <- i_melt[order(i_melt$Intensity, decreasing = TRUE),]
r <- rep(NA, length(spectra_cols))
for(j in 1:length(spectra_cols)){
r[j] <- max(as.numeric(i_melt$id[which(i_melt$Spectrum == spectra_cols[j])]))
}
r_min <- min(r)
logic <-which(r == r_min)
trunc <- ordr[(which(ordr$id <= r[logic])),]
i_trunc <- data.frame(matrix(ncol = length(unique(trunc$Spectrum)), nrow = max(trunc$id)))
for(j in 1:length(unique(trunc$Spectrum))){
i_trunc[j] <- trunc$Intensity[which(trunc$Spectrum == spectra_cols[j])]
colnames(i_trunc)[j] <- spectra_cols[j]
}
i_trunc <- data.frame(i_trunc, avg = apply(i_trunc, 1, FUN = mean, na.rm = TRUE))
for(j in 1:length(unique(trunc$Spectrum))){
trunc$Intensity[which(trunc$Spectrum == colnames(i_trunc)[j])] <- i_trunc$avg
}
dat <- trunc[order(trunc[mass_dat], decreasing = FALSE), ] %>% spread(Spectrum, Intensity)
dat[is.na(dat)] <- 0
dat <- dat[-2]
return(as.data.frame(dat))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.