R/normSpectra.R
In ChemoSpec: Exploratory Chemometrics for Spectroscopy

Documented in normSpectra

#'
#' Normalize a Spectra Object
#'
#' This function carries out normalization of the spectra in a
#' \code{\link{Spectra}} object.  There are currently four options:
#' \itemize{
#'   \item \code{"PQN"} carries out "Probabalistic Quotient Normalization" as described
#'     in the reference.  This is probably the best option for many data sets.  However, please
#'     be careful if your sample has protein in it, PQN is potentially biased.
#'     See the references.
#'   \item \code{"TotInt"} normalizes by total intensity.  In this
#'     case, the y-data of a \code{\link{Spectra}} object is normalized by dividing
#'     each y-value by the sum of the y-values in a given spectrum.  Thus each
#'     spectrum sums to 1.  This method assumes that the total concentration of
#'     all substances giving peaks does not vary across samples which may not be true.
#'   \item \code{"Range"} allows one to do something similar to \code{"TotInt"} but rather than using the
#'     sum of the entire spectrum as the denominator, only the sum of the given
#'     range is used.  This would be appropriate if there was an internal standard
#'     in the spectrum which was free of interferance, and one wanted to normalize
#'     relative to it.
#'   \item \code{"zero2one"} scales each spectrum separately to a `[0...1]` scale.
#'     This is sometimes useful for visual comparison of chromatograms but is
#'     inappropriate for spectral data sets.
#' }
#'
#' @param spectra `r .writeDoc_Spectra1()`
#'
#' @param method One of \code{c("PQN", "TotInt", "Range", "zero2one")} giving
#' the method for normalization.
#'
#' @param RangeExpress A vector of
#' logicals (must be of \code{length(Spectra$freq)}).  This vector should be \code{TRUE} for
#' the frequency range you want to serve as the basis for norming, and \code{FALSE} otherwise.
#' The entire spectrum will be divided by the sum of the \code{TRUE} range.  See the examples.
#'
#' @return `r .writeDoc_Spectra1()`
#'
#' @author `r .writeDoc_Authors("BH")`
#'
#' @references
#' \itemize{
#'   \item {Probabalistic Quotient Normalization is reported in F. Dieterle
#'         et al. Analytical Chemistry vol. 78 pages 4281-4290 (2006).}
#'   \item {The exact same mathematics are called "median fold change normalization"
#'         by Nicholson's group, reported in K. A. Veselkov et. al. Analytical
#'         Chemistry vol. 83 pages 5864-5872 (2011).}
#'   \item {Corriea et al. "1H NMR Signals from Urine Excreted Protein are
#'         a Source of Bias in Probablistic Quotient Normalization" Analytical Chemistry
#'         vol. 94 pages 6919-6923 (2022).}
#' }
#'
#' @seealso Additional documentation at \url{https://bryanhanson.github.io/ChemoSpec/}
#'
#' @keywords utilities manip
#'
#' @examples
#' # This example assumes the graphics output is set to ggplot2 (see ?GraphicsOptions).
#' library("ggplot2")
#' data(SrE.IR)
#'
#' # Reference spectrum before normalization
#' p1 <- plotSpectra(SrE.IR) + ggtitle("Original Spectrum")
#' p1
#'
#' # Default PQN normalization
#' res1 <- normSpectra(SrE.IR)
#' p2 <- plotSpectra(res1) + ggtitle("PQN Normalization")
#' p2
#'
#' # Norm over carbonyl region
#' RE <- SrE.IR$freq > 1650 & SrE.IR$freq < 1800
#' res2 <- normSpectra(SrE.IR, method = "Range", RangeExpress = RE)
#' p3 <- plotSpectra(res2) + ggtitle("Normalized to Carbonyl Peaks")
#' p3
#'
#' # Check numerically
#' rowSums(res2$data[, RE]) # compare to rowSums(SrE.IR$data[,RE])
#' 
#' @export normSpectra
#' @importFrom stats median
#'
normSpectra <- function(spectra, method = "PQN", RangeExpress = NULL) {

  # Function to Normalize the data in a Spectra object
  # Part of the ChemoSpec package
  # Bryan Hanson, DePauw University, Nov 2009

  .chkArgs(mode = 11L)
  chkSpectra(spectra)

  # normalize using the probablistic quotient normalization (PQN)

  if (method == "PQN") {

    # Do a standard TotInt normalization
    S <- normSpectra(spectra, method = "TotInt")$data
    if (any(S < 0)) S <- S - min(S)

    # Compute the median spectrum for reference
    M <- apply(S, 2, median)

    # Divide each normed spectrum by the reference column medians (the ref spectrum)
    F <- S
    for (i in 1:nrow(F)) F[i, ] <- F[i, ] / M

    # Get the row medians (per spectrum median) of the ratioed spectra
    # These are the apparent 'fold' dilution factors
    # for each spectrum/sample
    F <- apply(F, 1, median)

    # Divide each row of the original data by it's median
    for (i in 1:nrow(S)) S[i, ] <- S[i, ] / F[i]

    spectra$data <- S
  }

  # normalize a row by the sum of its entries:

  if (method == "TotInt") {
    for (n in 1:length(spectra$names)) {
      S <- sum(as.numeric(spectra$data[n, ]))
      spectra$data[n, ] <- spectra$data[n, ] / S
    }
  }

  # normalize by a range of specified values:

  if (method == "Range") {
    if (is.null(RangeExpress)) stop("No range expression given")
    rfi <- which(RangeExpress)
    for (n in 1:length(spectra$names)) {
      S <- sum(as.numeric(spectra$data[n, rfi]))
      spectra$data[n, ] <- spectra$data[n, ] / S
    }
  }

  # normalize each spectrum to a [0...1] range:

  if (method == "zero2one") {
    for (i in 1:length(spectra$names)) {
      rMin <- min(spectra$data[i, ])
      spectra$data[i, ] <- spectra$data[i, ] - rMin
      rMax <- max(spectra$data[i, ])
      spectra$data[i, ] <- spectra$data[i, ] / rMax
    }
  }

  chkSpectra(spectra)
  spectra
}