## R/msi.dataset_funcs.R
## Defines functions: .calcFactorTMMwsp, .calcFactorQuantile, .calcFactorTMM,
## .varTransf, .normIntensity

## .normIntensity
#' @importFrom stats median quantile
#' @import edgeR
.normIntensity <- function(x, method = "median", peak.ind = NULL, zero.offset = 0) {
  accept.method <- c("median", "PQN", "TIC", "TMM", "upperQuartile")
  if (!method %in% accept.method) {
    stop(".normIntensity: Valid methods are: ", paste0(accept.method, collapse = ", "), ".")
  }

  if (is.null(peak.ind)) {
    peak.ind <- seq_len(ncol(x))
  }
  peak.ind <- sort(unique(peak.ind))

  if (zero.offset != 0) {
    x[is.na(x)] <- 0
    x <- x + zero.offset
  }
  
  x[x == 0] <- NA
  x <- switch(method,

    "TMM" = {
      # Using the default parameters from edgeR
      logratioTrim <- 0.3
      sumTrim <- 0.05
      doWeighting <- TRUE
      Acutoff <- -1e10

      x[is.na(x)] <- 0
      
      # Remove empty pixels
      empty.pixel <- rowSums(x) == 0
      if (any(empty.pixel)) {
        warning(sum(empty.pixel), " empty pixels found.")
        # Save the original number of pixels for final reconstruction
        orig.num.pixels <- nrow(x)
        x <- x[!empty.pixel, ]
      }
      
      x <- t(x) # TMM requires samples along columns
      nsamples <- ncol(x)

      # Library sizes: the edgeR interface accepts a user-supplied lib.size,
      # but here it is forced to NULL, so it is always computed from the data
      lib.size <- colSums(x)

      # Remove all-zero peaks for factor estimation only; being all zeros,
      # they are unchanged by the final scaling
      allzero <- .rowSums(x > 0, nrow(x), nsamples) == 0L
      y <- x[!allzero, , drop = FALSE]

      # Degenerate cases
      if (nrow(y) == 0 || nsamples == 1) {
        stop("Too many empty samples. Cannot apply TMM.")
      }

      f75 <- .calcFactorQuantile(data = y, lib.size = lib.size, p = 0.75)
      # The reference sample is chosen automatically (refColumn is forced
      # to NULL in the original edgeR call)
      refColumn <- which.min(abs(f75 - mean(f75)))
      if (length(refColumn) == 0L || refColumn < 1 || refColumn > nsamples) refColumn <- 1L
      f <- rep(NA, nsamples)
      for (i in 1:nsamples) {
        f[i] <- .calcFactorTMMwsp(
          obs = y[, i], ref = y[, refColumn], libsize.obs = lib.size[i],
          libsize.ref = lib.size[refColumn], logratioTrim = logratioTrim,
          sumTrim = sumTrim, doWeighting = doWeighting, Acutoff = Acutoff
        )
      }
      f <- f / exp(mean(log(f)))

      # Output
      names(f) <- colnames(x)

      x <- sweep(x, 2, f, "/") # divide each sample (column) by its normalization factor
      x <- t(x) # Restore the original orientation (rows = pixels)
      
      # If empty pixels, restore the original dimensions
      if (sum(empty.pixel) > 0) {
        X <- matrix(0, orig.num.pixels, ncol(x))
        X[!empty.pixel, ] <- x
        x <- X
        rm(X)
        gc()
      }
      
      x
    },

    "upperQuartile" = {
      for (i in 1:nrow(x))
      {
        quartile <- quantile(x[i, ], probs = 0.75, na.rm = TRUE)
        if (is.na(quartile)) {
          warning("Upper quartile is 0!")
          quartile <- 1
        }
        x[i, ] <- x[i, ] / quartile
      }
      x
    },

    "TIC" = {
      if (any(is.na(x)) && zero.offset == 0) {
        warning("Found peaks with zero intensity. Remember to add a numeric offset to the variance stabilizing transformation.")
      }
      message('TIC-scaled intensities are proportional data: apply the CLR transformation by calling varTransform(object, method = "clr").')
      for (i in 1:nrow(x))
      {
        tic.value <- sum(x[i, peak.ind], na.rm = TRUE)
        if (tic.value == 0) {
          warning("Pixel ", i, " has TIC = 0. The spectrum will not be scaled.")
          tic.value <- 1
        }
        x[i, ] <- x[i, ] / tic.value
      }
      x
    },

    "median" = {
      for (i in 1:nrow(x))
      {
        med.value <- median(x[i, peak.ind], na.rm = TRUE)
        if (is.na(med.value)) {
          warning(paste0("Pixel ", i, " has median intensiy equal to 0. The spectrum will not be scaled."))
          med.value <- 1
        }
        x[i, ] <- x[i, ] / med.value
      }
      x
    },

    "PQN" = {
      if (length(peak.ind) != ncol(x) || !all(peak.ind == seq_len(ncol(x)))) {
        warning("PQN can only be applied to all peaks; 'peak.ind' is ignored.")
      }
      for (i in 1:nrow(x))
      {
        tic.value <- sum(x[i, ], na.rm = TRUE)
        if (tic.value == 0) {
          stop("Error: scaling factor is 0!")
        }
        x[i, ] <- x[i, ] / tic.value
      }
      x[x == 0] <- NA

      ## Try to use the faster method, if there is not enough RAM, then
      ## use the slower method

      ## Reference spectrum = non-zero median peaks
      ref.spectrum <- tryCatch(apply(x, 2, median, na.rm = TRUE),
        error = function(e) {
          warning("Low memory. Using the slower method to calculate the reference spectrum.")
          z <- array(NA, ncol(x))
          for (j in 1:ncol(x))
          {
            z[j] <- median(x[, j], na.rm = TRUE)
          }
          return(z)
        }
      )
      ## Quotients
      quotients <- tryCatch(x / rep(ref.spectrum, each = nrow(x)),
        error = function(e) {
          warning("Low memory. Using the slower method to calculate the quotients.")
          z <- matrix(NA, nrow(x), ncol(x))
          for (j in 1:nrow(x))
          {
            z[j, ] <- x[j, ] / ref.spectrum
          }
          return(z)
        }
      )
      quotients[quotients == 0] <- NA
      ## Scaling factors
      sc.factor <- tryCatch(apply(quotients, 1, median, na.rm = TRUE),
        error = function(e) {
          warning("Low memory. Using the slower method to calculate the scaling factors.")
          z <- array(NA, nrow(quotients))
          for (j in 1:nrow(quotients))
          {
            z[j] <- median(quotients[j, ], na.rm = TRUE)
          }
          return(z)
        }
      )
      rm(quotients)
      ## Normalized intensities
      ## Divide each pixel (row) by its scaling factor
      x <- tryCatch(sweep(x, 1, sc.factor, "/"),
      error = function(e) {
        warning("Low memory. Using the slower method to calculate the normalized intensities.")
        for (j in 1:nrow(x))
        {
          x[j, ] <- x[j, ] / sc.factor[j]
        }
        return(x)
      }
      )

      x[is.na(x)] <- 0

      x
    }
  )

  # Setting all missing values to 0
  x[is.na(x)] <- 0

  return(x)
}
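
## Minimal usage sketch (not run). The toy matrix below is hypothetical:
## pixels along rows, peaks along columns, as .normIntensity expects. These
## helpers are internal; in the package they are normally reached through the
## exported methods (e.g. varTransform for the transformation step).
if (FALSE) {
  set.seed(1)
  toy <- matrix(rpois(20, lambda = 50), nrow = 4, ncol = 5) # 4 pixels x 5 peaks
  toy.med <- .normIntensity(toy, method = "median") # each row scaled by its median
  toy.tic <- .normIntensity(toy, method = "TIC")    # each row scaled by its total ion count
  rowSums(toy.tic) # all equal to 1 after TIC scaling
}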

## Reduce heteroscedasticity
.varTransf <- function(x, method = "log", zero.offset = 0, norm.method) {
  ## Check if NAs are present
  if (any(is.na(x))) {
    stop("NAs values found in the matrix.")
  }
  ## Check if negative values are present
  if (min(x) < 0) {
    stop("found negative values in the matrix.")
  }
  # Check the selected method
  accept.method <- c("log", "log2", "log10", "sqrt", "clr")
  if (!method %in% accept.method) {
    stop("Valid methods are: ", paste0(accept.method, collapse = ", "), ".")
  }
  
  x <- x + zero.offset
  
  ## Log-based transformations cannot handle zero intensities: in that
  ## case a positive offset is required
  if (min(x) == 0 && method %in% c("log", "log2", "log10", "clr")) {
    stop("Method ", method, " cannot be applied if zeros are present. Add a numeric offset.")
  }
  
  # CLR must be used for TIC normalization
  if (norm.method == "TIC" && method != "clr") {
    stop("'clr' must be used with TIC scaling normalized data!!!")
  }
  
  x <- switch(method,
    "log" = log(x),
    "log2" = log2(x),
    "log10" = log10(x),
    "sqrt" = sqrt(x),
    "clr" = log(x) - apply(log(x), 1, mean)
  )
  return(x)
}
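
## Sketch of the intended pipeline on hypothetical data: TIC scaling followed
## by the CLR transform that .varTransf enforces for TIC-normalized data.
## zero.offset = 1 is an arbitrary example value used to remove zeros before
## the logarithm is taken.
if (FALSE) {
  set.seed(2)
  y <- matrix(rpois(20, lambda = 30), nrow = 4)
  y <- .normIntensity(y, method = "TIC", zero.offset = 1)
  y <- .varTransf(y, method = "clr", norm.method = "TIC")
  rowSums(y) # CLR-transformed rows sum to approximately 0
}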

##################################
## TMM normalization from edgeR ##
##################################

# TMM between two libraries
# Mark Robinson
.calcFactorTMM <- function(obs, ref, libsize.obs = NULL, libsize.ref = NULL,
                           logratioTrim = .3, sumTrim = 0.05, doWeighting = TRUE,
                           Acutoff = -1e10) {
  obs <- as.numeric(obs)
  ref <- as.numeric(ref)

  if (is.null(libsize.obs)) nO <- sum(obs) else nO <- libsize.obs
  if (is.null(libsize.ref)) nR <- sum(ref) else nR <- libsize.ref

  logR <- log2((obs / nO) / (ref / nR)) # log ratio of expression, accounting for library size
  absE <- (log2(obs / nO) + log2(ref / nR)) / 2 # absolute expression
  v <- (nO - obs) / nO / obs + (nR - ref) / nR / ref # estimated asymptotic variance

  # 	remove infinite values, cutoff based on A
  fin <- is.finite(logR) & is.finite(absE) & (absE > Acutoff)

  logR <- logR[fin]
  absE <- absE[fin]
  v <- v[fin]

  if (max(abs(logR)) < 1e-6) {
    return(1)
  }

  # 	taken from the original mean() function
  n <- length(logR)
  loL <- floor(n * logratioTrim) + 1
  hiL <- n + 1 - loL
  loS <- floor(n * sumTrim) + 1
  hiS <- n + 1 - loS

  # 	keep <- (rank(logR) %in% loL:hiL) & (rank(absE) %in% loS:hiS)
  # 	a fix from leonardo ivan almonacid cardenas, since rank() can return
  # 	non-integer values when there are a lot of ties
  keep <- (rank(logR) >= loL & rank(logR) <= hiL) & (rank(absE) >= loS & rank(absE) <= hiS)

  if (doWeighting) {
    f <- sum(logR[keep] / v[keep], na.rm = TRUE) / sum(1 / v[keep], na.rm = TRUE)
  } else {
    f <- mean(logR[keep], na.rm = TRUE)
  }

  # 	Results will be missing if the two libraries share no features with positive counts
  # 	In this case, return unity
  if (is.na(f)) {
    f <- 0
  }
  return(2^f)
}
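
## Sketch (not run): a single TMM factor between two simulated libraries with
## the same composition; the factor should be close to 1. Counts are
## illustrative only.
if (FALSE) {
  set.seed(3)
  ref <- rpois(1000, lambda = 100)
  obs <- rpois(1000, lambda = 100)
  .calcFactorTMM(obs = obs, ref = ref) # close to 1 for equal compositions
}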

# Generalized version of upper-quartile normalization
# Mark Robinson
# Created 16 Aug 2010
.calcFactorQuantile <- function(data, lib.size, p = 0.75) {
  # 	i <- apply(data<=0,1,all)
  # 	if(any(i)) data <- data[!i,,drop=FALSE]
  y <- t(t(data) / lib.size)
  f <- apply(y, 2, function(x) quantile(x, p = p))
  return(f)
  # 	f/exp(mean(log(f)))
}
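
## Sketch (not run): upper-quartile factors for a toy matrix arranged as in
## the TMM branch above, i.e. samples along columns.
if (FALSE) {
  set.seed(4)
  m <- matrix(rpois(300, lambda = 20), ncol = 3)
  .calcFactorQuantile(data = m, lib.size = colSums(m), p = 0.75)
}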

# TMM with pairing of singleton positive counts between the obs and ref libraries
# Gordon Smyth
# Created 19 Sep 2018. Last modified 23 April 2019.
.calcFactorTMMwsp <- function(obs, ref, libsize.obs = NULL, libsize.ref = NULL,
                              logratioTrim = .3, sumTrim = 0.05,
                              doWeighting = TRUE, Acutoff = -1e10) {
  obs <- as.numeric(obs)
  ref <- as.numeric(ref)

  # 	epsilon serves as floating-point zero
  eps <- 1e-14

  # 	Flag positive counts (npos: 3 = both positive, 2 = obs only, 1 = ref only, 0 = both zero)
  pos.obs <- (obs > eps)
  pos.ref <- (ref > eps)
  npos <- 2L * pos.obs + pos.ref

  # 	Remove double zeros and NAs
  i <- which(npos == 0L | is.na(npos))
  if (length(i)) {
    obs <- obs[-i]
    ref <- ref[-i]
    npos <- npos[-i]
  }

  # 	Check library sizes
  if (is.null(libsize.obs)) libsize.obs <- sum(obs)
  if (is.null(libsize.ref)) libsize.ref <- sum(ref)

  # 	Pair up as many singleton positives as possible
  # 	The unpaired singleton positives are discarded so that no zeros remain
  zero.obs <- (npos == 1L)
  zero.ref <- (npos == 2L)
  k <- (zero.obs | zero.ref)
  n.eligible.singles <- min(sum(zero.obs), sum(zero.ref))
  if (n.eligible.singles > 0L) {
    refk <- sort(ref[k], decreasing = TRUE)[1:n.eligible.singles]
    obsk <- sort(obs[k], decreasing = TRUE)[1:n.eligible.singles]
    obs <- c(obs[!k], obsk)
    ref <- c(ref[!k], refk)
  } else {
    obs <- obs[!k]
    ref <- ref[!k]
  }

  # 	Any left?
  n <- length(obs)
  if (n == 0L) {
    return(1)
  }

  # 	Compute M and A values
  obs.p <- obs / libsize.obs
  ref.p <- ref / libsize.ref
  M <- log2(obs.p / ref.p)
  A <- 0.5 * log2(obs.p * ref.p)

  # 	If M all zero, return 1
  if (max(abs(M)) < 1e-6) {
    return(1)
  }

  # 	M order, breaking ties by shrunk M
  obs.p.shrunk <- (obs + 0.5) / (libsize.obs + 0.5)
  ref.p.shrunk <- (ref + 0.5) / (libsize.ref + 0.5)
  M.shrunk <- log2(obs.p.shrunk / ref.p.shrunk)
  o.M <- order(M, M.shrunk)

  # 	A order
  o.A <- order(A)

  # 	Trim
  loM <- as.integer(n * logratioTrim) + 1L
  hiM <- n + 1L - loM
  keep.M <- rep.int(FALSE, n)
  keep.M[o.M[loM:hiM]] <- TRUE
  loA <- as.integer(n * sumTrim) + 1L
  hiA <- n + 1L - loA
  keep.A <- rep.int(FALSE, n)
  keep.A[o.A[loA:hiA]] <- TRUE
  keep <- keep.M & keep.A
  M <- M[keep]

  # 	Average the M values
  if (doWeighting) {
    obs.p <- obs.p[keep]
    ref.p <- ref.p[keep]
    v <- (1 - obs.p) / obs.p / libsize.obs + (1 - ref.p) / ref.p / libsize.ref
    w <- (1 + 1e-6) / (v + 1e-6)
    TMM <- sum(w * M) / sum(w)
  } else {
    TMM <- mean(M)
  }

  2^TMM
}
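
## Sketch (not run): TMMwsp is designed for sparse data, pairing singleton
## zeros that plain TMM would simply discard. The sparse toy counts below
## are illustrative only.
if (FALSE) {
  set.seed(5)
  obs <- rpois(200, lambda = 2) # sparse counts with a fair share of zeros
  ref <- rpois(200, lambda = 2)
  .calcFactorTMMwsp(obs = obs, ref = ref)
}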
