tests/normalizeFragmentLength-ex1.R

library("aroma.light")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Example 1: Single-enzyme fragment-length normalization of 6 arrays
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Number samples
I <- 9

# Number of loci
J <- 1000

# Fragment lengths
fl <- seq(from=100, to=1000, length.out=J)

# Simulate data points with unknown fragment lengths
hasUnknownFL <- seq(from=1, to=J, by=50)
fl[hasUnknownFL] <- NA_real_

# Simulate data
y <- matrix(0, nrow=J, ncol=I)
maxY <- 12
for (kk in 1:I) {
  k <- runif(n=1, min=3, max=5)
  mu <- function(fl) {
    mu <- rep(maxY, length(fl))
    ok <- !is.na(fl)
    mu[ok] <- mu[ok] - fl[ok]^{1/k}
    mu
  }
  eps <- rnorm(J, mean=0, sd=1)
  y[,kk] <- mu(fl) + eps
}

# Normalize data (to a zero baseline)
yN <- apply(y, MARGIN=2, FUN=function(y) {
  normalizeFragmentLength(y, fragmentLengths=fl, onMissing="median")
})

# The correction factors
rho <- y-yN
print(summary(rho))
# The correction for units with unknown fragment lengths
# equals the median correction factor of all other units
print(summary(rho[hasUnknownFL,]))

# Plot raw data
layout(matrix(1:9, ncol=3))
xlim <- c(0,max(fl, na.rm=TRUE))
ylim <- c(0,max(y, na.rm=TRUE))
xlab <- "Fragment length"
ylab <- expression(log2(theta))
for (kk in 1:I) {
  plot(fl, y[,kk], xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
  ok <- (is.finite(fl) & is.finite(y[,kk]))
  lines(lowess(fl[ok], y[ok,kk]), col="red", lwd=2)
}

# Plot normalized data
layout(matrix(1:9, ncol=3))
ylim <- c(-1,1)*max(y, na.rm=TRUE)/2
for (kk in 1:I) {
  plot(fl, yN[,kk], xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab)
  ok <- (is.finite(fl) & is.finite(y[,kk]))
  lines(lowess(fl[ok], yN[ok,kk]), col="blue", lwd=2)
}
HenrikBengtsson/aroma.light documentation built on July 3, 2023, 1:57 a.m.