# WITHIN ARRAY NORMALIZATION
MA.RG <- function(object, bc.method="subtract", offset=0) {
# Convert RGList to MAList
# Gordon Smyth
# 2 March 2003. Last revised 9 Dec 2004.
if(is.null(object$R) || is.null(object$G)) stop("Object doesn't contain R and G components")
object <- backgroundCorrect(object, method=bc.method, offset=offset)
R <- object$R
G <- object$G
# Log
R[R <= 0] <- NA
G[G <= 0] <- NA
R <- log(R,2)
G <- log(G,2)
# Minus and Add
object$R <- object$G <- object$Rb <- object$Gb <- object$other <- NULL
object$M <- as.matrix(R-G)
object$A <- as.matrix((R+G)/2)
new("MAList",unclass(object))
}
RG.MA <- function(object) {
# Convert MAList to RGList
# Gordon Smyth
# 5 September 2003. Last modified 9 Dec 2004.
object$R <- 2^( object$A+object$M/2 )
object$G <- 2^( object$A-object$M/2 )
object$M <- NULL
object$A <- NULL
new("RGList",unclass(object))
}
normalizeWithinArrays <- function(object,layout=object$printer,method="printtiploess",weights=object$weights,span=0.3,iterations=4,controlspots=NULL,df=5,robust="M",bc.method="subtract",offset=0)
# Within array normalization
# Gordon Smyth
# 2 March 2003. Last revised 16 March 2013.
{
# Check input arguments
# and get non-intensity dependent methods out of the way
if(!is(object,"MAList")) object <- MA.RG(object,bc.method=bc.method,offset=offset)
choices <- c("none","median","loess","printtiploess","composite","control","robustspline")
method <- match.arg(method,choices)
if(method=="none") return(object)
if(is.vector(object$M)) object$M <- as.matrix(object$M)
nprobes <- nrow(object$M)
narrays <- ncol(object$M)
if(!is.null(weights)) weights <- asMatrixWeights(weights,dim=c(nprobes,narrays))
if(method=="median") {
if(is.null(weights))
for (j in 1:narrays) object$M[,j] <- object$M[,j] - median(object$M[,j],na.rm=TRUE)
else
for (j in 1:narrays) object$M[,j] <- object$M[,j] - weighted.median(object$M[,j],weights[,j],na.rm=TRUE)
return(object)
}
# All remaining methods use regression of M-values on A-values
if(is.vector(object$A)) object$A <- as.matrix(object$A)
if(nrow(object$A) != nprobes) stop("row dimension of A doesn't match M")
if(ncol(object$A) != narrays) stop("col dimension of A doesn't match M")
switch(method,
loess = {
for (j in 1:narrays) {
y <- object$M[,j]
x <- object$A[,j]
w <- weights[,j]
object$M[,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals
}
},
printtiploess = {
if(is.null(layout)) stop("Layout argument not specified")
ngr <- layout$ngrid.r
ngc <- layout$ngrid.c
nspots <- layout$nspot.r * layout$nspot.c
nprobes2 <- ngr*ngc*nspots
if(nprobes2 != nprobes) stop("printer layout information does not match M row dimension")
for (j in 1:narrays) {
spots <- 1:nspots
for (gridr in 1:ngr)
for (gridc in 1:ngc) {
y <- object$M[spots,j]
x <- object$A[spots,j]
w <- weights[spots,j]
object$M[spots,j] <- loessFit(y,x,w,span=span,iterations=iterations)$residuals
spots <- spots + nspots
}
}
},
composite = {
if(is.null(layout)) stop("Layout argument not specified")
if(is.null(controlspots)) stop("controlspots argument not specified")
ntips <- layout$ngrid.r * layout$ngrid.c
nspots <- layout$nspot.r * layout$nspot.c
for (j in 1:narrays) {
y <- object$M[,j]
x <- object$A[,j]
w <- weights[,j]
f <- is.finite(y) & is.finite(x)
if(!is.null(w)) f <- f & is.finite(w)
y[!f] <- NA
fit <- loess(y~x,weights=w,span=span,subset=controlspots,na.action=na.exclude,degree=0,surface="direct",family="symmetric",trace.hat="approximate",iterations=iterations)
alpha <- global <- y
global[f] <- predict(fit,newdata=x[f])
alpha[f] <- (rank(x[f])-1) / sum(f)
spots <- 1:nspots
for (tip in 1:ntips) {
y <- object$M[spots,j]
x <- object$A[spots,j]
w <- weights[spots,j]
local <- loessFit(y,x,w,span=span,iterations=iterations)$fitted
object$M[spots,j] <- object$M[spots,j] - alpha[spots]*global[spots]-(1-alpha[spots])*local
spots <- spots + nspots
}
}
},
control = {
# if(is.null(layout)) stop("Layout argument not specified")
if(is.null(controlspots)) stop("controlspots argument not specified")
# ntips <- layout$ngrid.r * layout$ngrid.c
# nspots <- layout$nspot.r * layout$nspot.c
for (j in 1:narrays) {
y <- object$M[,j]
x <- object$A[,j]
w <- weights[,j]
f <- is.finite(y) & is.finite(x)
if(!is.null(w)) f <- f & is.finite(w)
y[!f] <- NA
fit <- loess(y~x,weights=w,span=span,subset=controlspots,na.action=na.exclude,degree=1,surface="direct",family="symmetric",trace.hat="approximate",iterations=iterations)
y[f] <- y[f]-predict(fit,newdata=x[f])
object$M[,j] <- y
}
},
robustspline = {
# if(is.null(layout)) stop("Layout argument not specified")
for (j in 1:narrays)
object$M[,j] <- normalizeRobustSpline(object$M[,j],object$A[,j],layout,df=df,method=robust)
}
)
object
}
normalizeRobustSpline <- function(M,A,layout=NULL,df=5,method="M") {
# Robust spline normalization
# Gordon Smyth
# 27 April 2003. Last revised 14 January 2015.
if(!requireNamespace("MASS",quietly=TRUE)) stop("MASS package required but is not installed (or can't be loaded)")
if(!requireNamespace("splines",quietly=TRUE)) stop("splines package required but is not installed (or can't be loaded)")
if(is.null(layout)) {
ngrids <- 1
nspots <- length(M)
} else {
ngrids <- round(layout$ngrid.r * layout$ngrid.c)
nspots <- round(layout$nspot.r * layout$nspot.c)
if(ngrids < 1) stop("layout incorrectly specified")
if(nspots < df+1) stop("too few spots in each print-tip block")
}
# Global splines
O <- is.finite(M) & is.finite(A)
X <- matrix(NA,ngrids*nspots,df)
X[O,] <- splines::ns(A[O],df=df,intercept=TRUE)
x <- X[O,,drop=FALSE]
y <- M[O]
w <- rep(1,length(y))
s0 <- summary(MASS::rlm(x,y,weights=w,method=method),method="XtWX",correlation=FALSE)
beta0 <- s0$coefficients[,1]
covbeta0 <- s0$cov * s0$stddev^2
# Case with only one tip-group
if(ngrids <= 1) {
M[O] <- s0$residuals
M[!O] <- NA
return(M)
}
# Tip-wise splines
beta <- array(1,c(ngrids,1)) %*% array(beta0,c(1,df))
covbeta <- array(0,c(ngrids,df,df))
spots <- 1:nspots
for (i in 1:ngrids) {
o <- O[spots]
y <- M[spots][o]
if(length(y)) {
x <- X[spots,][o,,drop=FALSE]
r <- qr(x)$rank
if(r<df) x <- x[,1:r,drop=FALSE]
w <- rep(1,length(y))
s <- summary(MASS::rlm(x,y,weights=w,method=method),method="XtWX",correlation=FALSE)
beta[i,1:r] <- s$coefficients[,1]
covbeta[i,1:r,1:r] <- s$cov * s$stddev^2
}
spots <- spots + nspots
}
# Empirical Bayes estimates
res.cov <- cov(beta) - apply(covbeta,c(2,3),mean)
a <- max(0, sum(diag(res.cov)) / sum(diag(covbeta0)) )
Sigma0 <- covbeta0 * a
# Sigma0 <- covbeta0 * max(0,mean(eigen(solve(covbeta0,res.cov))$values))
# Shrunk splines
if(a==0) {
M[O] <- s0$residuals
M[!O] <- NA
} else {
spots <- 1:nspots
for (i in 1:ngrids) {
beta[i,] <- beta0 + Sigma0 %*% solve(Sigma0+covbeta[i,,],beta[i,]-beta0)
o <- O[spots]
x <- X[spots,][o,]
M[spots][o] <- M[spots][o] - x %*% beta[i,]
M[spots][!o] <- NA
spots <- spots + nspots
}
}
M
}
# PRINTORDER
normalizeForPrintorder <- function(object,layout,start="topleft",method="loess",separate.channels=FALSE,span=0.1,plate.size=32) {
# Pre-normalize the foreground intensities for print order
# Gordon Smyth
# 11 Mar 2002. Last revised 18 June 2003.
if(is.null(object$R) || is.null(object$G)) stop("List must contain components R and G")
start <- match.arg(start,c("topleft","topright"))
method <- match.arg(method,c("loess","plate"))
po <- printorder(layout,start=start)$printorder
nslides <- NCOL(object$R)
for (i in 1:nslides) {
RG <- normalizeForPrintorder.rg(R=object$R[,i],G=object$G[,i],printorder=po,method=method,separate.channels=separate.channels,span=span,plate.size=plate.size)
object$R[,i] <- RG$R
object$G[,i] <- RG$G
}
object
}
normalizeForPrintorder.rg <- function(R,G,printorder,method="loess",separate.channels=FALSE,span=0.1,plate.size=32,plot=FALSE) {
# Pre-normalize the foreground intensities for print order, given R and G for a single array.
# Gordon Smyth
# 8 Mar 2002. Last revised 3 January 2007.
if(plot) ord <- order(printorder)
Rf <- log(R,2)
Gf <- log(G,2)
Rf[is.infinite(Rf)] <- NA
Gf[is.infinite(Gf)] <- NA
if(!separate.channels) Af <- (Rf+Gf)/2
method <- match.arg(method,c("loess","plate"))
if(method=="plate") {
# Correct for plate pack (usually four 384-well plates)
plate <- 1 + (printorder-0.5) %/% plate.size
if(!requireNamespace("MASS",quietly=TRUE)) stop("MASS package required but is not installed (or can't be loaded)")
hubermu <- function(...) MASS::huber(...)$mu
if(separate.channels) {
plate.mR <- tapply(Rf,plate,hubermu)
plate.mG <- tapply(Gf,plate,hubermu)
mR <- mG <- Rf
for (i in 1:max(plate)) {
mR[plate==i] <- plate.mR[i]
mG[plate==i] <- plate.mG[i]
}
if(plot) {
plot(printorder,Rf,xlab="Print Order",ylab="Log Intensity",type="n")
points(printorder,Rf,pch=".",col="red")
points(printorder,Gf,pch=".",col="green")
lines(printorder[ord],mR[ord],col="red")
lines(printorder[ord],mG[ord],col="green")
return(invisible())
}
mR <- mR - mean(mR,na.rm=TRUE)
mG <- mG - mean(mG,na.rm=TRUE)
} else {
plate.m <- tapply(Af,plate,hubermu)
m <- Af
for (i in 1:max(plate)) m[plate==i] <- plate.m[i]
if(plot) {
plot(printorder,Af,xlab="Print Order",ylab="Log Intensity",pch=".")
lines(printorder[ord],m[ord])
}
mR <- mG <- m - mean(m,na.rm=TRUE)
}
} else {
# Smooth correction for time order
if(separate.channels) {
mR <- fitted(loess(Rf~printorder,span=span,degree=0,family="symmetric",trace.hat="approximate",iterations=5,surface="direct",na.action=na.exclude))
mG <- fitted(loess(Gf~printorder,span=span,degree=0,family="symmetric",trace.hat="approximate",iterations=5,surface="direct",na.action=na.exclude))
if(plot) {
plot(printorder,Rf,xlab="Print Order",ylab="Log Intensity",type="n")
points(printorder,Rf,pch=".",col="red")
points(printorder,Gf,pch=".",col="green")
lines(printorder[ord],mR[ord],col="red")
lines(printorder[ord],mG[ord],col="green")
return(invisible())
}
mR <- mR - mean(mR,na.rm=TRUE)
mG <- mG - mean(mG,na.rm=TRUE)
} else {
m <- fitted(loess(Af~printorder,span=span,degree=0,family="symmetric",trace.hat="approximate",iterations=5,surface="direct",na.action=na.exclude))
if(plot) {
plot(printorder,Af,xlab="Print Order",ylab="Log Intensity",pch=".")
lines(printorder[ord],m[ord])
}
mR <- mG <- m - mean(m,na.rm=TRUE)
}
}
list(R=2^(Rf-mR),G=2^(Gf-mG),R.trend=mR,G.trend=mG)
}
plotPrintorder <- function(object,layout,start="topleft",slide=1,method="loess",separate.channels=FALSE,span=0.1,plate.size=32) {
# Pre-normalize the foreground intensities for print order.
# Gordon Smyth
# 9 Apr 2002. Last revised 18 June 2003.
if(is.null(object$R) || is.null(object$G)) stop("List must contain components R and G")
G <- object$G[,slide]
R <- object$R[,slide]
if(length(R) != length(G)) stop("R and G must have same length")
start <- match.arg(start,c("topleft","topright"))
po <- printorder(layout,start=start)$printorder
invisible(normalizeForPrintorder.rg(R=R,G=G,printorder=po,method=method,separate.channels=separate.channels,span=span,plate.size=plate.size,plot=TRUE))
}
# BETWEEN ARRAY NORMALIZATION
normalizeBetweenArrays <- function(object, method=NULL, targets=NULL, cyclic.method="fast", ...) {
# Normalize between arrays
# Gordon Smyth
# 12 Apri 2003. Last revised 13 June 2016.
if(is.data.frame(object)) {
object <- as.matrix(object)
if(mode(object) != "numeric") stop("'object' is a data.frame and not all columns are numeric")
}
# Default method
if(is.null(method)) {
if(is(object,"matrix")) {
method="quantile"
} else if(is(object,"EListRaw")) {
method="quantile"
} else {
method="Aquantile"
}
}
choices <- c("none","scale","quantile","Aquantile","Gquantile","Rquantile","Tquantile","vsn","cyclicloess")
method <- match.arg(method,choices)
if(method=="vsn") stop("vsn method no longer supported. Please use normalizeVSN instead.")
# Methods for matrices
if(is(object,"matrix")) {
if(!(method %in% c("none","scale","quantile","cyclicloess"))) stop("method not applicable to single-channel data")
return(switch(method,
none = object,
scale = normalizeMedianValues(object),
quantile = normalizeQuantiles(object, ...),
cyclicloess = normalizeCyclicLoess(object,method=cyclic.method, ...)
))
}
# Treat EListRaw objects as matrices
if(is(object,"EListRaw")) {
if(method=="cyclicloess")
object$E <- Recall(log2(object$E),method=method,...)
else
object$E <- log2(Recall(object$E,method=method,...))
object <- new("EList",unclass(object))
return(object)
}
# From here assume two-color data
if(is(object,"RGList")) object <- MA.RG(object)
if(is.null(object$M) || is.null(object$A)) stop("object doesn't appear to be RGList or MAList object")
switch(method,
scale = {
object$M <- normalizeMedianAbsValues(object$M)
object$A <- normalizeMedianAbsValues(object$A)
},
quantile = {
narrays <- NCOL(object$M)
Z <- normalizeQuantiles(cbind(object$A-object$M/2,object$A+object$M/2),...)
G <- Z[,1:narrays]
R <- Z[,narrays+(1:narrays)]
object$M <- R-G
object$A <- (R+G)/2
},
Aquantile = {
object$A <- normalizeQuantiles(object$A,...)
},
Gquantile = {
G <- object$A-object$M/2
E <- normalizeQuantiles(G,...) - G
object$A <- object$A + E
},
Rquantile = {
R <- object$A+object$M/2
E <- normalizeQuantiles(R,...) - R
object$A <- object$A + E
},
Tquantile = {
narrays <- NCOL(object$M)
if(NCOL(targets)>2) targets <- targets[,c("Cy3","Cy5")]
targets <- as.vector(targets)
Z <- cbind(object$A-object$M/2,object$A+object$M/2)
for (u in unique(targets)) {
j <- targets==u
Z[,j] <- normalizeQuantiles(Z[,j],...)
}
G <- Z[,1:narrays]
R <- Z[,narrays+(1:narrays)]
object$M <- R-G
object$A <- (R+G)/2
})
object
}
normalizeVSN <- function(x,...)
{
if(!requireNamespace("Biobase",quietly=TRUE)) stop("Biobase package required but is not installed (or can't be loaded)")
if(!requireNamespace("vsn",quietly=TRUE)) stop("vsn package required but is not installed (or can't be loaded)")
UseMethod("normalizeVSN")
}
normalizeVSN.RGList <- function(x,...)
# vsn background correction and normalization for RGList objects
# Gordon Smyth
# 9 Sep 2010.
{
x <- backgroundCorrect(x,method="subtract")
y <- cbind(x$G,x$R)
x$G <- x$R <- NULL
y <- Biobase::exprs(vsn::vsnMatrix(x=y,...))
n2 <- ncol(y)/2
G <- y[,1:n2]
R <- y[,n2+(1:n2)]
x$M <- R-G
x$A <- (R+G)/2
new("MAList",unclass(x))
}
normalizeVSN.EListRaw <- function(x,...)
# vsn background correction and normalization for EListRaw objects
# Gordon Smyth
# 9 Sep 2010.
{
x <- backgroundCorrect(x,method="subtract")
x$E <- Biobase::exprs(vsn::vsnMatrix(x=x$E,...))
new("EList",unclass(x))
}
normalizeVSN.default <- function(x,...)
# vsn background correction and normalization for matrices
# Gordon Smyth
# 9 Sep 2010.
{
Biobase::exprs(vsn::vsnMatrix(x=as.matrix(x),...))
}
normalizeQuantiles <- function(A, ties=TRUE) {
# Normalize columns of a matrix to have the same quantiles, allowing for missing values.
# Gordon Smyth
# 25 June 2002. Last revised 16 May 2019.
n <- dim(A)
if(is.null(n)) return(A)
if(n[2]==1) return(A)
O <- S <- array(,n)
nobs <- rep(n[1],n[2])
i <- (0:(n[1]-1))/(n[1]-1)
for (j in 1:n[2]) {
Si <- sort(A[,j], method="quick", index.return=TRUE)
nobsj <- length(Si$x)
if(nobsj < n[1]) {
nobs[j] <- nobsj
isna <- is.na(A[,j])
S[,j] <- approx((0:(nobsj-1))/(nobsj-1), Si$x, i, ties=list("ordered",mean))$y
O[!isna,j] <- ((1:n[1])[!isna])[Si$ix]
} else {
S[,j] <- Si$x
O[,j] <- Si$ix
}
}
m <- rowMeans(S)
for (j in 1:n[2]) {
if(ties) r <- rank(A[,j])
if(nobs[j] < n[1]) {
isna <- is.na(A[,j])
if(ties)
A[!isna,j] <- approx(i, m, (r[!isna]-1)/(nobs[j]-1), ties=list("ordered",mean))$y
else
A[O[!isna,j],j] <- approx(i, m, (0:(nobs[j]-1))/(nobs[j]-1), ties=list("ordered",mean))$y
} else {
if(ties)
A[,j] <- approx(i, m, (r-1)/(n[1]-1), ties=list("ordered",mean))$y
else
A[O[,j],j] <- m
}
}
A
}
normalizeMedianAbsValues <- function(x)
# Normalize columns of a matrix to have the same median absolute value
# Gordon Smyth
# 12 April 2003. Last modified 19 Oct 2005.
{
narrays <- NCOL(x)
if(narrays==1) return(x)
cmed <- log(apply(abs(x), 2, median, na.rm=TRUE))
cmed <- exp(cmed - mean(cmed))
t(t(x)/cmed)
}
normalizeMedianValues <- function(x)
# Normalize columns of a matrix to have the same median value
# Gordon Smyth
# 24 Jan 2011. Last modified 24 Jan 2011.
{
narrays <- NCOL(x)
if(narrays==1) return(x)
cmed <- log(apply(x, 2, median, na.rm=TRUE))
cmed <- exp(cmed - mean(cmed))
t(t(x)/cmed)
}
normalizeCyclicLoess <- function(x, weights = NULL, span=0.7, iterations = 3, method="fast")
# Cyclic loess normalization of columns of matrix
# incorporating probes weights.
# Yunshun (Andy) Chen and Gordon Smyth
# 14 April 2010. Last modified 24 Feb 2012.
{
x <- as.matrix(x)
method <- match.arg(method, c("fast","affy","pairs"))
n <- ncol(x)
if(method=="pairs") {
for (k in 1:iterations)
for (i in 1:(n-1)) for (j in (i+1):n) {
m <- x[,j] - x[,i]
a <- .5*(x[,j] + x[,i])
f <- loessFit(m, a, weights=weights, span=span)$fitted
x[,i] <- x[,i] + f/2
x[,j] <- x[,j] - f/2
}
}
if(method=="fast") {
for (k in 1:iterations) {
a <- rowMeans(x,na.rm=TRUE)
for (i in 1:n){
m <- x[,i] - a
f <- loessFit(m, a, weights=weights, span=span)$fitted
x[,i] <- x[,i] - f
}
}
}
if(method=="affy") {
g <- nrow(x)
for (k in 1:iterations) {
adjustment <- matrix(0,g,n)
for (i in 1:(n-1)) for (j in (i+1):n) {
m <- x[,j] - x[,i]
a <- .5*(x[,j] + x[,i])
f <- loessFit(m, a, weights=weights, span=span)$fitted
adjustment[,j] <- adjustment[,j] + f
adjustment[,i] <- adjustment[,i] - f
}
x <- x-adjustment/n
}
}
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.