Description Usage Arguments Value Multi-enzyme normalization Target functions Author(s) References Examples
Normalizes signals for PCR fragment-length effects. Some or all signals are used to estimated the normalization function. All signals are normalized.
1 2 3 |
y |
A |
fragmentLengths |
An |
targetFcns |
An optional |
subsetToFit |
The subset of data points used to fit the
normalization function.
If |
onMissing |
Specifies how data points for which there is no
fragment length is normalized.
If |
.isLogged |
A |
... |
Additional arguments passed to |
.returnFit |
A |
Returns a numeric
vector
of the normalized signals.
It is assumed that the fragment-length effects from multiple enzymes added (with equal weights) on the intensity scale. The fragment-length effects are fitted for each enzyme separately based on units that are exclusively for that enzyme. If there are no or very such units for an enzyme, the assumptions of the model are not met and the fit will fail with an error. Then, from the above single-enzyme fits the average effect across enzymes is the calculated for each unit that is on multiple enzymes.
It is possible to specify custom target function effects for each
enzyme via argument targetFcns
. This argument has to be a
list
containing one function
per enzyme and ordered in the same
order as the enzyme are in the columns of argument
fragmentLengths
.
For instance, if one wish to normalize the signals such that their
mean signal as a function of fragment length effect is contantly
equal to 2200 (or the intensity scale), the use
targetFcns=function(fl, ...) log2(2200)
which completely
ignores fragment-length argument 'fl' and always returns a
constant.
If two enzymes are used, then use
targetFcns=rep(list(function(fl, ...) log2(2200)), 2)
.
Note, if targetFcns
is NULL
, this corresponds to
targetFcns=rep(list(function(fl, ...) 0), ncol(fragmentLengths))
.
Alternatively, if one wants to only apply minimial corrections to the signals, then one can normalize toward target functions that correspond to the fragment-length effect of the average array.
Henrik Bengtsson
[1] H. Bengtsson, R. Irizarry, B. Carvalho, and T. Speed, Estimation and assessment of raw copy numbers at the single locus level, Bioinformatics, 2008.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Example 1: Single-enzyme fragment-length normalization of 6 arrays
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Number samples
I <- 9;
# Number of loci
J <- 1000;
# Fragment lengths
fl <- seq(from=100, to=1000, length.out=J);
# Simulate data points with unknown fragment lengths
hasUnknownFL <- seq(from=1, to=J, by=50);
fl[hasUnknownFL] <- NA;
# Simulate data
y <- matrix(0, nrow=J, ncol=I);
maxY <- 12;
for (kk in 1:I) {
k <- runif(n=1, min=3, max=5);
mu <- function(fl) {
mu <- rep(maxY, length(fl));
ok <- !is.na(fl);
mu[ok] <- mu[ok] - fl[ok]^{1/k};
mu;
}
eps <- rnorm(J, mean=0, sd=1);
y[,kk] <- mu(fl) + eps;
}
# Normalize data (to a zero baseline)
yN <- apply(y, MARGIN=2, FUN=function(y) {
normalizeFragmentLength(y, fragmentLengths=fl, onMissing="median");
})
# The correction factors
rho <- y-yN;
print(summary(rho));
# The correction for units with unknown fragment lengths
# equals the median correction factor of all other units
print(summary(rho[hasUnknownFL,]));
# Plot raw data
layout(matrix(1:9, ncol=3));
xlim <- c(0,max(fl, na.rm=TRUE));
ylim <- c(0,max(y, na.rm=TRUE));
xlab <- "Fragment length";
ylab <- expression(log2(theta));
for (kk in 1:I) {
plot(fl, y[,kk], xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab);
ok <- (is.finite(fl) & is.finite(y[,kk]));
lines(lowess(fl[ok], y[ok,kk]), col="red", lwd=2);
}
# Plot normalized data
layout(matrix(1:9, ncol=3));
ylim <- c(-1,1)*max(y, na.rm=TRUE)/2;
for (kk in 1:I) {
plot(fl, yN[,kk], xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab);
ok <- (is.finite(fl) & is.finite(y[,kk]));
lines(lowess(fl[ok], yN[ok,kk]), col="blue", lwd=2);
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Example 2: Two-enzyme fragment-length normalization of 6 arrays
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(0xbeef);
# Number samples
I <- 5;
# Number of loci
J <- 3000;
# Fragment lengths (two enzymes)
fl <- matrix(0, nrow=J, ncol=2);
fl[,1] <- seq(from=100, to=1000, length.out=J);
fl[,2] <- seq(from=1000, to=100, length.out=J);
# Let 1/2 of the units be on both enzymes
fl[seq(from=1, to=J, by=4),1] <- NA;
fl[seq(from=2, to=J, by=4),2] <- NA;
# Let some have unknown fragment lengths
hasUnknownFL <- seq(from=1, to=J, by=15);
fl[hasUnknownFL,] <- NA;
# Sty/Nsp mixing proportions:
rho <- rep(1, I);
rho[1] <- 1/3; # Less Sty in 1st sample
rho[3] <- 3/2; # More Sty in 3rd sample
# Simulate data
z <- array(0, dim=c(J,2,I));
maxLog2Theta <- 12;
for (ii in 1:I) {
# Common effect for both enzymes
mu <- function(fl) {
k <- runif(n=1, min=3, max=5);
mu <- rep(maxLog2Theta, length(fl));
ok <- is.finite(fl);
mu[ok] <- mu[ok] - fl[ok]^{1/k};
mu;
}
# Calculate the effect for each data point
for (ee in 1:2) {
z[,ee,ii] <- mu(fl[,ee]);
}
# Update the Sty/Nsp mixing proportions
ee <- 2;
z[,ee,ii] <- rho[ii]*z[,ee,ii];
# Add random errors
for (ee in 1:2) {
eps <- rnorm(J, mean=0, sd=1/sqrt(2));
z[,ee,ii] <- z[,ee,ii] + eps;
}
}
hasFl <- is.finite(fl);
unitSets <- list(
nsp = which( hasFl[,1] & !hasFl[,2]),
sty = which(!hasFl[,1] & hasFl[,2]),
both = which( hasFl[,1] & hasFl[,2]),
none = which(!hasFl[,1] & !hasFl[,2])
)
# The observed data is a mix of two enzymes
theta <- matrix(NA, nrow=J, ncol=I);
# Single-enzyme units
for (ee in 1:2) {
uu <- unitSets[[ee]];
theta[uu,] <- 2^z[uu,ee,];
}
# Both-enzyme units (sum on intensity scale)
uu <- unitSets$both;
theta[uu,] <- (2^z[uu,1,]+2^z[uu,2,])/2;
# Missing units (sample from the others)
uu <- unitSets$none;
theta[uu,] <- apply(theta, MARGIN=2, sample, size=length(uu))
# Calculate target array
thetaT <- rowMeans(theta, na.rm=TRUE);
targetFcns <- list();
for (ee in 1:2) {
uu <- unitSets[[ee]];
fit <- lowess(fl[uu,ee], log2(thetaT[uu]));
class(fit) <- "lowess";
targetFcns[[ee]] <- function(fl, ...) {
predict(fit, newdata=fl);
}
}
# Fit model only to a subset of the data
subsetToFit <- setdiff(1:J, seq(from=1, to=J, by=10))
# Normalize data (to a target baseline)
thetaN <- matrix(NA, nrow=J, ncol=I);
fits <- vector("list", I);
for (ii in 1:I) {
lthetaNi <- normalizeFragmentLength(log2(theta[,ii]), targetFcns=targetFcns,
fragmentLengths=fl, onMissing="median",
subsetToFit=subsetToFit, .returnFit=TRUE);
fits[[ii]] <- attr(lthetaNi, "modelFit");
thetaN[,ii] <- 2^lthetaNi;
}
# Plot raw data
xlim <- c(0, max(fl, na.rm=TRUE));
ylim <- c(0, max(log2(theta), na.rm=TRUE));
Mlim <- c(-1,1)*4;
xlab <- "Fragment length";
ylab <- expression(log2(theta));
Mlab <- expression(M==log[2](theta/theta[R]));
layout(matrix(1:(3*I), ncol=I, byrow=TRUE));
for (ii in 1:I) {
plot(NA, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, main="raw");
# Single-enzyme units
for (ee in 1:2) {
# The raw data
uu <- unitSets[[ee]];
points(fl[uu,ee], log2(theta[uu,ii]), col=ee+1);
}
# Both-enzyme units (use fragment-length for enzyme #1)
uu <- unitSets$both;
points(fl[uu,1], log2(theta[uu,ii]), col=3+1);
for (ee in 1:2) {
# The true effects
uu <- unitSets[[ee]];
lines(lowess(fl[uu,ee], log2(theta[uu,ii])), col="black", lwd=4, lty=3);
# The estimated effects
fit <- fits[[ii]][[ee]]$fit;
lines(fit, col="orange", lwd=3);
muT <- targetFcns[[ee]](fl[uu,ee]);
lines(fl[uu,ee], muT, col="cyan", lwd=1);
}
}
# Calculate log-ratios
thetaR <- rowMeans(thetaN, na.rm=TRUE);
M <- log2(thetaN/thetaR);
# Plot normalized data
for (ii in 1:I) {
plot(NA, xlim=xlim, ylim=Mlim, xlab=xlab, ylab=Mlab, main="normalized");
# Single-enzyme units
for (ee in 1:2) {
# The normalized data
uu <- unitSets[[ee]];
points(fl[uu,ee], M[uu,ii], col=ee+1);
}
# Both-enzyme units (use fragment-length for enzyme #1)
uu <- unitSets$both;
points(fl[uu,1], M[uu,ii], col=3+1);
}
ylim <- c(0,1.5);
for (ii in 1:I) {
data <- list();
for (ee in 1:2) {
# The normalized data
uu <- unitSets[[ee]];
data[[ee]] <- M[uu,ii];
}
uu <- unitSets$both;
if (length(uu) > 0)
data[[3]] <- M[uu,ii];
uu <- unitSets$none;
if (length(uu) > 0)
data[[4]] <- M[uu,ii];
cols <- seq(along=data)+1;
plotDensity(data, col=cols, xlim=Mlim, xlab=Mlab, main="normalized");
abline(v=0, lty=2);
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.