### ORIGINAL AUTHOR: Andrew Teschendorff
# The original BMIQ function from Teschendorff 2013 adjusts for the type-2 bias in
# Illumina Infinium 450k data.
# Later functions and edits were provided by yours truly, Steve Horvath.
# I changed the code so that one can calibrate methylation data to a gold standard.
# Specifically, I took version v_1.2 by Teschendorff and fixed minor issues.
# Also I made the code more robust e.g. by changing the optimization algorithm.
# Toward this end, I used the method="Nelder-Mead" in optim()
### Later functions and edits by Steve Horvath
### # Steve Horvath took version v_1.2 by Teschendorff
# and fixed minor errors. Also he made the code more robust.
# Importantly, SH changed the optimization algorithm to make it #more robust.
# SH used method="Nelder-Mead" in optim() since the other #optimization method sometimes gets stuck.
#Toward this end, the function blc was replaced by blc2.
require(RPMM);
betaEst2=function (y, w, weights)
{
yobs = !is.na(y)
if (sum(yobs) <= 1)
return(c(1, 1))
y = y[yobs]
w = w[yobs]
weights = weights[yobs]
N = sum(weights * w)
p = sum(weights * w * y)/N
v = sum(weights * w * y * y)/N - p * p
logab = log(c(p, 1 - p)) + log(pmax(1e-06, p * (1 - p)/v -
1))
if (sum(yobs) == 2)
return(exp(logab))
opt = try(optim(logab, betaObjf, ydata = y, wdata = w, weights = weights,
method = "Nelder-Mead",control=list(maxit=50) ), silent = TRUE)
if (inherits(opt, "try-error"))
return(c(1, 1))
exp(opt$par)
} # end of function betaEst
blc2=function (Y, w, maxiter = 25, tol = 1e-06, weights = NULL, verbose = TRUE)
{
Ymn = min(Y[Y > 0], na.rm = TRUE)
Ymx = max(Y[Y < 1], na.rm = TRUE)
Y = pmax(Y, Ymn/2)
Y = pmin(Y, 1 - (1 - Ymx)/2)
Yobs = !is.na(Y)
J = dim(Y)[2]
K = dim(w)[2]
n = dim(w)[1]
if (n != dim(Y)[1])
stop("Dimensions of w and Y do not agree")
if (is.null(weights))
weights = rep(1, n)
mu = a = b = matrix(Inf, K, J)
crit = Inf
for (i in 1:maxiter) {
warn0 = options()$warn
options(warn = -1)
eta = apply(weights * w, 2, sum)/sum(weights)
mu0 = mu
for (k in 1:K) {
for (j in 1:J) {
ab = betaEst2(Y[, j], w[, k], weights)
a[k, j] = ab[1]
b[k, j] = ab[2]
mu[k, j] = ab[1]/sum(ab)
}
}
ww = array(0, dim = c(n, J, K))
for (k in 1:K) {
for (j in 1:J) {
ww[Yobs[, j], j, k] = dbeta(Y[Yobs[, j], j],
a[k, j], b[k, j], log = TRUE)
}
}
options(warn = warn0)
w = apply(ww, c(1, 3), sum, na.rm = TRUE)
wmax = apply(w, 1, max)
for (k in 1:K) w[, k] = w[, k] - wmax
w = t(eta * t(exp(w)))
like = apply(w, 1, sum)
w = (1/like) * w
llike = weights * (log(like) + wmax)
crit = max(abs(mu - mu0))
if (verbose)
print(crit)
if (crit < tol)
break
}
return(list(a = a, b = b, eta = eta, mu = mu, w = w, llike = sum(llike)))
}
# The function BMIQcalibration was created by Steve Horvath by heavily recycling code
# from A. Teschendorff's BMIQ function.
# BMIQ stands for beta mixture quantile normalization.
# Explanation: datM is a data frame with Illumina beta values (rows are samples, colums are CpGs.
# goldstandard is a numeric vector with beta values that is used as gold standard for calibrating the columns of datM.
# The length of goldstandard has to equal the number of columns of datM.
# Example code: First we impute missing values.
# library(WGCNA); dimnames1=dimnames(datMeth)
# datMeth= data.frame(t(impute.knn(as.matrix(t(datMeth)))$data))
# dimnames(datMeth)=dimnames1
# gold.mean=as.numeric(apply(datMeth,2,mean,na.rm=TRUE))
#datMethCalibrated=BMIQcalibration(datM=datMeth,goldstandard.beta=gold.mean)
BMIQcalibration=function(datM,goldstandard.beta,nL=3,doH=TRUE,nfit=20000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=FALSE,calibrateUnitInterval=TRUE){
if (length(goldstandard.beta) !=dim(datM)[[2]] ) {stop("Error in function arguments length(goldstandard.beta) !=dim(datM)[[2]]. Consider transposing datM.")}
if (plots ) {par(mfrow=c(2,2))}
beta1.v = goldstandard.beta
if (calibrateUnitInterval ) {datM=CalibrateUnitInterval(datM)}
### estimate initial weight matrix from type1 distribution
w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
w0.m[which(beta1.v <= th1.v[1]),1] = 1;
w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
w0.m[which(beta1.v > th1.v[2]),3] = 1;
### fit type1
print("Fitting EM beta mixture to goldstandard probes");
set.seed(1)
rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v)) ) ,replace=FALSE)
em1.o = blc(matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
subsetclass1.v = apply(em1.o$w,1,which.max);
## MATT
subsetth1.v = c(
mean(quantile(beta1.v[rand.idx[subsetclass1.v==1]], probs=0.9),
quantile(beta1.v[rand.idx[subsetclass1.v==2]], probs=0.1)),
mean(quantile(beta1.v[rand.idx[subsetclass1.v==2]], probs=0.9),
quantile(beta1.v[rand.idx[subsetclass1.v==3]], probs=0.1)));
#c(
#mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),
# min(beta1.v[rand.idx[subsetclass1.v==2]])),
#mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),
# min(beta1.v[rand.idx[subsetclass1.v==3]])));
class1.v = rep(2,length(beta1.v));
class1.v[which(beta1.v < subsetth1.v[1])] = 1;
class1.v[which(beta1.v > subsetth1.v[2])] = 3;
nth1.v = subsetth1.v;
print("Done");
### generate plot from estimated mixture
if(plots){
print("Check");
tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
tmpB.v = vector();
for(l in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
}
plot(density(beta1.v),main= paste("Type1fit-", sep=""));
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
}
### Estimate Modes
if ( sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
if ( sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
if ( sum(class1.v==1) >1){
d1U.o = density(beta1.v[class1.v==1])
mod1U = d1U.o$x[which.max(d1U.o$y)]
}
if ( sum(class1.v==3)>1 ){
d1M.o = density(beta1.v[class1.v==3])
mod1M = d1M.o$x[which.max(d1M.o$y)]
}
### BETA 2
for (ii in 1:dim(datM)[[1]] ){
printFlush(paste("ii=",ii, "of", dim(datM)[[1]]))
printFlush(date())
sampleID=ii
beta2.v = as.numeric(datM[ii,])
d2U.o = density(beta2.v[which(beta2.v<0.4)]);
d2M.o = density(beta2.v[which(beta2.v>0.6)]);
mod2U = d2U.o$x[which.max(d2U.o$y)]
mod2M = d2M.o$x[which.max(d2M.o$y)]
### now deal with type2 fit
th2.v = vector();
th2.v[1] = nth1.v[1] + (mod2U-mod1U);
th2.v[2] = nth1.v[2] + (mod2M-mod1M);
### estimate initial weight matrix
w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
w0.m[which(beta2.v <= th2.v[1]),1] = 1;
w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
w0.m[which(beta2.v > th2.v[2]),3] = 1;
print("Fitting EM beta mixture to input probes");
# I fixed an error in the following line (replaced beta1 by beta2)
set.seed(1)
rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE) ,replace=FALSE)
em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol,verbose=TRUE);
print("Done");
### for type II probes assign to state (unmethylated, hemi or full methylation)
subsetclass2.v = apply(em2.o$w,1,which.max);
if (sum(subsetclass2.v==2)>0 ){
subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
}
if (sum(subsetclass2.v==2)==0 ){
subsetth2.v = c(1/2*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2*mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3*mean(beta2.v[rand.idx[subsetclass2.v==3]]));
}
class2.v = rep(2,length(beta2.v));
class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
class2.v[which(beta2.v >= subsetth2.v[2])] = 3;
### generate plot
if(plots){
tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
tmpB.v = vector();
for(lt in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
}
plot(density(beta2.v), main= paste("Type2fit-",sampleID,sep="") );
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
}
classAV1.v = vector();classAV2.v = vector();
for(l in 1:nL){
classAV1.v[l] = em1.o$mu[l,1];
classAV2.v[l] = em2.o$mu[l,1];
}
### start normalising input probes
print("Start normalising input probes");
nbeta2.v = beta2.v;
### select U probes
lt = 1;
selU.idx = which(class2.v==lt);
selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selUR.idx] = q.v;
p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);
### find corresponding quantile in type I distribution
q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
nbeta2.v[selUL.idx] = q.v;
### select M probes
lt = 3;
selM.idx = which(class2.v==lt);
selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selMR.idx] = q.v;
if(doH){ ### if TRUE also correct type2 hemimethylated probes
### select H probes and include ML probes (left ML tail is not well described by a beta-distribution).
lt = 2;
selH.idx = c(which(class2.v==lt),selML.idx);
minH = min(beta2.v[selH.idx],na.rm=TRUE)
maxH = max(beta2.v[selH.idx],na.rm=TRUE)
deltaH = maxH - minH;
#### need to do some patching
deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)
## new maximum of H probes should be
nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;
## new minimum of H probes should be
nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
ndeltaH = nmaxH - nminH;
### perform conformal transformation (shift+dilation)
## new_beta_H(i) = a + hf*(beta_H(i)-minH);
hf = ndeltaH/deltaH ;
### fix lower point first
nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);
}
### generate final plot to check normalisation
if(plots){
print("Generating final plot");
d1.o = density(beta1.v);
d2.o = density(beta2.v);
d2n.o = density(nbeta2.v);
ymax = max(d2.o$y,d1.o$y,d2n.o$y);
plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1), main=paste("CheckBMIQ-",sampleID,sep="") );
points(d1.o$x,d1.o$y,col="red",type="l");
points(d2n.o$x,d2n.o$y,col="blue",type="l");
legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
}
datM[ii,]= nbeta2.v ;
} # end of for (ii=1 loop
datM
} # end of function BMIQcalibration
BMIQ = function(beta.v,design.v,nL=3,doH=TRUE,nfit=50000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=TRUE,sampleID=1,calibrateUnitInterval=TRUE){
if (calibrateUnitInterval) {
rangeBySample=range(beta.v,na.rm=TRUE)
minBySample=rangeBySample[1]
maxBySample=rangeBySample[2]
if ( (minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample) ) {
y1=c(0.001,.999)
x1=c(minBySample,maxBySample)
lm1=lm( y1 ~ x1 )
intercept1=coef(lm1)[[1]]
slope1=coef(lm1)[[2]]
beta.v=intercept1+slope1*beta.v
} # end of if
} # end of if (calibrateUnitInterval
type1.idx = which(design.v==1);
type2.idx = which(design.v==2);
beta1.v = beta.v[type1.idx];
beta2.v = beta.v[type2.idx];
### estimate initial weight matrix from type1 distribution
w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
w0.m[which(beta1.v <= th1.v[1]),1] = 1;
w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
w0.m[which(beta1.v > th1.v[2]),3] = 1;
### fit type1
print("Fitting EM beta mixture to goldstandard probes");
set.seed(1)
rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v)) ) ,replace=FALSE)
em1.o = blc2(Y=matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
subsetclass1.v = apply(em1.o$w,1,which.max);
subsetth1.v = c(mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),min(beta1.v[rand.idx[subsetclass1.v==2]])),mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),min(beta1.v[rand.idx[subsetclass1.v==3]],na.rm=TRUE)));
class1.v = rep(2,length(beta1.v));
class1.v[which(beta1.v < subsetth1.v[1])] = 1;
class1.v[which(beta1.v > subsetth1.v[2])] = 3;
nth1.v = subsetth1.v;
print("Done");
### generate plot from estimated mixture
if(plots){
print("Check");
tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
tmpB.v = vector();
for(l in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
}
pdf(paste("Type1fit-",sampleID,".pdf",sep=""),width=6,height=4);
plot(density(beta1.v));
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
dev.off();
}
### Estimate Modes
if ( sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
if ( sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
if ( sum(class1.v==1) >1){
d1U.o = density(beta1.v[class1.v==1])
mod1U = d1U.o$x[which.max(d1U.o$y)]
}
if ( sum(class1.v==3)>1 ){
d1M.o = density(beta1.v[class1.v==3])
mod1M = d1M.o$x[which.max(d1M.o$y)]
}
d2U.o = density(beta2.v[which(beta2.v<0.4)]);
d2M.o = density(beta2.v[which(beta2.v>0.6)]);
mod2U = d2U.o$x[which.max(d2U.o$y)]
mod2M = d2M.o$x[which.max(d2M.o$y)]
### now deal with type2 fit
th2.v = vector();
th2.v[1] = nth1.v[1] + (mod2U-mod1U);
th2.v[2] = nth1.v[2] + (mod2M-mod1M);
### estimate initial weight matrix
w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
w0.m[which(beta2.v <= th2.v[1]),1] = 1;
w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
w0.m[which(beta2.v > th2.v[2]),3] = 1;
print("Fitting EM beta mixture to input probes");
set.seed(1)
rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE) ,replace=FALSE)
em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
print("Done");
### for type II probes assign to state (unmethylated, hemi or full methylation)
subsetclass2.v = apply(em2.o$w,1,which.max);
if (sum(subsetclass2.v==2)>0 ){
subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
}
if (sum(subsetclass2.v==2)==0 ){
subsetth2.v = c(1/2*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2*mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3*mean(beta2.v[rand.idx[subsetclass2.v==3]]));
}
class2.v = rep(2,length(beta2.v));
class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
class2.v[which(beta2.v >= subsetth2.v[2])] = 3;
### generate plot
if(plots){
tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
tmpB.v = vector();
for(lt in 1:nL){
tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
}
pdf(paste("Type2fit-",sampleID,".pdf",sep=""),width=6,height=4);
plot(density(beta2.v));
d.o = density(tmpB.v);
points(d.o$x,d.o$y,col="green",type="l")
legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
dev.off();
}
classAV1.v = vector();classAV2.v = vector();
for(l in 1:nL){
classAV1.v[l] = em1.o$mu[l,1];
classAV2.v[l] = em2.o$mu[l,1];
}
### start normalising input probes
print("Start normalising input probes");
nbeta2.v = beta2.v;
### select U probes
lt = 1;
selU.idx = which(class2.v==lt);
selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selUR.idx] = q.v;
p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);
### find corresponding quantile in type I distribution
q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
nbeta2.v[selUL.idx] = q.v;
### select M probes
lt = 3;
selM.idx = which(class2.v==lt);
selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
nbeta2.v[selMR.idx] = q.v;
if(doH){ ### if TRUE also correct type2 hemimethylated probes
### select H probes and include ML probes (left ML tail is not well described by a beta-distribution).
lt = 2;
selH.idx = c(which(class2.v==lt),selML.idx);
minH = min(beta2.v[selH.idx],na.rm=TRUE)
maxH = max(beta2.v[selH.idx],na.rm=TRUE)
deltaH = maxH - minH;
#### need to do some patching
deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)
## new maximum of H probes should be
nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;
## new minimum of H probes should be
nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
ndeltaH = nmaxH - nminH;
### perform conformal transformation (shift+dilation)
## new_beta_H(i) = a + hf*(beta_H(i)-minH);
hf = ndeltaH/deltaH ;
### fix lower point first
nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);
}
pnbeta.v = beta.v;
pnbeta.v[type1.idx] = beta1.v;
pnbeta.v[type2.idx] = nbeta2.v;
### generate final plot to check normalisation
if(plots){
print("Generating final plot");
d1.o = density(beta1.v);
d2.o = density(beta2.v);
d2n.o = density(nbeta2.v);
ymax = max(d2.o$y,d1.o$y,d2n.o$y);
pdf(paste("CheckBMIQ-",sampleID,".pdf",sep=""),width=6,height=4)
plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1));
points(d1.o$x,d1.o$y,col="red",type="l");
points(d2n.o$x,d2n.o$y,col="blue",type="l");
legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
dev.off();
}
print(paste("Finished for sample ",sampleID,sep=""));
return(list(nbeta=pnbeta.v,class1=class1.v,class2=class2.v,av1=classAV1.v,av2=classAV2.v,hf=hf,th1=nth1.v,th2=th2.v));
}
CheckBMIQ = function(beta.v,design.v,pnbeta.v){### pnbeta is BMIQ normalised profile
type1.idx = which(design.v==1);
type2.idx = which(design.v==2);
beta1.v = beta.v[type1.idx];
beta2.v = beta.v[type2.idx];
pnbeta2.v = pnbeta.v[type2.idx];
} # end of function CheckBMIQ
CalibrateUnitInterval=function(datM,onlyIfOutside=TRUE){
rangeBySample=data.frame(lapply(data.frame(t(datM)),range,na.rm=TRUE))
minBySample=as.numeric(rangeBySample[1,])
maxBySample=as.numeric(rangeBySample[2,])
if (onlyIfOutside) { indexSamples=which((minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample))
}
if (!onlyIfOutside) { indexSamples=1:length(minBySample)}
if ( length(indexSamples)>=1 ){
for ( i in indexSamples) {
y1=c(0.001,0.999)
x1=c(minBySample[i],maxBySample[i])
lm1=lm( y1 ~ x1 )
intercept1=coef(lm1)[[1]]
slope1=coef(lm1)[[2]]
datM[i,]=intercept1+slope1*datM[i,]
} # end of for loop
}
datM
} #end of function for calibrating to [0,1]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.