R/normalizeHorvath.R

### ORIGINAL AUTHOR: Andrew Teschendorff
# The original BMIQ function from Teschendorff 2013 adjusts for the type-2 bias 
# in Illumina Infinium 450k data.
# Later functions and edits were provided by Steve Horvath (SH).
# SH changed the code so one can calibrate methylation data to a gold standard.
# Specifically, SH took version v_1.2 by Teschendorff  and fixed minor issues. 
# SH also made the code more robust e.g. by changing the optimization algorithm.
# Toward this end, SH used the method="Nelder-Mead" in optim()
# Toward this end, the function blc was replaced by blc2.

# require(RPMM) ## done by ozymandias now 

betaEst2=function (y, w, weights) { # {{{
    yobs = !is.na(y)
    if (sum(yobs) <= 1) 
        return(c(1, 1))
    y = y[yobs]
    w = w[yobs]
    weights = weights[yobs]
    N = sum(weights * w)
    p = sum(weights * w * y)/N
    v = sum(weights * w * y * y)/N - p * p
    logab = log(c(p, 1 - p)) + log(pmax(1e-06, p * (1 - p)/v - 
        1))
    if (sum(yobs) == 2) 
        return(exp(logab))
    opt = try(optim(logab, betaObjf, ydata = y, wdata = w, weights = weights, 
        method = "Nelder-Mead",control=list(maxit=50) ), silent = TRUE)
    if (inherits(opt, "try-error")) 
        return(c(1, 1))
    exp(opt$par)
} # }}} end of function betaEst

blc2=function(Y, w, maxiter=25, tol=1e-06, weights=NULL, verbose=TRUE) { # {{{
    Ymn = min(Y[Y > 0], na.rm = TRUE)
    Ymx = max(Y[Y < 1], na.rm = TRUE)
    Y = pmax(Y, Ymn/2)
    Y = pmin(Y, 1 - (1 - Ymx)/2)
    Yobs = !is.na(Y)
    J = dim(Y)[2]
    K = dim(w)[2]
    n = dim(w)[1]
    if (n != dim(Y)[1]) 
        stop("Dimensions of w and Y do not agree")
    if (is.null(weights)) 
        weights = rep(1, n)
    mu = a = b = matrix(Inf, K, J)
    crit = Inf
    for (i in 1:maxiter) {
        warn0 = options()$warn
        options(warn = -1)
        eta = apply(weights * w, 2, sum)/sum(weights)
        mu0 = mu
        for (k in 1:K) {
            for (j in 1:J) {
                ab = betaEst2(Y[, j], w[, k], weights)
                a[k, j] = ab[1]
                b[k, j] = ab[2]
                mu[k, j] = ab[1]/sum(ab)
            }
        }
        ww = array(0, dim = c(n, J, K))
        for (k in 1:K) {
            for (j in 1:J) {
                ww[Yobs[, j], j, k] = dbeta(Y[Yobs[, j], j], 
                  a[k, j], b[k, j], log = TRUE)
            }
        }
        options(warn = warn0)
        w = apply(ww, c(1, 3), sum, na.rm = TRUE)
        wmax = apply(w, 1, max)
        for (k in 1:K) w[, k] = w[, k] - wmax
        w = t(eta * t(exp(w)))
        like = apply(w, 1, sum)
        w = (1/like) * w
        llike = weights * (log(like) + wmax)
        crit = max(abs(mu - mu0))
        if (verbose) 
            print(crit)
        if (crit < tol) 
            break
    }
    return(list(a = a, b = b, eta = eta, mu = mu, w = w, llike = sum(llike)))
} # }}}

# The function BMIQcalibration was created by Steve Horvath by heavily recycling code
# from A. Teschendorff's BMIQ function.
# BMIQ stands for beta mixture quantile normalization.
# Explanation: datM is a data frame with Illumina beta values (rows are samples, colums are CpGs.
# goldstandard is a numeric vector with beta values that is used as gold standard for calibrating the columns of datM.
# The length of goldstandard has to equal the number of columns of datM.
# Example code: First we impute missing values.
# library(WGCNA); dimnames1=dimnames(datMeth)
# datMeth= data.frame(t(impute.knn(as.matrix(t(datMeth)))$data))
# dimnames(datMeth)=dimnames1
# gold.mean=as.numeric(apply(datMeth,2,mean,na.rm=TRUE))
#datMethCalibrated=BMIQcalibration(datM=datMeth,goldstandard.beta=gold.mean)
#
BMIQcalibration <- function(datM,goldstandard.beta,nL=3,doH=TRUE,nfit=20000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=FALSE,calibrateUnitInterval=TRUE){ # {{{
    if (length(goldstandard.beta) !=dim(datM)[[2]] ) {stop("Error in function arguments length(goldstandard.beta) !=dim(datM)[[2]]. Consider transposing datM.")}
    if (plots ) {par(mfrow=c(2,2))}
beta1.v = goldstandard.beta

    if (calibrateUnitInterval ) {datM=CalibrateUnitInterval(datM)}

    ### estimate initial weight matrix from type1 distribution
    w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
    w0.m[which(beta1.v <= th1.v[1]),1] = 1;
    w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
    w0.m[which(beta1.v > th1.v[2]),3] = 1;
    ### fit type1
    print("Fitting EM beta mixture to goldstandard probes");
    set.seed(1)
    rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v))  )   ,replace=FALSE)
    em1.o = blc(matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
    subsetclass1.v = apply(em1.o$w,1,which.max);
    subsetth1.v = c(mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),min(beta1.v[rand.idx[subsetclass1.v==2]])),mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),min(beta1.v[rand.idx[subsetclass1.v==3]])));
    class1.v = rep(2,length(beta1.v));
    class1.v[which(beta1.v < subsetth1.v[1])] = 1;
    class1.v[which(beta1.v > subsetth1.v[2])] = 3;
    nth1.v = subsetth1.v;
    print("Done");

    ### generate plot from estimated mixture
    if(plots){
    print("Check");
    tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
    tmpB.v = vector();
    for(l in 1:nL){
      tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
    }
    plot(density(beta1.v),main= paste("Type1fit-", sep=""));
    d.o = density(tmpB.v);
    points(d.o$x,d.o$y,col="green",type="l")
    legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
    }

    ### Estimate Modes 
    if (  sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
    if (  sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
    if (  sum(class1.v==1) >1){ 
    d1U.o = density(beta1.v[class1.v==1])
    mod1U = d1U.o$x[which.max(d1U.o$y)]
    }
    if (  sum(class1.v==3)>1 ){ 
    d1M.o = density(beta1.v[class1.v==3])
    mod1M = d1M.o$x[which.max(d1M.o$y)]
    }

    ### BETA 2
    for (ii in 1:dim(datM)[[1]] ){
    printFlush(paste("ii=",ii))
    sampleID=ii
    beta2.v = as.numeric(datM[ii,])

    d2U.o = density(beta2.v[which(beta2.v<0.4)]);
    d2M.o = density(beta2.v[which(beta2.v>0.6)]);
    mod2U = d2U.o$x[which.max(d2U.o$y)]
    mod2M = d2M.o$x[which.max(d2M.o$y)]

    ### now deal with type2 fit
    th2.v = vector();
    th2.v[1] = nth1.v[1] + (mod2U-mod1U);
    th2.v[2] = nth1.v[2] + (mod2M-mod1M);

    ### estimate initial weight matrix 
    w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
    w0.m[which(beta2.v <= th2.v[1]),1] = 1;
    w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
    w0.m[which(beta2.v > th2.v[2]),3] = 1;

    print("Fitting EM beta mixture to input probes");
    # I fixed an error in the following line (replaced beta1 by beta2)
    set.seed(1)
    rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE)   ,replace=FALSE)
    em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol,verbose=TRUE);
    print("Done");

    ### for type II probes assign to state (unmethylated, hemi or full methylation)
    subsetclass2.v = apply(em2.o$w,1,which.max);


    if (sum(subsetclass2.v==2)>0 ){
    subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
    mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
    }
    if (sum(subsetclass2.v==2)==0 ){
    subsetth2.v = c(1/2*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2*mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3*mean(beta2.v[rand.idx[subsetclass2.v==3]]));
    }



    class2.v = rep(2,length(beta2.v));
    class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
    class2.v[which(beta2.v >= subsetth2.v[2])] = 3;

    ### generate plot
    if(plots){
    tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
    tmpB.v = vector();
    for(lt in 1:nL){
      tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
    }
    plot(density(beta2.v),  main= paste("Type2fit-",sampleID,sep="")  );
    d.o = density(tmpB.v);
    points(d.o$x,d.o$y,col="green",type="l")
    legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
    }

    classAV1.v = vector();classAV2.v = vector();
    for(l in 1:nL){
      classAV1.v[l] =  em1.o$mu[l,1];
      classAV2.v[l] =  em2.o$mu[l,1];
    }

    ### start normalising input probes
    print("Start normalising input probes");
    nbeta2.v = beta2.v;
    ### select U probes
    lt = 1;
    selU.idx = which(class2.v==lt);
    selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
    selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];
    ### find prob according to typeII distribution
    p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
    ### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
    nbeta2.v[selUR.idx] = q.v;
    p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);
    ### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
    nbeta2.v[selUL.idx] = q.v;

    ### select M probes
    lt = 3;
    selM.idx = which(class2.v==lt);
    selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
    selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];
    ### find prob according to typeII distribution
    p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
    ### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
    nbeta2.v[selMR.idx] = q.v;


    if(doH){ ### if TRUE also correct type2 hemimethylated probes
    ### select H probes and include ML probes (left ML tail is not well described by a beta-distribution).
    lt = 2;
    selH.idx = c(which(class2.v==lt),selML.idx);
    minH = min(beta2.v[selH.idx],na.rm=TRUE)
    maxH = max(beta2.v[selH.idx],na.rm=TRUE)
    deltaH = maxH - minH;
    #### need to do some patching
    deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
    deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)

    ## new maximum of H probes should be
    nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;
    ## new minimum of H probes should be
    nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
    ndeltaH = nmaxH - nminH;

    ### perform conformal transformation (shift+dilation)
    ## new_beta_H(i) = a + hf*(beta_H(i)-minH);
    hf = ndeltaH/deltaH ;
    ### fix lower point first
    nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);

    }


    ### generate final plot to check normalisation
    if(plots){
     print("Generating final plot");
     d1.o = density(beta1.v);
     d2.o = density(beta2.v);
     d2n.o = density(nbeta2.v);
     ymax = max(d2.o$y,d1.o$y,d2n.o$y);
    plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1), main=paste("CheckBMIQ-",sampleID,sep="") );
     points(d1.o$x,d1.o$y,col="red",type="l");
     points(d2n.o$x,d2n.o$y,col="blue",type="l");
     legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
    }

    datM[ii,]= nbeta2.v ;
    } # end of for (ii=1 loop
    datM
} # }}} end of function BMIQcalibration

BMIQ <- function(beta.v,design.v,nL=3,doH=TRUE,nfit=50000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=TRUE,sampleID=1,calibrateUnitInterval=TRUE){ # {{{

    if (calibrateUnitInterval) {
    rangeBySample=range(beta.v,na.rm=TRUE)
    minBySample=rangeBySample[1]
    maxBySample=rangeBySample[2]
    if ( (minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample) ) {
    y1=c(0.001,.999) 
    x1=c(minBySample,maxBySample)
    lm1=lm( y1 ~ x1 )
    intercept1=coef(lm1)[[1]]
    slope1=coef(lm1)[[2]]
    beta.v=intercept1+slope1*beta.v
    } # end of if
    } # end of if (calibrateUnitInterval


    type1.idx = which(design.v==1);
    type2.idx = which(design.v==2);

    beta1.v = beta.v[type1.idx];
    beta2.v = beta.v[type2.idx];


    ### estimate initial weight matrix from type1 distribution
    w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
    w0.m[which(beta1.v <= th1.v[1]),1] = 1;
    w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
    w0.m[which(beta1.v > th1.v[2]),3] = 1;

    ### fit type1
    print("Fitting EM beta mixture to goldstandard probes");
    set.seed(1)
    rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v))  )   ,replace=FALSE)
    em1.o = blc2(Y=matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
    subsetclass1.v = apply(em1.o$w,1,which.max);
    subsetth1.v = c(mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),min(beta1.v[rand.idx[subsetclass1.v==2]])),mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),min(beta1.v[rand.idx[subsetclass1.v==3]],na.rm=TRUE)));
    class1.v = rep(2,length(beta1.v));
    class1.v[which(beta1.v < subsetth1.v[1])] = 1;
    class1.v[which(beta1.v > subsetth1.v[2])] = 3;
    nth1.v = subsetth1.v;
    print("Done");

    ### generate plot from estimated mixture
    if(plots){
    print("Check");
    tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
    tmpB.v = vector();
    for(l in 1:nL){
      tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
    }

    pdf(paste("Type1fit-",sampleID,".pdf",sep=""),width=6,height=4);
    plot(density(beta1.v));
    d.o = density(tmpB.v);
    points(d.o$x,d.o$y,col="green",type="l")
    legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
    dev.off();
    }



    ### Estimate Modes 
    if (  sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
    if (  sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
    if (  sum(class1.v==1) >1){ 
    d1U.o = density(beta1.v[class1.v==1])
    mod1U = d1U.o$x[which.max(d1U.o$y)]
    }
    if (  sum(class1.v==3)>1 ){ 
    d1M.o = density(beta1.v[class1.v==3])
    mod1M = d1M.o$x[which.max(d1M.o$y)]
    }


    d2U.o = density(beta2.v[which(beta2.v<0.4)]);
    d2M.o = density(beta2.v[which(beta2.v>0.6)]);
    mod2U = d2U.o$x[which.max(d2U.o$y)]
    mod2M = d2M.o$x[which.max(d2M.o$y)]


    ### now deal with type2 fit
    th2.v = vector();
    th2.v[1] = nth1.v[1] + (mod2U-mod1U);
    th2.v[2] = nth1.v[2] + (mod2M-mod1M);

    ### estimate initial weight matrix 
    w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
    w0.m[which(beta2.v <= th2.v[1]),1] = 1;
    w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
    w0.m[which(beta2.v > th2.v[2]),3] = 1;

    print("Fitting EM beta mixture to input probes");
    set.seed(1)
    rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE)   ,replace=FALSE)
    em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
    print("Done");

    ### for type II probes assign to state (unmethylated, hemi or full methylation)
    subsetclass2.v = apply(em2.o$w,1,which.max);



    if (sum(subsetclass2.v==2)>0 ){
    subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
    mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
    }
    if (sum(subsetclass2.v==2)==0 ){
    subsetth2.v = c(1/2*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2*mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3*mean(beta2.v[rand.idx[subsetclass2.v==3]]));
    }


    class2.v = rep(2,length(beta2.v));
    class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
    class2.v[which(beta2.v >= subsetth2.v[2])] = 3;


    ### generate plot
    if(plots){
    tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
    tmpB.v = vector();
    for(lt in 1:nL){
      tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
    }
    pdf(paste("Type2fit-",sampleID,".pdf",sep=""),width=6,height=4);
    plot(density(beta2.v));
    d.o = density(tmpB.v);
    points(d.o$x,d.o$y,col="green",type="l")
    legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
    dev.off();
    }

    classAV1.v = vector();classAV2.v = vector();
    for(l in 1:nL){
      classAV1.v[l] =  em1.o$mu[l,1];
      classAV2.v[l] =  em2.o$mu[l,1];
    }

    ### start normalising input probes
    print("Start normalising input probes");
    nbeta2.v = beta2.v;
    ### select U probes
    lt = 1;
    selU.idx = which(class2.v==lt);
    selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
    selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];
    ### find prob according to typeII distribution
    p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
    ### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
    nbeta2.v[selUR.idx] = q.v;
    p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);
    ### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
    nbeta2.v[selUL.idx] = q.v;

    ### select M probes
    lt = 3;
    selM.idx = which(class2.v==lt);
    selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
    selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];
    ### find prob according to typeII distribution
    p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
    ### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
    nbeta2.v[selMR.idx] = q.v;


    if(doH){ ### if TRUE also correct type2 hemimethylated probes
    ### select H probes and include ML probes (left ML tail is not well described by a beta-distribution).
    lt = 2;
    selH.idx = c(which(class2.v==lt),selML.idx);
    minH = min(beta2.v[selH.idx],na.rm=TRUE)
    maxH = max(beta2.v[selH.idx],na.rm=TRUE)
    deltaH = maxH - minH;
    #### need to do some patching
    deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
    deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)

    ## new maximum of H probes should be
    nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;
    ## new minimum of H probes should be
    nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
    ndeltaH = nmaxH - nminH;

    ### perform conformal transformation (shift+dilation)
    ## new_beta_H(i) = a + hf*(beta_H(i)-minH);
    hf = ndeltaH/deltaH ;
    ### fix lower point first
    nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);

    }

    pnbeta.v = beta.v;
    pnbeta.v[type1.idx] = beta1.v;
    pnbeta.v[type2.idx] = nbeta2.v;

    ### generate final plot to check normalisation
    if(plots){
     print("Generating final plot");
     d1.o = density(beta1.v);
     d2.o = density(beta2.v);
     d2n.o = density(nbeta2.v);
     ymax = max(d2.o$y,d1.o$y,d2n.o$y);
     pdf(paste("CheckBMIQ-",sampleID,".pdf",sep=""),width=6,height=4)
     plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1));
     points(d1.o$x,d1.o$y,col="red",type="l");
     points(d2n.o$x,d2n.o$y,col="blue",type="l");
     legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
     dev.off();
    }

    print(paste("Finished for sample ",sampleID,sep=""));

    return(list(nbeta=pnbeta.v,class1=class1.v,class2=class2.v,av1=classAV1.v,av2=classAV2.v,hf=hf,th1=nth1.v,th2=th2.v));

} # }}} end function BMIQ

CheckBMIQ = function(beta.v,design.v,pnbeta.v){### {{{ pnbeta is BMIQ normalised profile

    type1.idx = which(design.v==1);
    type2.idx = which(design.v==2);

    beta1.v = beta.v[type1.idx];
    beta2.v = beta.v[type2.idx];
    pnbeta2.v = pnbeta.v[type2.idx];
  
} # }}} end of function CheckBMIQ

CalibrateUnitInterval=function(datM,onlyIfOutside=TRUE){ # {{{

    rangeBySample=data.frame(lapply(data.frame(t(datM)),range,na.rm=TRUE))
    minBySample=as.numeric(rangeBySample[1,])
    maxBySample=as.numeric(rangeBySample[2,])
    if (onlyIfOutside) { indexSamples=which((minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample))
       }
    if (!onlyIfOutside) { indexSamples=1:length(minBySample)}
    if ( length(indexSamples)>=1 ){
    for ( i in indexSamples) {
    y1=c(0.001,0.999) 
    x1=c(minBySample[i],maxBySample[i])
    lm1=lm( y1 ~ x1 )
    intercept1=coef(lm1)[[1]]
    slope1=coef(lm1)[[2]]
    datM[i,]=intercept1+slope1*datM[i,]
    } # end of for loop
    }
    datM
} # }}} end of function for calibrating to [0,1]
RamsinghLab/ozymandias documentation built on May 9, 2019, 9:21 a.m.