tests/dnamage/NORMALIZATION.R

### ORIGINAL AUTHOR: Andrew Teschendorff
# The original BMIQ function from Teschendorff 2013 adjusts for the type-2 bias in 
# Illumina Infinium 450k data.
# Later functions and edits were provided by yours truly, Steve Horvath.
# I changed the code so that one can calibrate methylation data to a gold standard.
# Specifically, I took version v_1.2 by Teschendorff  and fixed minor issues. 
# Also I made the code more robust e.g. by changing the optimization algorithm.
# Toward this end, I used the method="Nelder-Mead" in optim()

### Later functions and edits by Steve Horvath 
### # Steve Horvath took version v_1.2 by Teschendorff 
# and fixed minor errors. Also he made the code more robust.
# Importantly, SH changed the optimization algorithm to make it #more robust. 
# SH used method="Nelder-Mead" in optim() since the other #optimization method sometimes gets stuck.
#Toward this end, the function blc was replaced by blc2.



require(RPMM);



betaEst2=function (y, w, weights) 
{
    yobs = !is.na(y)
    if (sum(yobs) <= 1) 
        return(c(1, 1))
    y = y[yobs]
    w = w[yobs]
    weights = weights[yobs]
    N = sum(weights * w)
    p = sum(weights * w * y)/N
    v = sum(weights * w * y * y)/N - p * p
    logab = log(c(p, 1 - p)) + log(pmax(1e-06, p * (1 - p)/v - 
        1))
    if (sum(yobs) == 2) 
        return(exp(logab))
    opt = try(optim(logab, betaObjf, ydata = y, wdata = w, weights = weights, 
        method = "Nelder-Mead",control=list(maxit=50) ), silent = TRUE)
    if (inherits(opt, "try-error")) 
        return(c(1, 1))
    exp(opt$par)
} # end of function betaEst



blc2=function (Y, w, maxiter = 25, tol = 1e-06, weights = NULL, verbose = TRUE) 
{
    Ymn = min(Y[Y > 0], na.rm = TRUE)
    Ymx = max(Y[Y < 1], na.rm = TRUE)
    Y = pmax(Y, Ymn/2)
    Y = pmin(Y, 1 - (1 - Ymx)/2)
    Yobs = !is.na(Y)
    J = dim(Y)[2]
    K = dim(w)[2]
    n = dim(w)[1]
    if (n != dim(Y)[1]) 
        stop("Dimensions of w and Y do not agree")
    if (is.null(weights)) 
        weights = rep(1, n)
    mu = a = b = matrix(Inf, K, J)
    crit = Inf
    for (i in 1:maxiter) {
        warn0 = options()$warn
        options(warn = -1)
        eta = apply(weights * w, 2, sum)/sum(weights)
        mu0 = mu
        for (k in 1:K) {
            for (j in 1:J) {
                ab = betaEst2(Y[, j], w[, k], weights)
                a[k, j] = ab[1]
                b[k, j] = ab[2]
                mu[k, j] = ab[1]/sum(ab)
            }
        }
        ww = array(0, dim = c(n, J, K))
        for (k in 1:K) {
            for (j in 1:J) {
                ww[Yobs[, j], j, k] = dbeta(Y[Yobs[, j], j], 
                  a[k, j], b[k, j], log = TRUE)
            }
        }
        options(warn = warn0)
        w = apply(ww, c(1, 3), sum, na.rm = TRUE)
        wmax = apply(w, 1, max)
        for (k in 1:K) w[, k] = w[, k] - wmax
        w = t(eta * t(exp(w)))
        like = apply(w, 1, sum)
        w = (1/like) * w
        llike = weights * (log(like) + wmax)
        crit = max(abs(mu - mu0))
        if (verbose) 
            print(crit)
        if (crit < tol) 
            break
    }
    return(list(a = a, b = b, eta = eta, mu = mu, w = w, llike = sum(llike)))
}











# The function BMIQcalibration was created by Steve Horvath by heavily recycling code
# from A. Teschendorff's BMIQ function.
# BMIQ stands for beta mixture quantile normalization.
# Explanation: datM is a data frame with Illumina beta values (rows are samples, colums are CpGs.
# goldstandard is a numeric vector with beta values that is used as gold standard for calibrating the columns of datM.
# The length of goldstandard has to equal the number of columns of datM.
# Example code: First we impute missing values.
# library(WGCNA); dimnames1=dimnames(datMeth)
# datMeth= data.frame(t(impute.knn(as.matrix(t(datMeth)))$data))
# dimnames(datMeth)=dimnames1
# gold.mean=as.numeric(apply(datMeth,2,mean,na.rm=TRUE))
#datMethCalibrated=BMIQcalibration(datM=datMeth,goldstandard.beta=gold.mean)

BMIQcalibration=function(datM,goldstandard.beta,nL=3,doH=TRUE,nfit=20000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=FALSE,calibrateUnitInterval=TRUE){
    if (length(goldstandard.beta) !=dim(datM)[[2]] ) {stop("Error in function arguments length(goldstandard.beta) !=dim(datM)[[2]]. Consider transposing datM.")}
    if (plots ) {par(mfrow=c(2,2))}
    beta1.v = goldstandard.beta
    
    if (calibrateUnitInterval ) {datM=CalibrateUnitInterval(datM)}
    
### estimate initial weight matrix from type1 distribution
    w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
    w0.m[which(beta1.v <= th1.v[1]),1] = 1;
    w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
    w0.m[which(beta1.v > th1.v[2]),3] = 1;
### fit type1
    print("Fitting EM beta mixture to goldstandard probes");
    set.seed(1)
    rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v))  )   ,replace=FALSE)
    em1.o = blc(matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
    subsetclass1.v = apply(em1.o$w,1,which.max);

    ## MATT
    subsetth1.v = c(
        mean(quantile(beta1.v[rand.idx[subsetclass1.v==1]], probs=0.9),
             quantile(beta1.v[rand.idx[subsetclass1.v==2]], probs=0.1)),
        mean(quantile(beta1.v[rand.idx[subsetclass1.v==2]], probs=0.9),
             quantile(beta1.v[rand.idx[subsetclass1.v==3]], probs=0.1)));

        #c(
        #mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),
        #     min(beta1.v[rand.idx[subsetclass1.v==2]])),
        #mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),
        #     min(beta1.v[rand.idx[subsetclass1.v==3]])));

        
    class1.v = rep(2,length(beta1.v));
    class1.v[which(beta1.v < subsetth1.v[1])] = 1;
    class1.v[which(beta1.v > subsetth1.v[2])] = 3;
    nth1.v = subsetth1.v;
    print("Done");
    
### generate plot from estimated mixture
    if(plots){
        print("Check");
        tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
        tmpB.v = vector();
        for(l in 1:nL){
            tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
        }
        plot(density(beta1.v),main= paste("Type1fit-", sep=""));
        d.o = density(tmpB.v);
        points(d.o$x,d.o$y,col="green",type="l")
        legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
    }
    
### Estimate Modes 
    if (  sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
    if (  sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
    if (  sum(class1.v==1) >1){ 
        d1U.o = density(beta1.v[class1.v==1])
        mod1U = d1U.o$x[which.max(d1U.o$y)]
    }
    if (  sum(class1.v==3)>1 ){ 
        d1M.o = density(beta1.v[class1.v==3])
        mod1M = d1M.o$x[which.max(d1M.o$y)]
    }
    
### BETA 2
    for (ii in 1:dim(datM)[[1]] ){
        printFlush(paste("ii=",ii, "of", dim(datM)[[1]]))
        printFlush(date())
        sampleID=ii
        beta2.v = as.numeric(datM[ii,])
        
        d2U.o = density(beta2.v[which(beta2.v<0.4)]);
        d2M.o = density(beta2.v[which(beta2.v>0.6)]);
        mod2U = d2U.o$x[which.max(d2U.o$y)]
        mod2M = d2M.o$x[which.max(d2M.o$y)]
        
### now deal with type2 fit
        th2.v = vector();
        th2.v[1] = nth1.v[1] + (mod2U-mod1U);
        th2.v[2] = nth1.v[2] + (mod2M-mod1M);
        
### estimate initial weight matrix 
        w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
        w0.m[which(beta2.v <= th2.v[1]),1] = 1;
        w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
        w0.m[which(beta2.v > th2.v[2]),3] = 1;
        
        print("Fitting EM beta mixture to input probes");
                                        # I fixed an error in the following line (replaced beta1 by beta2)
        set.seed(1)
        rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE)   ,replace=FALSE)
        em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol,verbose=TRUE);
        print("Done");
        
### for type II probes assign to state (unmethylated, hemi or full methylation)
        subsetclass2.v = apply(em2.o$w,1,which.max);
        
        
        if (sum(subsetclass2.v==2)>0 ){
            subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
                mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
        }
        if (sum(subsetclass2.v==2)==0 ){
            subsetth2.v = c(1/2*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2*mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3*mean(beta2.v[rand.idx[subsetclass2.v==3]]));
        }
        
        

        class2.v = rep(2,length(beta2.v));
        class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
        class2.v[which(beta2.v >= subsetth2.v[2])] = 3;
        
### generate plot
        if(plots){
            tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
            tmpB.v = vector();
            for(lt in 1:nL){
                tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
            }
            plot(density(beta2.v),  main= paste("Type2fit-",sampleID,sep="")  );
            d.o = density(tmpB.v);
            points(d.o$x,d.o$y,col="green",type="l")
            legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
        }
        
        classAV1.v = vector();classAV2.v = vector();
        for(l in 1:nL){
            classAV1.v[l] =  em1.o$mu[l,1];
            classAV2.v[l] =  em2.o$mu[l,1];
        }
        
### start normalising input probes
        print("Start normalising input probes");
        nbeta2.v = beta2.v;
### select U probes
        lt = 1;
        selU.idx = which(class2.v==lt);
        selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
        selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
        p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
        q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
        nbeta2.v[selUR.idx] = q.v;
        p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);
### find corresponding quantile in type I distribution
        q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
        nbeta2.v[selUL.idx] = q.v;
        
### select M probes
        lt = 3;
        selM.idx = which(class2.v==lt);
        selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
        selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
        p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
        q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
        nbeta2.v[selMR.idx] = q.v;
        
        
        if(doH){ ### if TRUE also correct type2 hemimethylated probes
### select H probes and include ML probes (left ML tail is not well described by a beta-distribution).
            lt = 2;
            selH.idx = c(which(class2.v==lt),selML.idx);
            minH = min(beta2.v[selH.idx],na.rm=TRUE)
            maxH = max(beta2.v[selH.idx],na.rm=TRUE)
            deltaH = maxH - minH;
#### need to do some patching
            deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
            deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)
            
            ## new maximum of H probes should be
            nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;
            ## new minimum of H probes should be
            nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
            ndeltaH = nmaxH - nminH;
            
### perform conformal transformation (shift+dilation)
            ## new_beta_H(i) = a + hf*(beta_H(i)-minH);
            hf = ndeltaH/deltaH ;
### fix lower point first
            nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);
            
        }
        
        
### generate final plot to check normalisation
        if(plots){
            print("Generating final plot");
            d1.o = density(beta1.v);
            d2.o = density(beta2.v);
            d2n.o = density(nbeta2.v);
            ymax = max(d2.o$y,d1.o$y,d2n.o$y);
            plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1), main=paste("CheckBMIQ-",sampleID,sep="") );
            points(d1.o$x,d1.o$y,col="red",type="l");
            points(d2n.o$x,d2n.o$y,col="blue",type="l");
            legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
        }
        
        datM[ii,]= nbeta2.v ;
    } # end of for (ii=1 loop
    datM
} # end of function BMIQcalibration





BMIQ = function(beta.v,design.v,nL=3,doH=TRUE,nfit=50000,th1.v=c(0.2,0.75),th2.v=NULL,niter=5,tol=0.001,plots=TRUE,sampleID=1,calibrateUnitInterval=TRUE){
    
    if (calibrateUnitInterval) {
        rangeBySample=range(beta.v,na.rm=TRUE)
        minBySample=rangeBySample[1]
        maxBySample=rangeBySample[2]
        if ( (minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample) ) {
            y1=c(0.001,.999) 
            x1=c(minBySample,maxBySample)
            lm1=lm( y1 ~ x1 )
            intercept1=coef(lm1)[[1]]
            slope1=coef(lm1)[[2]]
            beta.v=intercept1+slope1*beta.v
        } # end of if
    } # end of if (calibrateUnitInterval
    
    
    type1.idx = which(design.v==1);
    type2.idx = which(design.v==2);
    
    beta1.v = beta.v[type1.idx];
    beta2.v = beta.v[type2.idx];
    
    
### estimate initial weight matrix from type1 distribution
    w0.m = matrix(0,nrow=length(beta1.v),ncol=nL);
    w0.m[which(beta1.v <= th1.v[1]),1] = 1;
    w0.m[intersect(which(beta1.v > th1.v[1]),which(beta1.v <= th1.v[2])),2] = 1;
    w0.m[which(beta1.v > th1.v[2]),3] = 1;
    
### fit type1
    print("Fitting EM beta mixture to goldstandard probes");
    set.seed(1)
    rand.idx = sample(1:length(beta1.v),min(c(nfit, length(beta1.v))  )   ,replace=FALSE)
    em1.o = blc2(Y=matrix(beta1.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
    subsetclass1.v = apply(em1.o$w,1,which.max);
    subsetth1.v = c(mean(max(beta1.v[rand.idx[subsetclass1.v==1]]),min(beta1.v[rand.idx[subsetclass1.v==2]])),mean(max(beta1.v[rand.idx[subsetclass1.v==2]]),min(beta1.v[rand.idx[subsetclass1.v==3]],na.rm=TRUE)));
    class1.v = rep(2,length(beta1.v));
    class1.v[which(beta1.v < subsetth1.v[1])] = 1;
    class1.v[which(beta1.v > subsetth1.v[2])] = 3;
    nth1.v = subsetth1.v;
    print("Done");
    
### generate plot from estimated mixture
    if(plots){
        print("Check");
        tmpL.v = as.vector(rmultinom(1:nL,length(beta1.v),prob=em1.o$eta));
        tmpB.v = vector();
        for(l in 1:nL){
            tmpB.v = c(tmpB.v,rbeta(tmpL.v[l],em1.o$a[l,1],em1.o$b[l,1]));
        }
        
        pdf(paste("Type1fit-",sampleID,".pdf",sep=""),width=6,height=4);
        plot(density(beta1.v));
        d.o = density(tmpB.v);
        points(d.o$x,d.o$y,col="green",type="l")
        legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
        dev.off();
    }
    
    
    
### Estimate Modes 
    if (  sum(class1.v==1)==1 ){ mod1U= beta1.v[class1.v==1]}
    if (  sum(class1.v==3)==1 ){ mod1M= beta1.v[class1.v==3]}
    if (  sum(class1.v==1) >1){ 
        d1U.o = density(beta1.v[class1.v==1])
        mod1U = d1U.o$x[which.max(d1U.o$y)]
    }
    if (  sum(class1.v==3)>1 ){ 
        d1M.o = density(beta1.v[class1.v==3])
        mod1M = d1M.o$x[which.max(d1M.o$y)]
    }
    
    
    d2U.o = density(beta2.v[which(beta2.v<0.4)]);
    d2M.o = density(beta2.v[which(beta2.v>0.6)]);
    mod2U = d2U.o$x[which.max(d2U.o$y)]
    mod2M = d2M.o$x[which.max(d2M.o$y)]
    
    
### now deal with type2 fit
    th2.v = vector();
    th2.v[1] = nth1.v[1] + (mod2U-mod1U);
    th2.v[2] = nth1.v[2] + (mod2M-mod1M);
    
### estimate initial weight matrix 
    w0.m = matrix(0,nrow=length(beta2.v),ncol=nL);
    w0.m[which(beta2.v <= th2.v[1]),1] = 1;
    w0.m[intersect(which(beta2.v > th2.v[1]),which(beta2.v <= th2.v[2])),2] = 1;
    w0.m[which(beta2.v > th2.v[2]),3] = 1;
    
    print("Fitting EM beta mixture to input probes");
    set.seed(1)
    rand.idx = sample(1:length(beta2.v),min(c(nfit, length(beta2.v)),na.rm=TRUE)   ,replace=FALSE)
    em2.o = blc2(Y=matrix(beta2.v[rand.idx],ncol=1),w=w0.m[rand.idx,],maxiter=niter,tol=tol);
    print("Done");
    
### for type II probes assign to state (unmethylated, hemi or full methylation)
    subsetclass2.v = apply(em2.o$w,1,which.max);
    
    
    
    if (sum(subsetclass2.v==2)>0 ){
        subsetth2.v = c(mean(max(beta2.v[rand.idx[subsetclass2.v==1]]),min(beta2.v[rand.idx[subsetclass2.v==2]])),
            mean(max(beta2.v[rand.idx[subsetclass2.v==2]]),min(beta2.v[rand.idx[subsetclass2.v==3]])));
    }
    if (sum(subsetclass2.v==2)==0 ){
        subsetth2.v = c(1/2*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 1/2*mean(beta2.v[rand.idx[subsetclass2.v==3]]), 1/3*max(beta2.v[rand.idx[subsetclass2.v==1]])+ 2/3*mean(beta2.v[rand.idx[subsetclass2.v==3]]));
    }
    

    class2.v = rep(2,length(beta2.v));
    class2.v[which(beta2.v <= subsetth2.v[1])] = 1;
    class2.v[which(beta2.v >= subsetth2.v[2])] = 3;
    
    
### generate plot
    if(plots){
        tmpL.v = as.vector(rmultinom(1:nL,length(beta2.v),prob=em2.o$eta));
        tmpB.v = vector();
        for(lt in 1:nL){
            tmpB.v = c(tmpB.v,rbeta(tmpL.v[lt],em2.o$a[lt,1],em2.o$b[lt,1]));
        }
        pdf(paste("Type2fit-",sampleID,".pdf",sep=""),width=6,height=4);
        plot(density(beta2.v));
        d.o = density(tmpB.v);
        points(d.o$x,d.o$y,col="green",type="l")
        legend(x=0.5,y=3,legend=c("obs","fit"),fill=c("black","green"),bty="n");
        dev.off();
    }
    
    classAV1.v = vector();classAV2.v = vector();
    for(l in 1:nL){
        classAV1.v[l] =  em1.o$mu[l,1];
        classAV2.v[l] =  em2.o$mu[l,1];
    }
    
### start normalising input probes
    print("Start normalising input probes");
    nbeta2.v = beta2.v;
### select U probes
    lt = 1;
    selU.idx = which(class2.v==lt);
    selUR.idx = selU.idx[which(beta2.v[selU.idx] > classAV2.v[lt])];
    selUL.idx = selU.idx[which(beta2.v[selU.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
    p.v = pbeta(beta2.v[selUR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
    nbeta2.v[selUR.idx] = q.v;
    p.v = pbeta(beta2.v[selUL.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=TRUE);
### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=TRUE);
    nbeta2.v[selUL.idx] = q.v;
    
### select M probes
    lt = 3;
    selM.idx = which(class2.v==lt);
    selMR.idx = selM.idx[which(beta2.v[selM.idx] > classAV2.v[lt])];
    selML.idx = selM.idx[which(beta2.v[selM.idx] < classAV2.v[lt])];
### find prob according to typeII distribution
    p.v = pbeta(beta2.v[selMR.idx],em2.o$a[lt,1],em2.o$b[lt,1],lower.tail=FALSE);
### find corresponding quantile in type I distribution
    q.v = qbeta(p.v,em1.o$a[lt,1],em1.o$b[lt,1],lower.tail=FALSE);
    nbeta2.v[selMR.idx] = q.v;
    
    
    if(doH){ ### if TRUE also correct type2 hemimethylated probes
### select H probes and include ML probes (left ML tail is not well described by a beta-distribution).
        lt = 2;
        selH.idx = c(which(class2.v==lt),selML.idx);
        minH = min(beta2.v[selH.idx],na.rm=TRUE)
        maxH = max(beta2.v[selH.idx],na.rm=TRUE)
        deltaH = maxH - minH;
#### need to do some patching
        deltaUH = -max(beta2.v[selU.idx],na.rm=TRUE) + min(beta2.v[selH.idx],na.rm=TRUE)
        deltaHM = -max(beta2.v[selH.idx],na.rm=TRUE) + min(beta2.v[selMR.idx],na.rm=TRUE)
        
        ## new maximum of H probes should be
        nmaxH = min(nbeta2.v[selMR.idx],na.rm=TRUE) - deltaHM;
        ## new minimum of H probes should be
        nminH = max(nbeta2.v[selU.idx],na.rm=TRUE) + deltaUH;
        ndeltaH = nmaxH - nminH;
        
### perform conformal transformation (shift+dilation)
        ## new_beta_H(i) = a + hf*(beta_H(i)-minH);
        hf = ndeltaH/deltaH ;
### fix lower point first
        nbeta2.v[selH.idx] = nminH + hf*(beta2.v[selH.idx]-minH);
        
    }
    
    pnbeta.v = beta.v;
    pnbeta.v[type1.idx] = beta1.v;
    pnbeta.v[type2.idx] = nbeta2.v;
    
### generate final plot to check normalisation
    if(plots){
        print("Generating final plot");
        d1.o = density(beta1.v);
        d2.o = density(beta2.v);
        d2n.o = density(nbeta2.v);
        ymax = max(d2.o$y,d1.o$y,d2n.o$y);
        pdf(paste("CheckBMIQ-",sampleID,".pdf",sep=""),width=6,height=4)
        plot(density(beta2.v),type="l",ylim=c(0,ymax),xlim=c(0,1));
        points(d1.o$x,d1.o$y,col="red",type="l");
        points(d2n.o$x,d2n.o$y,col="blue",type="l");
        legend(x=0.5,y=ymax,legend=c("type1","type2","type2-BMIQ"),bty="n",fill=c("red","black","blue"));
        dev.off();
    }
    
    print(paste("Finished for sample ",sampleID,sep=""));
    
    return(list(nbeta=pnbeta.v,class1=class1.v,class2=class2.v,av1=classAV1.v,av2=classAV2.v,hf=hf,th1=nth1.v,th2=th2.v));
    
}



CheckBMIQ = function(beta.v,design.v,pnbeta.v){### pnbeta is BMIQ normalised profile
    
    type1.idx = which(design.v==1);
    type2.idx = which(design.v==2);
    
    beta1.v = beta.v[type1.idx];
    beta2.v = beta.v[type2.idx];
    pnbeta2.v = pnbeta.v[type2.idx];
    
} # end of function CheckBMIQ
















CalibrateUnitInterval=function(datM,onlyIfOutside=TRUE){

    rangeBySample=data.frame(lapply(data.frame(t(datM)),range,na.rm=TRUE))
    minBySample=as.numeric(rangeBySample[1,])
    maxBySample=as.numeric(rangeBySample[2,])
    if (onlyIfOutside) { indexSamples=which((minBySample<0 | maxBySample>1) & !is.na(minBySample) & !is.na(maxBySample))
                     }
    if (!onlyIfOutside) { indexSamples=1:length(minBySample)}
    if ( length(indexSamples)>=1 ){
        for ( i in indexSamples) {
            y1=c(0.001,0.999) 
            x1=c(minBySample[i],maxBySample[i])
            lm1=lm( y1 ~ x1 )
            intercept1=coef(lm1)[[1]]
            slope1=coef(lm1)[[2]]
            datM[i,]=intercept1+slope1*datM[i,]
        } # end of for loop
    }
    datM
} #end of function for calibrating to [0,1]
perishky/meffonym documentation built on Nov. 8, 2024, 8:36 p.m.