R/quantileNormalize.R
In blima: Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level

# Functions for bead level quantile normalization of Illumina data processed by beadarray package. 
# Author: Vojtěch Kulvait
# Licence: GPL-3
###############################################################################

singleArrayNormalize <- function
###This function does quantile normalization of object beadLevelData from package beadarray.
###Internal function not intended to direct use. Please use quantileNormalize.
##title<<Bead level quantile normalization.
(b,##<<Object beadLevelData from package beadarray
normalizationMod=NULL,##<<NULL for normalization of all input b. Otherwise specifies logical vector of the length equals to the number of arrays in b. 
channelNormalize="Grn",##<<Name of channel to normalize.
channelOutput="qua",##<<Name of output normalized channel.
channelInclude=NULL,##<<This field allows user to set channel with weights which have to be in {0,1}. 
##All zero weighted items are excluded from quantile normalization and the value asigned to such probes is a close to value which would be assigned to them if not being excluded. 
##You can turn this off by setting this NULL. This option may be used together with bacgroundCorrect method or/and with beadarray QC (defaults to NULL).
dst##<<This field must be sorted. It is a distribution of values to assign to ports.
##By default this distribution is computed using meanDistribution function.
)
{
    checkIntegrityOfSingleBeadLevelDataObject(b, "warn")
    if(missing(dst))
        stop("Field dst is mandatory");
    x=b
    if(is.null(channelInclude))
    {
        nowts=TRUE;
    }else
    {
        nowts=FALSE;
    }
    
    bdc = integer();
    sw = integer();
    iteratorSet = 1:nrow(b);
    if(!is.null(normalizationMod))
    {
        iteratorSet = iteratorSet[normalizationMod];
    }
    for(i in iteratorSet)
    {
        bdc[i] = nrow(b[[i]])
        if(nowts)
        {
            sw[i] = bdc[i];
        }else
        {
            sw[i] = sum(b[[i]][,channelInclude])#\in {0,1}
        }
    }
    prvku = length(dst);
    
    for(i in iteratorSet)
    {
        q = double(bdc[i])
        sorted = sort(b[[i]][,channelNormalize], index.return=TRUE)$ix
        cnt = 0;
        if(nowts)
        {
            for(j in 1:bdc[i])
            {
                cnt = cnt + 1;
                pos = round((sw[i]-prvku+(prvku-1)*cnt)/(sw[i]-1))
                q[sorted[j]] = dst[pos]
            }
        }else
        {
            sortedweights = b[[i]][, channelInclude][sorted]
            for(j in 1:bdc[i])
            {
                if(sortedweights[j]==1)
                {
                    cnt = cnt + 1;
                    pos = round((sw[i]-prvku+(prvku-1)*cnt)/(sw[i]-1))
                    q[sorted[j]] = dst[pos]
                }else
                {
                    if(cnt==0)
                    {
                        q[sorted[j]] = dst[1]
                    }else
                    {
                        pos = round((sw[i]-prvku+(prvku-1)*cnt)/(sw[i]-1))
                        q[sorted[j]] = dst[pos]
                    }
                }
            }
        }
        if(cnt!=sw[i])
            stop("ERROR HERE, PLEASE REPORT BUG!")
        x = setWeights(x, wts=q, array=i, wtName=channelOutput)
    }
    return(x)
}

singleNumberOfDistributionElements <-function
###Internal function
##title<<Internal
(b,##<<Object beadLevelData from package beadarray
normalizationMod=NULL,##<<NULL for normalization of all input b. Otherwise specifies logical vector of the length equals to the number of arrays in b or list of such vectors if b is a list of beadLevelData classes. 
channelInclude=NULL##<This field allows user to set channel with weights which have to be in {0,1}. 
)
{
    if(is.null(channelInclude))
    {
        nowts=TRUE;
    }else
    {
        nowts=FALSE;
    }
    #object inicialization
    sumWeights <- integer();
    
    iteratorSet = 1:nrow(b);
    if(!is.null(normalizationMod))
    {
        iteratorSet = iteratorSet[normalizationMod];
    }
    for(i in iteratorSet)
    {
        if(nowts)
        {
            sumWeights = c(sumWeights, nrow(b[[i]]));
        }else
        {
            sumWeights = c(sumWeights, sum(b[[i]][,channelInclude]))#\in {0,1}
        }
    }
    return(min(sumWeights))
}

numberOfDistributionElements <-function
###Internal function
##title<<Internal
(b,##<<Object beadLevelData from package beadarray or list of these objects
normalizationMod=NULL,##<<NULL for normalization of all input b. Otherwise specifies logical vector of the length equals to the number of arrays in b or list of such vectors if b is a list of beadLevelData classes. 
channelInclude=NULL##<This field allows user to set channel with weights which have to be in {0,1}. 
)
{
    if(class(b)=="list")
    {
        sumWeights <- integer();
        if(is.null(normalizationMod))
        {
            for(i in 1:length(b))
            {
                sumWeights[i] = singleNumberOfDistributionElements(b[[i]], NULL, channelInclude)
            }
        }else
        {
            for(i in 1:length(b))
            {
                sumWeights[i] = singleNumberOfDistributionElements(b[[i]], normalizationMod[[i]], channelInclude)
            }
        }
        return(min(sumWeights));
    }else
    {
        return(singleNumberOfDistributionElements(b, normalizationMod, channelInclude));
    }
}

quantileNormalize <- structure(function
###This function does quantile normalization of object beadLevelData from package beadarray.
##title<<Bead level quantile normalization.
(b,##<<Object beadLevelData from package beadarray or list of these objects
normalizationMod=NULL,##<<NULL for normalization of all input b. Otherwise specifies logical vector of the length equals to the number of arrays in b or list of such vectors if b is a list of beadLevelData classes. 
channelNormalize="Grn",##<<Name of channel to normalize.
channelOutput="qua",##<<Name of output normalized channel.
channelInclude=NULL,##<<This field allows user to set channel with weights which have to be in {0,1}. 
##All zero weighted items are excluded from quantile normalization and the value asigned to such probes is a close to value which would be assigned to them if not being excluded. 
##You can turn this off by setting this NULL. This option may be used together with bacgroundCorrect method or/and with beadarray QC (defaults to NULL).
dst##<<User can specify sorted vector which represents distribution that should be assigned to items.
#By default this distribution is computed using meanDistribution function.
)
{
    waslist = checkIntegrity(b, "warn")
    if(!waslist)
    {
        b = list(b)
        if(!is.null(normalizationMod))
        {
            normalizationMod = list(normalizationMod)
        }
    }
    channelExistsIntegrityWithLogicalVectorList(b, normalizationMod, channelNormalize, "error")
    if(!is.null(channelInclude))
    {
        channelExistsIntegrityWithLogicalVectorList(b, normalizationMod, channelInclude, "error")
    }

    if(missing(dst) || is.null(dst))
    {
        prvku <- numberOfDistributionElements(b, normalizationMod, channelInclude);
        dst <- meanDistribution(b, normalizationMod, channelNormalize, channelInclude, prvku);
    }
    x=list();
    for(i in 1:length(b))
    {
        x=c(x, singleArrayNormalize(b[[i]],normalizationMod[[i]],channelNormalize,channelOutput,channelInclude,dst));
    }
    if(waslist)
    {
        return(x);
    }else
    {
        return(x[[1]])
    }
}, ex=function()
{
    if(require("blimaTestingData") && interactive())
    {
        #To perform background correction, variance stabilization and quantile normalization.
        data(blimatesting)
        #Prepare logical vectors corresponding to conditions A(groups1Mod), E(groups2Mod) and both(c).
        groups1 = "A";
        groups2 = "E";
        sampleNames = list()
        processingMod = list()
        for(i in 1:length(blimatesting))
        {
            p = pData(blimatesting[[i]]@experimentData$phenoData)
            processingMod[[i]] = p$Group %in% c(groups1, groups2);
            sampleNames[[i]] = p$Name
        }
        #Background correction and quantile normalization followed by testing including log2TransformPositive transformation.
        blimatesting = bacgroundCorrect(blimatesting, normalizationMod = processingMod, channelBackgroundFilter="bgf")
        blimatesting = nonPositiveCorrect(blimatesting, normalizationMod = processingMod, channelCorrect="GrnF",  channelBackgroundFilter="bgf", channelAndVector="bgf")
        blimatesting = varianceBeadStabilise(blimatesting, normalizationMod = processingMod,
                quality="GrnF", channelInclude="bgf", channelOutput="vst")
        blimatesting = quantileNormalize(blimatesting, normalizationMod = processingMod,
                channelNormalize="vst", channelOutput="qua", channelInclude="bgf")
    }else
    {
        print("To run this example, please install blimaTestingData package from bioconductor by running BiocManager::install('blimaTestingData').");
    }
})

initMeanDistribution <-function
###This is internal function not intended to direct use which initializes mean distribution.
(srt,##<<vector of sorted values
prvku ##<<number of items in meanDistribution
)
{
    if(prvku>length(srt))
    {
        stop("Length of meanDistribution is less or equal length of particullar array!");
    }
#    meanDistribution <- double(prvku);
#    vsz = length(srt)
#    prev = 1;
#    prvek = srt[1];
#    if(vsz>1)
#    {
#        for(j in 2:vsz)
#        {
#            pos = round((vsz-prvku+(prvku-1)*j)/(vsz-1))
#            if(pos==prev)
#            {
#                prvek=c(prvek, srt[j])
#            }else
#            {
#                meanDistribution[pos-1] = mean(prvek);
#                prvek = srt[j];
#                prev = pos;
#            }
#        }
#    }
#    meanDistribution[prev] = mean(prvek);
#    return(meanDistribution);
	return(interpolateSortedVector(srt, prvku))
	
}

updateMeanDistribution<-function
###This is internal function not intended to direct use. Updates mean distribution.
(meanDistribution, #semicreated meanDistribution
srt, ##<<vector of sorted values
arraysUsed ##<<number of arrays allready used to create distribution
)
{
    if(length(meanDistribution)>length(srt))
    {
        stop("Length of meanDistribution is less or equal length of particullar array!");
    }
#    if(length(meanDistribution)<3)
#    {
#        warning("DEBUG:Something weird with meanDistribution!")
#    }
#    prev = 1;
#    prvek = srt[1];
#    vsz = length(srt);
#    prvku = length(meanDistribution);
#    i = arraysUsed + 1;
#    #print(i)
#    if(vsz>1)
#    {
#        for(j in 2:vsz)
#        {
#            pos = round((vsz-prvku+(prvku-1)*j)/(vsz-1))
#            if(pos==prev)
#            {
#                prvek=c(prvek, srt[j])
#            }else
#            {
#                meanDistribution[pos-1] = (1-1/i)*meanDistribution[pos-1] + mean(prvek)/i;
#                prvek = srt[j];
#                prev = pos;
#            }
#        }
#    }
#    meanDistribution[prev] = (1-1/i)*meanDistribution[prev] + mean(prvek)/i;
#    return(meanDistribution);
	prvku = length(meanDistribution)
	interpolatedVector = interpolateSortedVector(srt, prvku)
	i = arraysUsed + 1
	meanDistribution <- (1 - 1/i) * meanDistribution + (1/i) * interpolatedVector
	return(meanDistribution)
	
}

interpolateSortedVector <- function
###Interpolates given sorted vector to the vector of different
###length. It does not sort input vector thus for unsorted vectors do not guarantee functionality.
###Internal function.
##title<<Interpolate sorted vector
(vector,##<<Sorted vector to interpolate.
newSize##<<Size of the vector to produce.
)
{
	if (length(vector) == 0) {
		return(as.double(rep(NA, newSize)))
	}
	if (newSize == 0) {
		return(double())
	}
	#ret = .C("interpolateSortedVector_", as.double(vector), as.integer(length(vector)), 
	#		as.integer(newSize), newVector = double(newSize))$newVector
	ret = interpolateSortedVectorRcpp_(vector, newSize)
	return(ret)
}


meanDistribution <- function
###This function processes arrays in the object beadLevelData from package beadarray and returns sorted double vector.
###The vector has length prvku. And the distribution of this vector is a "mean" of all distributions of distributionChannel quantity in arrays.
###In case that probe numbers are different from prvku it does some averaging.
##title<<Produce sorted double vector with mean distribution.
(b,##<<Object beadLevelData from package beadarray or list of these objects
normalizationMod=NULL,##<<NULL for normalization of all input b. Otherwise specifies logical 
##vector of the length equals to the number of arrays in b or list of such vectors 
##if b is a list of beadLevelData classes (defaults to NULL). 
distributionChannel="Grn",##<<Channel to do mean distribution from (defaults to "Grn").
channelInclude=NULL,##<<This field allows user to set channel with weights which have to be in {0,1}. 
##All zero weighted items are excluded from quantile normalization and the value asigned to such probes is a close to value which would be assigned to them if not being excluded. 
##You can turn this off by setting this NULL. This option may be used together with bacgroundCorrect method or/and with beadarray QC (defaults to NULL).
prvku##<<Number of items in a resulting double vector. Prvku must not be more than minimal number of indluded items in any distributionChannel.
)
{
    if(class(b)!="list")
    {
        b = list(b)
        if(!is.null(normalizationMod))
        {
            normalizationMod = list(normalizationMod)
        }
    }
    checkIntegrityOfListOfBeadLevelDataObjects(b, "warn")
    
    #channelInclude initialization
    if(is.null(channelInclude))
    {
        nowts=TRUE;
    }else
    {
        nowts=FALSE;
    }
    
    n <- numberOfDistributionElements(b, normalizationMod, channelInclude);
    if(missing(prvku) || is.null(prvku))
    {
        prvku <- n
    }
    if(prvku > n)
    {
        stop("Prvku must not be more than minimal number of included items in any distributionChannel.")
    }

    arraysUsed = 0;
    for(i in 1:length(b))
    {
        bbb = b[[i]];
        iteratorSet = 1:nrow(bbb);
        if(!is.null(normalizationMod))
        {
            iteratorSet = iteratorSet[normalizationMod[[i]]];
        }
        for(j in iteratorSet)
        {
            if(nowts)
            {
                srt = sort(bbb[[j]][,distributionChannel]);
            }else
            {
                srt = sort(bbb[[j]][bbb[[j]][,channelInclude]==1,][,distributionChannel])
            }
            
            if(arraysUsed>0)
            {
                meanDistribution <- updateMeanDistribution(meanDistribution, srt, arraysUsed)
            }else
            {
                meanDistribution <- initMeanDistribution(srt, prvku);
            }
            arraysUsed = arraysUsed + 1;
        }
    }

    if(is.unsorted(meanDistribution))
    {
        stop("UNSORTED SEQUENCE - PLEASE REPORT BUG!")
    }
    return(meanDistribution)
}
Any scripts or data that you put into this service are public.
blima documentation built on Nov. 8, 2020, 8:15 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
blima
Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level

R/quantileNormalize.R
In blima: Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level

Try the blima package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

blima Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level

R/quantileNormalize.R In blima: Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level

Try the blima package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

blima
Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level

R/quantileNormalize.R
In blima: Tools for the preprocessing and analysis of the Illumina microarrays on the detector (bead) level