R/calcNormFactors.R
In edgeR: Empirical Analysis of Digital Gene Expression Data in R

Documented in calcNormFactors calcNormFactors.default calcNormFactors.DGEList calcNormFactors.SummarizedExperiment

calcNormFactors <- function(object, ...)
UseMethod("calcNormFactors")

calcNormFactors.DGEList <- function(object, method=c("TMM","TMMwsp","RLE","upperquartile","none"), refColumn=NULL, logratioTrim=.3, sumTrim=0.05, doWeighting=TRUE, Acutoff=-1e10, p=0.75, ...)
#	Scale normalization of RNA-Seq data, for DGEList objects
#	Created 2 October 2014.  Last modified 2 June 2020.
{
	if(!is.null(object$offset)) warning("object contains offsets, which take precedence over library\nsizes and norm factors (and which will not be recomputed).")
	object$samples$norm.factors <- calcNormFactors(object=object$counts, lib.size=object$samples$lib.size, method=method, refColumn=refColumn, logratioTrim=logratioTrim, sumTrim=sumTrim, doWeighting=doWeighting, Acutoff=Acutoff, p=p)
	object
}

calcNormFactors.SummarizedExperiment <- function(object, method=c("TMM","TMMwsp","RLE","upperquartile","none"), refColumn=NULL, logratioTrim=.3, sumTrim=0.05, doWeighting=TRUE, Acutoff=-1e10, p=0.75, ...)
#	Scale normalization of RNA-Seq data, for SummarizedExperiment objects
#	Created 19 March 2020.  Last modified 19 March 2020.
{
	object <- SE2DGEList(object)
	object$samples$norm.factors <- calcNormFactors(object=object$counts, lib.size=object$samples$lib.size, method=method, refColumn=refColumn, logratioTrim=logratioTrim, sumTrim=sumTrim, doWeighting=doWeighting, Acutoff=Acutoff, p=p)
	object
}

calcNormFactors.default <- function(object, lib.size=NULL, method=c("TMM","TMMwsp","RLE","upperquartile","none"), refColumn=NULL, logratioTrim=.3, sumTrim=0.05, doWeighting=TRUE, Acutoff=-1e10, p=0.75, ...)
#	Scale normalization of RNA-Seq data, for count matrices
#	Mark Robinson, Gordon Smyth and edgeR team
#	Created 22 October 2009. Last modified 2 June 2020.
{
#	Check object
	x <- as.matrix(object)
	if(any(is.na(x))) stop("NA counts not permitted")
	nsamples <- ncol(x)

#	Check lib.size
	if(is.null(lib.size)) {
		lib.size <- colSums(x)
	} else {
		if(anyNA(lib.size)) stop("NA lib.sizes not permitted")
		if(length(lib.size) != nsamples) {
			if(length(lib.size) > 1L) warning("calcNormFactors: length(lib.size) doesn't match number of samples",call.=FALSE)
			lib.size <- rep_len(lib.size,nsamples)
		}
	}

#	Check method
#	Backward compatability with previous name
	if(length(method)==1L && method=="TMMwzp") {
		method <- "TMMwsp"
		message("TMMwzp has been renamed to TMMwsp")
	}
	method <- match.arg(method)

#	Remove all zero rows
	allzero <- .rowSums(x>0, nrow(x), nsamples) == 0L
	if(any(allzero)) x <- x[!allzero,,drop=FALSE]

#	Degenerate cases
	if(nrow(x)==0 || nsamples==1) method="none"

#	Calculate factors
	f <- switch(method,
		TMM = {
			if( is.null(refColumn) ) {
				f75 <- suppressWarnings(.calcFactorQuantile(data=x, lib.size=lib.size, p=0.75))
				if(median(f75) < 1e-20) {
					refColumn <- which.max(colSums(sqrt(x)))
				} else {
					refColumn <- which.min(abs(f75-mean(f75)))
				}
			}
			f <- rep_len(NA_real_,nsamples)
			for(i in 1:nsamples)
				f[i] <- .calcFactorTMM(obs=x[,i],ref=x[,refColumn], libsize.obs=lib.size[i], libsize.ref=lib.size[refColumn], logratioTrim=logratioTrim, sumTrim=sumTrim, doWeighting=doWeighting, Acutoff=Acutoff)
			f
		},
		TMMwsp = {
			if( is.null(refColumn) ) refColumn <- which.max(colSums(sqrt(x)))
			f <- rep_len(NA_real_,nsamples)
			for(i in 1:nsamples)
				f[i] <- .calcFactorTMMwsp(obs=x[,i],ref=x[,refColumn], libsize.obs=lib.size[i], libsize.ref=lib.size[refColumn], logratioTrim=logratioTrim, sumTrim=sumTrim, doWeighting=doWeighting, Acutoff=Acutoff)
			f
		},
		RLE = .calcFactorRLE(x)/lib.size,
		upperquartile = .calcFactorQuantile(x,lib.size,p=p),
		none = rep_len(1,nsamples)
	)

#	Factors should multiple to one
	f <- f/exp(mean(log(f)))

#	Output
	names(f) <- colnames(x)
	f
}

.calcFactorRLE <- function(data)
#	Scale factors as in Anders et al (2010)
#	Mark Robinson
#	Created 16 Aug 2010
{
	gm <- exp(rowMeans(log(data)))
	apply(data, 2, function(u) median((u/gm)[gm > 0]))
}

.calcFactorQuantile <- function (data, lib.size, p=0.75)
#	Generalized version of upper-quartile normalization
#	Mark Robinson and Gordon Smyth
#	Created 16 Aug 2010. Last modified 12 Sep 2020.
{
	f <- rep_len(1,ncol(data))
	for (j in seq_len(ncol(data))) f[j] <- quantile(data[,j], probs=p)
	if(min(f)==0) warning("One or more quantiles are zero")
	f / lib.size
}

.calcFactorTMM <- function(obs, ref, libsize.obs=NULL, libsize.ref=NULL, logratioTrim=.3, sumTrim=0.05, doWeighting=TRUE, Acutoff=-1e10)
#	TMM between two libraries
#	Mark Robinson
{
	obs <- as.numeric(obs)
	ref <- as.numeric(ref)

	if( is.null(libsize.obs) ) nO <- sum(obs) else nO <- libsize.obs
	if( is.null(libsize.ref) ) nR <- sum(ref) else nR <- libsize.ref

	logR <- log2((obs/nO)/(ref/nR))          # log ratio of expression, accounting for library size
	absE <- (log2(obs/nO) + log2(ref/nR))/2  # absolute expression
	v <- (nO-obs)/nO/obs + (nR-ref)/nR/ref   # estimated asymptotic variance

#	remove infinite values, cutoff based on A
	fin <- is.finite(logR) & is.finite(absE) & (absE > Acutoff)

	logR <- logR[fin]
	absE <- absE[fin]
	v <- v[fin]

	if(max(abs(logR)) < 1e-6) return(1)

#	taken from the original mean() function
	n <- length(logR)
	loL <- floor(n * logratioTrim) + 1
	hiL <- n + 1 - loL
	loS <- floor(n * sumTrim) + 1
	hiS <- n + 1 - loS

#	keep <- (rank(logR) %in% loL:hiL) & (rank(absE) %in% loS:hiS)
#	a fix from leonardo ivan almonacid cardenas, since rank() can return
#	non-integer values when there are a lot of ties
	keep <- (rank(logR)>=loL & rank(logR)<=hiL) & (rank(absE)>=loS & rank(absE)<=hiS)

	if(doWeighting)
		f <- sum(logR[keep]/v[keep], na.rm=TRUE) / sum(1/v[keep], na.rm=TRUE)
	else
		f <- mean(logR[keep], na.rm=TRUE)

#	Results will be missing if the two libraries share no features with positive counts
#	In this case, return unity
	if(is.na(f)) f <- 0
	2^f
}

.calcFactorTMMwsp <- function(obs, ref, libsize.obs=NULL, libsize.ref=NULL, logratioTrim=.3, sumTrim=0.05, doWeighting=TRUE, Acutoff=-1e10)
#	TMM with pairing of singleton positive counts between the obs and ref libraries
#	Gordon Smyth
#	Created 19 Sep 2018. Last modified 9 Jun 2020.
{
	obs <- as.numeric(obs)
	ref <- as.numeric(ref)

#	epsilon serves as floating-point zero
	eps <- 1e-14

#	Identify zero counts
	pos.obs <- (obs > eps)
	pos.ref <- (ref > eps)
	npos <- 2L * pos.obs + pos.ref

#	Remove double zeros and NAs
	i <- which(npos==0L | is.na(npos))
	if(length(i)) {
		obs <- obs[-i]
		ref <- ref[-i]
		npos <- npos[-i]
	}

#	Check library sizes	
	if(is.null(libsize.obs)) libsize.obs <- sum(obs)
	if(is.null(libsize.ref)) libsize.ref <- sum(ref)

#	Pair up as many singleton positives as possible
#	The unpaired singleton positives are discarded so that no zeros remain
	zero.obs <- (npos == 1L)
	zero.ref <- (npos == 2L)
	k <- (zero.obs | zero.ref)
	n.eligible.singles <- min( sum(zero.obs), sum(zero.ref))
	if(n.eligible.singles > 0L) {
		refk <- sort(ref[k],decreasing=TRUE)[1:n.eligible.singles]
		obsk <- sort(obs[k],decreasing=TRUE)[1:n.eligible.singles]
		obs <- c(obs[!k],obsk)
		ref <- c(ref[!k],refk)
	} else {
		obs <- obs[!k]
		ref <- ref[!k]
	}

#	Any left?
	n <- length(obs)
	if(n==0L) return(1)

#	Compute M and A values
	obs.p <- obs / libsize.obs
	ref.p <- ref / libsize.ref
	M <- log2( obs.p / ref.p )
	A <- 0.5 * log2( obs.p * ref.p )

#	If M all zero, return 1
	if(max(abs(M)) < 1e-6) return(1)

#	M order, breaking ties by shrunk M
	obs.p.shrunk <- (obs+0.5) / (libsize.obs+0.5)
	ref.p.shrunk <- (ref+0.5) / (libsize.ref+0.5)
	M.shrunk <- log2( obs.p.shrunk / ref.p.shrunk )
	o.M <- order(M, M.shrunk)

#	A order
	o.A <- order(A)

#	Trim
	loM <- as.integer(n * logratioTrim) + 1L
	hiM <- n + 1L - loM
	keep.M <- rep_len(FALSE,n)
	keep.M[o.M[loM:hiM]] <- TRUE
	loA <- as.integer(n * sumTrim) + 1L
	hiA <- n + 1L - loA
	keep.A <- rep_len(FALSE,n)
	keep.A[o.A[loA:hiA]] <- TRUE
	keep <- keep.M & keep.A
	M <- M[keep]

#	Average the M values
	if(doWeighting) {
		obs.p <- obs.p[keep]
		ref.p <- ref.p[keep]
		v <- (1-obs.p)/obs.p/libsize.obs + (1-ref.p)/ref.p/libsize.ref
		w <- (1+1e-6) / (v+1e-6)
		TMM <- sum(w*M) / sum(w)
	} else {
		TMM <- mean(M)
	}

	2^TMM
}

Any scripts or data that you put into this service are public.

edgeR documentation built on Jan. 16, 2021, 2:03 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

edgeR
Empirical Analysis of Digital Gene Expression Data in R

R/calcNormFactors.R
In edgeR: Empirical Analysis of Digital Gene Expression Data in R

Defines functions .calcFactorTMMwsp .calcFactorTMM .calcFactorRLE calcNormFactors.default calcNormFactors.SummarizedExperiment calcNormFactors.DGEList calcNormFactors

Documented in calcNormFactors calcNormFactors.default calcNormFactors.DGEList calcNormFactors.SummarizedExperiment

Try the edgeR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

edgeR Empirical Analysis of Digital Gene Expression Data in R

R/calcNormFactors.R In edgeR: Empirical Analysis of Digital Gene Expression Data in R

Defines functions .calcFactorTMMwsp .calcFactorTMM .calcFactorRLE calcNormFactors.default calcNormFactors.SummarizedExperiment calcNormFactors.DGEList calcNormFactors

Documented in calcNormFactors calcNormFactors.default calcNormFactors.DGEList calcNormFactors.SummarizedExperiment

Try the edgeR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

edgeR
Empirical Analysis of Digital Gene Expression Data in R

R/calcNormFactors.R
In edgeR: Empirical Analysis of Digital Gene Expression Data in R