R/geneset-fry.R

Defines functions .fryEffects fry.default fry

Documented in fry fry.default

fry <- function(y,...) UseMethod("fry")

fry.default <- function(y,index=NULL,design=NULL,contrast=ncol(design),geneid=NULL,gene.weights=NULL,standardize="posterior.sd",sort="directional",...)
#	Quick version of roast gene set test assuming equal variances between genes
#	The up and down p-values are equivalent to those from roast with nrot=Inf
#	in the special case of prior.df=Inf.
#	Gordon Smyth and Goknur Giner
#	Created 30 January 2015.  Last modified 9 June 2020.
{
#	Check gene.weights
	if(!is.null(gene.weights)) {
		if(length(gene.weights) != nrow(y)) stop("length of gene.weights should equal nrow(y)")
		if(!is.numeric(gene.weights)) stop("gene.weights should be numeric")
	}

#	Partial matching of extra arguments
#	array.weights and trend.var included for (undocumented) backward compatibility with code before 9 May 2016.
	Dots <- list(...)
	PossibleArgs <- c("array.weights","weights","block","correlation","trend.var","robust","winsor.tail.p")
	if(!is.null(names(Dots))) {
		i <- pmatch(names(Dots),PossibleArgs)
		names(Dots) <- PossibleArgs[i]
	}

#	Defaults for extra arguments
	if(is.null(Dots$trend.var)) {
		Dots$trend <- FALSE
	} else {
		Dots$trend <- Dots$trend.var
		Dots$trend.var <- NULL
	}
	if(is.null(Dots$robust)) Dots$robust <- FALSE
	if(Dots$robust & is.null(Dots$winsor.tail.p)) Dots$winsor.tail.p <- c(0.05,0.1)
	if(!is.null(Dots$block) & is.null(Dots$correlation)) stop("Intra-block correlation must be specified")

#	Covariate for trended eBayes
	covariate <- NULL
	if(Dots$trend) covariate <- rowMeans(as.matrix(y))

#	Compute effects matrix (with df.residual+1 columns)
	Effects <- .lmEffects(y=y,design=design,contrast=contrast,
		array.weights=Dots$array.weights,
		weights=Dots$weights,
		block=Dots$block,
		correlation=Dots$correlation)

#	Divide out genewise standard deviations
	standardize <- match.arg(standardize, c("none","residual.sd","posterior.sd","p2"))
	if(!standardize=="none") {

#		Estimate genewise sds robustly
		OK <- requireNamespace("statmod",quietly=TRUE)
		if(!OK) stop("statmod package required but isn't installed (or can't be loaded)")
		gq <- statmod::gauss.quad.prob(128,"uniform")
		df.residual <- ncol(Effects)-1
		Eu2max <- sum( (df.residual+1)*gq$nodes^df.residual*qchisq(gq$nodes,df=1)*gq$weights )
		u2max <- apply(Effects^2,1,max)
		s2.robust <- (rowSums(Effects^2)-u2max) / (df.residual+1-Eu2max)

#		Empirical Bayes moderation method I: estimate hyperparameters from robust variances
		if(standardize=="p2") {
			sv <- squeezeVar(s2.robust, df=0.92*df.residual,
				covariate=covariate,
				robust=Dots$robust,
				winsor.tail.p=Dots$winsor.tail.p)
			s2.robust <- sv$var.post
		}

#		Empirical Bayes moderation method II: estimate hyperparameters from residual variances but apply squeezing to robust variances
		if(standardize=="posterior.sd") {
			s2 <- rowMeans(Effects[,-1,drop=FALSE]^2)
			if(Dots$robust) {
				fit <- fitFDistRobustly(s2, df1=df.residual, covariate=covariate, winsor.tail.p=Dots$winsor.tail.p)
				df.prior <- fit$df2.shrunk
			} else {
				fit <- fitFDist(s2, df1=df.residual, covariate=covariate)
				df.prior <- fit$df2
			}
			s2.robust <- .squeezeVar(s2.robust,df=0.92*df.residual,var.prior=fit$scale,df.prior=df.prior)
		}

		Effects <- Effects/sqrt(s2.robust)
	}

#	Check geneid (can be a vector of gene IDs or an annotation column name)
	if(is.null(geneid)) {
		geneid <- rownames(Effects)
	} else {
		geneid <- as.character(geneid)
		if(length(geneid)==1) 
			geneid <- as.character(y$genes[,geneid])
		else
			if(length(geneid) != nrow(y)) stop("geneid vector should be of length nrow(y)")
	}

	.fryEffects(effects=Effects,index=index,geneid=geneid,gene.weights=gene.weights,sort=sort)
}


.fryEffects <- function(effects,index=NULL,geneid=rownames(effects),gene.weights=NULL,sort=TRUE)
#	fry given the effects matrix
#	Gordon Smyth and Goknur Giner
#	Created 30 January 2015.  Last modified 24 July 2019
{
	G <- nrow(effects)
	neffects <- ncol(effects)
	df.residual <- neffects-1L

#	Check index
	if(is.null(index)) index <- list(set1=1L:G)
	if(is.data.frame(index) || !is.list(index)) index <- list(set1=index)
	nsets <- length(index)
	if(nsets==0L) stop("index is empty")
	if(is.null(names(index)))
		names(index) <- paste0("set",formatC(1L:nsets,width=1L+floor(log10(nsets)),flag="0"))
	else
		if(anyDuplicated(names(index))) stop("Gene sets don't have unique names",call. =FALSE)

#	Global statistics
	NGenes <- rep_len(0L,length.out=nsets)
	PValue.Mixed <- t.stat <- rep_len(0,length.out=nsets)
	for (i in 1:nsets) {
		iset <- index[[i]]
		if(is.data.frame(iset)) {
			if(ncol(iset)>1 && is.numeric(iset[,2])) {
				iw <- iset[,2]
				iset <- iset[,1]
				if(is.factor(iset)) iset <- as.character(iset)
				if(is.character(iset)) {
					if(anyDuplicated(iset)) stop("Duplicate gene ids in set ",i)
					j <- match(geneid,iset)
					inset <- which(!is.na(j))
					EffectsSet <- effects[inset,,drop=FALSE]
					iw <- iw[j[inset]]
				} else {
					EffectsSet <- effects[iset,,drop=FALSE]
				}
				EffectsSet <- iw * EffectsSet
			} else {
				stop("index ",i," is a data.frame but doesn't contain gene weights")
			}
		} else {
			if(is.factor(iset)) iset <- as.character(iset)
			if(is.character(iset)) iset <- which(geneid %in% iset)
			EffectsSet <- effects[iset,,drop=FALSE]
			if(!is.null(gene.weights)) {
				iw <- gene.weights[iset]
				EffectsSet <- iw * EffectsSet
			}
		}
		MeanEffectsSet <- colMeans(EffectsSet)
		t.stat[i] <- MeanEffectsSet[1] / sqrt(mean(MeanEffectsSet[-1]^2))
		NGenes[i] <- nrow(EffectsSet)

		if(NGenes[i]>1) {
			SVD <- svd(EffectsSet,nu=0)
			A <- SVD$d^2
			d1 <- length(A)
			d <- d1-1L
			beta.mean <- 1/d1
			beta.var <- d/d1/d1/(d1/2+1)
			Fobs <- (sum(EffectsSet[,1]^2)-A[d1]) / (A[1]-A[d1])
			Frb.mean <- (sum(A) * beta.mean - A[d1]) / (A[1]-A[d1])
			COV <- matrix(-beta.var/d,d1,d1)
			diag(COV) <- beta.var
			Frb.var <- (A %*% COV %*% A ) / (A[1]-A[d1])^2
			alphaplusbeta <- Frb.mean*(1-Frb.mean)/Frb.var-1
			alpha <- alphaplusbeta*Frb.mean
			beta <- alphaplusbeta-alpha
			PValue.Mixed[i] <- pbeta(Fobs,shape1=alpha,shape2=beta,lower.tail=FALSE)
		}
	}

	Direction <- rep_len("Up",length.out=nsets)
	Direction[t.stat<0] <- "Down"
	PValue <- 2*pt(-abs(t.stat),df=df.residual)
	PValue.Mixed[NGenes==1] <- PValue[NGenes==1]

#	Add FDR
	if(nsets>1) {
		FDR <- p.adjust(PValue,method="BH")
		FDR.Mixed <- p.adjust(PValue.Mixed,method="BH")
		tab <- data.frame(NGenes=NGenes,Direction=Direction,PValue=PValue,FDR=FDR,PValue.Mixed=PValue.Mixed,FDR.Mixed=FDR.Mixed)
	} else {
		tab <- data.frame(NGenes=NGenes,Direction=Direction,PValue=PValue,PValue.Mixed=PValue.Mixed)
	}
	rownames(tab) <- names(index)

#	Sort results
	if(is.logical(sort)) if(sort) sort <- "directional" else sort <- "none"
	sort <- match.arg(sort,c("directional","mixed","none"))
	if(sort=="none") return(tab)
	if(sort=="directional") {
		o <- order(tab$PValue,-tab$NGenes,tab$PValue.Mixed)
	} else {
		o <- order(tab$PValue.Mixed,-tab$NGenes,tab$PValue)
	}
	tab[o,,drop=FALSE]
}
hdeberg/limma documentation built on Dec. 20, 2021, 3:43 p.m.