R/miscFunctions.R
In npGSEA: Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)

runNormApprox <- function(xg, y, wg, set){
    betahatG <- .calcBetaHatG (xg, y)
    ThatGw <- .calcThatGw (betahatG, wg)
    varThatGw <- .calcVarThatGw (xg, y, wg)
    stats <- .calcPvaluesNorm (ThatGw, varThatGw)
    return( new("npGSEAResultNorm", 
        geneSetName = setName(set),
        zStat = as.numeric(ThatGw/sqrt(varThatGw)),
        ThatGw = as.numeric(ThatGw),
        varThatGw = as.numeric(varThatGw), 
        pLeft = as.numeric(stats$pLeft), 
        pRight = as.numeric(stats$pRight), 
        pTwoSided = as.numeric(stats$pTwoSided), 
        xSet = xg,
        betaHats = betahatG) )
}

.calcBetaHatG <- function(xg, y){
	nobs <- length(y)
    ###a vector containing each Betahat_g
   	betahat_g <- (xg %*% y)/nobs  
   	return(betahat_g)
}

##test stat for gene set in experiment (norm approx):
.calcThatGw <- function(betahat_g, wg) {
    ##sum of all the Betahat_g's wtd by  their corresponding weights
    ThatGw <- wg %*% betahat_g 
    return(ThatGw)
}

##calculates variance of permuted ThatGw (norm approx)
## based on eq 5, section 3.3 in Larson and Owen 2014
.calcVarThatGw <- function(xg, y, wg) {
    nobs <- length(y)
    XGi <- wg %*% xg 
    XbarGG <- mean(XGi^2)
    mu2 <- mean(y^2)  ##section 3.1
    VarThatGw  <- (mu2/(nobs-1))*XbarGG
    return(VarThatGw)
}


##calculates p-values for our appoximation (norm)
.calcPvaluesNorm <- function(ThatGw, varThatGw){
    Tscaled <- ThatGw/sqrt(varThatGw)
    pL <- pnorm(Tscaled)
    pR <- pnorm(-Tscaled)
    pC <- 2*min(pL, pR)
    return(list(pLeft=pL, pRight=pR, pTwoSided=pC))
}

### calc Beta dist'n based stats
##uses many of the same functions as norm approx
runBetaApprox <- function(xg, y, wg, set, epAdj){    
    #first we get betahats, That and its variance
    betahatG <- .calcBetaHatG (xg, y)
    ThatGw <- .calcThatGw (betahatG, wg)
    varThatGw <- .calcVarThatGw(xg, y, wg)

    ##next we calculate A and B, see section 3.3
    nobs <- length(y)
    XGi <- wg %*% xg 
    sortedXGi <- sort(XGi)
    sortedY <- sort(y)
    sortedYdecreasing <- sort(y, decreasing=TRUE)
    A <- (sortedXGi%*%sortedYdecreasing)/nobs 
    B <- (sortedXGi%*%sortedY)/nobs

    ##next we calculate alpha and beta and our betaStat, see eq. 3
    alpha <- (A/(B-A))*(A*B/varThatGw+1)
    beta <- (-B/(B-A))*(A*B/varThatGw+1)
    betaStat <- (ThatGw-A)/(B-A)
    pvals <- .calcPvaluesBeta (betaStat, alpha, beta, y, epAdj)
    
    return(new("npGSEAResultBeta",
        geneSetName = setName(set),
        betaStat = as.numeric(betaStat),
        ThatGw = as.numeric(ThatGw),
        varThatGw = as.numeric(varThatGw), 
        alpha = as.numeric(alpha), 
        beta = as.numeric(beta), 
        pLeft = as.numeric(pvals$pLeft), 
        pRight = as.numeric(pvals$pRight), 
        pTwoSided = as.numeric(pvals$pTwoSided), 
        xSet = xg,
        betaHats = betahatG)  )
}

##calculates p-values for our appoximation (beta)
##with the epsilon min p-value based on the number of levels of y
.calcPvaluesBeta <- function(betaStat, alpha, beta, y, epAdj){
	epsilon <- 0
	
	if (epAdj==TRUE){
		yFactor <- as.factor(y)
    	yLevels <- levels(yFactor)
   	 	numLevels <- length(yLevels)
    	n <- length(y)
    	epsilon <- 1
    	for (j in 1:numLevels){
    		locLevel <- which(yFactor==yLevels[j])
    		nj <- length(locLevel)
    		epsilon <- epsilon*factorial(nj)
    	}
    	epsilon <- epsilon/factorial(n)
	}
	
	pL <- pbeta(betaStat, alpha, beta)
	pLeft <- epsilon + (1-2*epsilon)*pL
	
    pR <- pbeta(betaStat, alpha, beta, lower.tail=FALSE)
    pRight <- epsilon + (1-2*epsilon)*pR
    
    pC <- 2*min(pLeft, pRight)
    return(list(pLeft=pLeft, pRight=pRight, pTwoSided=pC) )
}




##test stat for gene set in experiment (chisq approx):
runChisqApprox <- function(xg, y, wg, set){
    if( length(y) < 4 ){
        stop("Number of samples is too small 
        for permutation approximation; 
        you need at least 4 samples for the 
        chiSq approximation analysis")
    }
    #Get set statistics
    betahatG <- .calcBetaHatG (xg, y)
    ChatGw <- .calcChatGw(betahatG, wg)
    # Get moment approximation to permutation distribution
    chiSqMoments <- .calcChiSqMoments(xg, y, wg)   
    df <- as.numeric(2*chiSqMoments$mu^2/chiSqMoments$var)
    sigsq <- as.numeric(chiSqMoments$mu/df)
    pQ <- as.numeric(1 - pchisq(ChatGw/sigsq, df) )
   
	##check moments and NaN's
	if (is.nan(pQ)==TRUE){
		print(paste0("The moments for the chisquare approximation (mu= ", chiSqMoments$mu , " and var = ",
		chiSqMoments$var, "), which did not lead to a valid approximation.  This may be a result of a small sample size at one or more levels 
		of the predictor or response variable"))
	}
    
    return(new("npGSEAResultChiSq",
        geneSetName = setName(set),
        chiSqStat = ChatGw/sigsq,
        ChatGw = ChatGw,
        sigmaSq = sigsq, 
        DF= df, 
        pTwoSided = pQ, 
        xSet = xg,
        betaHats = betahatG) )
}

# sum of squared dot products, genes x response y with wts
.calcChatGw <- function(betahat_g, wg) {
    C_Gw <- wg %*%(betahat_g)^2
    return(as.numeric(C_Gw))
}

# Return permutation based moments (chisq approx)
##section 3.1, eq4 of paper
.calcChiSqMoments <- function(xg, y, wg){
    x <- t(xg)
    nobs <- length(y)  ##num of obs
    G = ncol(x)  ##number of genes in G

    ##get A and B matrices
    A <- .getA(nobs)
    B <- .getB()
    AtB = t(A) %*% B  ###2x2 matrix of constants

    ## Precompute X moments
    xgh = xgghh = matrix(NA,G,G)
    for( g in 1:G ){
        for( h in 1:g ){
            xgh[g,h]   = xgh[h,g]   = mean( x[,g] * x[,h] )
            xgghh[g,h] = xgghh[h,g] = mean( x[,g]^2 * x[,h]^2 )
        }
    }

    ## Precompute Y moments
    ymoments <- matrix( 0,2,1 )
    mu2 <- mean(y^2)
    ymoments[1,1] <- mu2^2  
    ymoments[2,1] <- mean(y^4)  #mu4

    ##Calculate mean square (lemma 1)
    meansquare <- rep(0,G)
    for( g in 1:G ){
        meansquare[g] <- mu2 * xgh[g,g]/(nobs-1)
    }

    ##Calculate cov square (lemma 2, collary 2)
    covsquare <- matrix(0,G,G)
    xmoments <- matrix( 0,2,1 )
    for( g in 1:G ){
        for( h in 1:g ){
            xmoments[1,1]  <- (xgh[g,g]*xgh[h,h]+2*xgh[g,h]^2)/nobs^2
            xmoments[2,1]  <- xgghh[g,h]/nobs^3
            covsquare[g,h] = covsquare[h,g] = 
                t(ymoments) %*% AtB %*% xmoments - 
                ymoments[1,1] * xgh[g,g] * xgh[h,h] /(nobs-1)^2
        }
    }
  
    ##Calculate E(C_tildeGw) and var(C_tildeGw) (eq 4, section 3.1)
    mu <- wg%*% meansquare 
    var <- (wg%*% covsquare)%*%wg
    return( list(mu=mu, var = var) )
}

## Calculate matrix A from Lemma 2, section 3.1
.getA <- function(n){
    A = matrix(0, 5, 2)
    A[,1] = c( 0, 0, n/(n-1),-n/((n-1)*(n-2)), 3*n/((n-1)*(n-2)*(n-3)) )
    A[,2] = c(1,-1/(n-1),-1/(n-1),2/((n-1)*(n-2)),-6/((n-1)*(n-2)*(n-3)))
    return(A)
}
## Calculate matrix B from Lemma 2, section 3.1
.getB <- function(){
    B <- matrix(0, 5, 2)
    B[,1] <- c(0, 0, 1, -2, 1)
    B[,2] <- c(1, -4, -3, 12, 6)
    return(B)
}

Any scripts or data that you put into this service are public.

npGSEA documentation built on Nov. 8, 2020, 6:49 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

npGSEA
Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)

R/miscFunctions.R
In npGSEA: Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)

Defines functions .getB .getA .calcChiSqMoments .calcChatGw runChisqApprox .calcPvaluesBeta runBetaApprox .calcPvaluesNorm .calcVarThatGw .calcThatGw .calcBetaHatG runNormApprox

Try the npGSEA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

npGSEA Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)

R/miscFunctions.R In npGSEA: Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)

Defines functions .getB .getA .calcChiSqMoments .calcChatGw runChisqApprox .calcPvaluesBeta runBetaApprox .calcPvaluesNorm .calcVarThatGw .calcThatGw .calcBetaHatG runNormApprox

Try the npGSEA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

npGSEA
Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)

R/miscFunctions.R
In npGSEA: Permutation approximation methods for gene set enrichment analysis (non-permutation GSEA)