R/functions.R

Defines functions makeSigma .goodnessOfFit computeGOF empiricalStart .tuningDefault .updatesDefault .hyperparametersDefault .outputDefault xsScores symbolsInteresting ssStatistic .permuteClassLabels .permutePhenotype .parameterNames calculatePosteriorAvg .chainInitialize

Documented in calculatePosteriorAvg empiricalStart ssStatistic symbolsInteresting xsScores

##setMethod("chain.initialize", "XdeSet",
.chainInitialize <- function(object,
                             chain.length,
                             verbose){
              names(chain.length) <- c("potential",
                                        "acceptance",
                                        "nu",
                                        "deltaDelta",
                                        "a",
                                        "b",
                                        "c2",
                                        "gamma2",
                                        "r",
                                        "rho",
                                        "delta",
                                        "xi",
                                        "sigma2",
                                        "t",
                                        "l",
                                        "phi",
                                        "theta",
                                        "lambda",
                                        "tau2R",
				       "tau2Rho",
                                        "probDelta",
                                        "diffExpressed")

            ##These values are placeholders to create a chain of the
            ##right dimension.  If called, it is assumed that default
            ##starting values for the chain will be used.
            G <- nrow(object)
            QQ <- length(object)
            potential <- rep(0, 20*chain.length[["potential"]])
	      ##acceptance <- rep(0, 17*chain.length[["acceptance"]])
	      acceptance <- rep(0, 18*chain.length[["acceptance"]])
            Nu <- rep(0, G*QQ*chain.length[["nu"]])
            DDelta <- rep(0, G*QQ*chain.length[["deltaDelta"]])
            A <- rep(0, QQ*chain.length[["a"]])
            B <- rep(0, QQ*chain.length[["b"]])
            C2 <- rep(0, chain.length[["c2"]])
            Gamma2 <- rep(0, chain.length[["gamma2"]])
            Rho <- rep(0, QQ*(QQ-1)/2*chain.length[["rho"]])
            R <- rep(0, QQ*(QQ-1)/2*chain.length[["r"]])
            Delta <- rep(0, G*QQ*chain.length[["delta"]])
            Xi <- rep(0, QQ*chain.length[["xi"]])
            Sigma2 <- rep(0, QQ * G*chain.length[["sigma2"]])
            T <- rep(0, QQ*chain.length[["t"]])
            L <- rep(0, QQ*chain.length[["l"]])
            Phi <- rep(0, QQ * G*chain.length[["phi"]])
            Theta <- rep(0, QQ*chain.length[["theta"]])
            Lambda <- rep(0, QQ*chain.length[["lambda"]])
	      Tau2R <- Tau2Rho <- rep(0, QQ*chain.length[["tau2R"]])
            ProbDelta <- rep(0, G*QQ*chain.length[["probDelta"]])
            Diffexpressed <- rep(0, 3*G)
            eenv <- new.env()
            x <- list(Potential=potential,
                      Acceptance=acceptance,
                      Nu=Nu,
                      DDelta=DDelta,
                      A=A,
                      B=B,
                      C2=C2,
                      Gamma2=Gamma2,
                      R=R,
                      Rho=Rho,
                      Delta=Delta,
                      Xi=Xi,
                      Sigma2=Sigma2,
                      T=T,
                      L=L,
                      Phi=Phi,
                      Theta=Theta,
                      Lambda=Lambda,
                      Tau2R=Tau2R,
		      Tau2Rho=Tau2Rho,
                      ProbDelta=ProbDelta,
                      Diffexpressed=Diffexpressed)
              assign("vars", x, eenv)
              eenv
      }

calculatePosteriorAvg <- function(object, NCONC=2, NDIFF=1, burnin=0){
	if(class(object) != "XdeMcmc") stop("object must be of class XdeMcmc")
	if(iterations(object) < 10) stop("Too few iterations to compute posterior means")
	ii <- which((1:iterations(object)) > burnin)
	if(length(ii) < 1) stop("Not enough iterations after burnin to compute the posterior means")
	D <- object$DDelta
	d <- object$delta
	dD <- sign(d*D)
	##For each mcmc iteration, assess concordance, discordance
	myf <- function(x){
		##define an indicator for concordance
		##indicator for discordance
		tmp <- cbind(rowSums(x>0), rowSums(x < 0))
		##tmp <- t(rbind(colSums(x > 0), colSums(x < 0)))
		colnames(tmp) <- c("#up", "#down")
		discordant <- (tmp[, 1] * tmp[, 2]) != 0

		##1) no discordant signs
		concordant <- (tmp[, 1] * tmp[, 2]) == 0
		##2) let the number of concordant studies be user-specified
		concordant <- concordant * (rowSums(tmp) >= NCONC)
		diffExpr <- rowSums(abs(tmp)) >= NDIFF
		indicators <- matrix(c(concordant,
				       discordant,
				       diffExpr), ncol=3, byrow=FALSE)
	}
##	I <- array(NA, dim=dim(dD)
	I <- array(NA, c(dim(dD)[2], 3, dim(dD)[1]),
		   dimnames=list(featureNames(object),
		   c("concordant", "discordant", "diffExpressed"),
		   paste("iter", 1:dim(dD)[1], sep="_")))
	IT <- dim(dD)[1]
	for(i in 1:IT){
		I[, , i] <- myf(dD[i, , ])
	}
	concordant.avg <- rowMeans(I[, "concordant", ])
	discordant.avg <- rowMeans(I[, "discordant", ])
	diffExpressed.avg <- rowMeans(I[, "diffExpressed", ])
	X <- cbind(concordant.avg, discordant.avg, diffExpressed.avg)
	colnames(X) <- c("concordant", "discordant", "diffExpressed")
	rownames(X) <- featureNames(object)
	X
}


.parameterNames <- function(){
	c("thin",
	  "potential",
	  "acceptance",
	  "nu",
	  "DDelta",
	  "a",
	  "b",
	  "c2",
	  "gamma2",
	  "r",
	  "rho",
	  "delta",
	  "xi",
	  "sigma2",
	  "t",
	  "l",
	  "phi",
	  "theta",
	  "lambda",
	  "tau2R",
	  "tau2Rho",
	  "probDelta",
	  "diffExpressed")
}

.permutePhenotype <- function(object, seed, ...){
	if(missing(seed)){
		seed <- sample(0:100000, 1)
		set.seed(seed)
		print(paste("Seed", seed))
	}
	print(paste("permuting phenotypeLabel", phenotypeLabel(object)))
	object@data <- lapply(object, .permuteClassLabels, phenotype=phenotypeLabel(object))
	object
}

.permuteClassLabels <- function(object, phenotype, ...){
	pData(object)[, phenotype] <- permute(pData(object)[, phenotype])
	object
}

ssStatistic <- function(statistic=c("t", "sam", "z")[1],
                        phenotypeLabel,
                        esetList, ...){
	if(!(statistic == "t" | statistic == "sam" | statistic == "z")){
		stop("only t and sam study-specific statistics provided")
	}

	##-----------------------------------------------------------
	##Wrapper for Welch t-statistic
##	multtestWrapper <- function(eset, classlabel, ...){
##		require(multtest) || stop("Package multtest not available")
##		column <- grep(classlabel, colnames(pData(eset)))
##		if(length(column) == 0) { warning("Invalid classlabel.  Using the first column in phenoData"); classlabel <- pData(eset)[, 1]}
##		if(length(column) > 1)  {warning("More than 1 phenotype has the class label.  Using the first"); classlabel <- pData(eset)[, column[1]]}
##		if(length(column) == 1) classlabel <- pData(eset)[, column]
##		X <- exprs(eset)
##		stat <- mt.teststat(X=X, classlabel=classlabel, test="t",
##				    nonpara="n")
##		stat
##	}
	tt <- rowttests(esetList, phenotypeLabel)

	sam.wrapper <- function(eset, classlabel, method, returnQ, gene.names){
		##require(siggenes) || stop("Package siggenes not available")
		column <- grep(classlabel, colnames(pData(eset)))
		if(length(column) == 0) {
      warning("Invalid classlabel.  Using the first column in phenoData"); classlabel <- pData(eset)[, 1]
    }
		if(length(column) > 1)  {
      warning("More than 1 phenotype has the class label.  Using the first"); classlabel <- pData(eset)[, column[1]]
    }
		if(length(column) == 1) classlabel <- pData(eset)[, column]
		X <- exprs(eset)
		stat <- sam(data=X, cl=classlabel, method=method,
			    gene.names=gene.names)
		if(returnQ) stat <- stat@q.value else stat <- stat@d
		stat
	}

	z.wrapper <- function(object, classlabel, useREM=TRUE){
		##object is a list of ExpressionSets
		##require(GeneMeta) || stop("Package GeneMeta is not available")
		pDataList <- function(eset, covariateName){
			1-(pData(eset)[, grep(covariateName, colnames(pData(eset)))])
		}
		classes <- lapply(object, pDataList, classlabel)
		z <- zScores(object, classes=classes, useREM=useREM)
		##    z <- data.frame(z[, grep("zSco", colnames(z))])
		z <- z[, grep("zSco", colnames(z))]
		return(z)
	}

	##---------------------------------------------------------------------------
	##Wrapper for SAM-statistic
	stat <- switch(statistic,
##		       t=sapply(esetList, multtestWrapper, classlabel=phenotypeLabel),
		       t=tt,
		       sam=sapply(esetList, sam.wrapper, classlabel=phenotypeLabel,
		       returnQ=FALSE, method="d.stat", gene.names=featureNames(esetList), ...),
		       z=z.wrapper(object=esetList, classlabel=phenotypeLabel, useREM=TRUE))
	rownames(stat) <- featureNames(esetList)
	return(stat)
}

symbolsInteresting <- function(rankingStatistic, percentile=0.9,
                               colors=c("grey50", "royalblue"),
                               symbols=c(".", "o"),
                               size=c(3, 1),
                               background=c("white", "grey70")){
  thr <- quantile(rankingStatistic, probs=percentile)
##  avgC <- postAvg[, "concordant"]
  i <- rankingStatistic > thr
  N <- length(rankingStatistic)
  pch <- rep(symbols[1], N)
  j <- i[order(i)]
  pch[j] <- symbols[2]
  col <- rep(colors[1], N)
  col[j] <- colors[2]
  cex <- rep(size[1], N)
  cex[j] <- size[2]
  bg <- rep(background[1], N)
  bg[j] <- background[2]
  list(order=order(i), pch=pch, col=col, bg=bg, cex=cex)
}


##statistic should be the standardized Delta
xsScores <- function(statistic, N){
	if(class(statistic)[[1]] != "matrix") stop("study-specific statistics must be provided as a matrix")
	if(missing(N)) stop("Must specify the sample size of each study")

	scores <- matrix(NA, nrow(statistic), ncol=3)
	if(!is.null(rownames(statistic))) rownames(scores) <- rownames(statistic)
	colnames(scores) <- c("diffExpressed", "concordant", "discordant")

	##------------------------------------------------
	##Differential expression
	scores[, 1] <- .pca(abs(statistic), N=N)

	##------------------------------------------------
	##Concordance
	scores[, 2] <- abs(.pca(statistic, N=N))

	##------------------------------------------------
	##Discordance
	tmp <- cbind(rowSums(statistic > 0), rowSums(statistic < 0))
	I <- which((tmp[, 1] * tmp[, 2]) == 0)  ##concordant signs
##	I <- abs(rowSums(sign(statistic))) < ncol(statistic)
##	I[!I] <- -1
	scores[, 3] <- .pca(abs(statistic), N)
	##penalize concordance
	scores[I, 3] <- scores[I, 3] * -1
	return(scores)
}


.outputDefault <- function(thin=1, burnin=FALSE){
##  if(!burnin) out <- rep(1, 22) else out <- rep(0, 22)
	out <- rep(1, 23)
	out[1] <- thin
	out[23] <- 1  ##by default, keep a running average of the posterior statistics attached to the object (do not write to file)
	names(out) <- .parameterNames()
	return(out)
}
##Number of platforms (P)
.hyperparametersDefault <- function(P){
  ##Setting p_a^0, p_a^1, p_b^0, and p_b^1 = 0.1 (instead of 0) allows
  ##a and b to be identical to zero and 1 (10/23/2006 HT)
  hyperparameters <- c(1.0, 1.0, 0.1, 0.1,
                       1.0, 1.0, 0.1, 0.1,
                       P+1,
                       P+1,
                       1.0, 1.0,
                       50.0)
  names(hyperparameters) <- c("alpha.a", "beta.a", "p0.a", "p1.a",
                              "alpha.b", "beta.b", "p0.b", "p1.b",
                              "nu.r",
                              "nu.rho",
                              "alpha.xi", "beta.xi",
                              "c2max")
  hyperparameters
}



##.getPar <- function(object, ...){
##  as.list(...)
##}




###########################################################################
##Number of updates per iterations set as per Haakon's 10/23/2006 e-mail
###########################################################################
.updatesDefault <- function(){
  updates <- c(1, # nu Gibbs
                          1, # Delta Gibbs
                          3, # a
                          3, # b
                          1, # c2 Gibbs
                          1, # gamma2 Gibbs
                          3, # r and c2
                          3, # rho and gamma2
                          1, # delta
                          1, # xi Gibbs
                          1, # sigma2
                          1, # t and l
                          1, # l
                          1, # phi
                          1, # theta and lambda
                          1, # lambda
                          1, # tau2R
	       1) #tau2Rho
  names(updates) <- c("nu",
                      "Delta",
                      "a",
                      "b",
                      "c2",
                      "gamma2",
                      "rAndC2",
                      "rhoAndGamma2",
                      "delta",
                      "xi",
                      "sigma2",
                      "tAndL",
                      "l",
                      "phi",
                      "thetaAndLambda",
                      "lambda",
                      "tau2R",
		      "tau2Rho")
  updates
}

###########################################################################
##Tuning parameters set as per Haakon's 10/23/2006 e-mail
###########################################################################
.tuningDefault <- function(){
	##10/23/2006: HT
	tuning <- c(0.01,  #1. nu Gibbs
		    0.01,  #2. Delta Gibbs
		    0.04,  #3. a
		    0.04,  #4. b
		    0.01,  #5. c2 Gibbs
		    0.01,  #6. gamma2 Gibbs
		    0.01,  #7. r and c2
		    0.01,  #8. rho and gamma2
		    0.01,  #9. delta
		    0.01,  #10. xi Gibbs
		    0.50,  #11. sigma2
		    0.10,  #12. t and l
		    0.04,  #13. l
		    0.40,  #14. phi
		    0.10,  #15. theta and lambda
		    0.02,  #16. lambda
		    0.04,  #17. tau2R
		    0.04) #18. tau2Rho
  names(tuning) <- c("nu", "Delta",  "a", "b", "c2", "gamma2",
                     "rAndC2", "rhoAndGamma2", "delta", "xi", "sigma2",
                     "tAndL", "l", "phi", "thetaAndLambda", "lambda", "tau2R", "tau2Rho")
  return(tuning)
}



empiricalStart <- function(object, zeroNu=FALSE,
			   phenotypeLabel, one.delta=FALSE, T_THRESH=4){
	if(length(grep(phenotypeLabel, varLabels(object[[1]]))) < 1) stop("phenotypeLabel must be in varLabels")
	potential <- rep(0, 19)
	acceptance <- rep(0, 17)

	##order should be platforms, genes for nu, phi, sigma2, delta, and DDelta
	if(!zeroNu){
		meanExpression <- function(eset) rowMeans(exprs(eset))
		Nu <- sapply(object, meanExpression)
		Rho <- cor(Nu)
		##order should be 1_2, 1_3, ..., 1_P, 2_3, ..., 2_P, ...
		##R <- R[upper.tri(R)]  ##Not in the right order!
		cors <- list()
		if(nrow(Rho) > 1){
			for(i in 1:(nrow(Rho)-1)) cors[[i]] <- Rho[i, -(1:i)]
		} else cors[[1]] <- Rho
		Rho <- unlist(cors)

		NuVars <- apply(Nu, 2, "var")
		Gamma2 <- mean(NuVars)
		S <- length(NuVars)
		tau2Rho <- NuVars/(prod(NuVars))^(1/S)
		Nu <- as.vector(t(Nu))
		##the prior for gamma2 is an improper uniform distribution on (0, Inf)
	} else{
		Nu <- as.vector(matrix(0, nrow(object), length(object)))
		Rho <- rep(0, choose(length(object), 2))
		tau2Rho <- rep(0, length(object))
		Gamma2 <- 0
	}
	averageDifference <- function(eset, classLabel){
		## limma would be an easy way to fit a linear model to each gene
		phenoColumn <- grep(classLabel, names(pData(eset)))
		pheno <- pData(eset)[, phenoColumn]
		mn1 <- rowMeans(exprs(eset)[, pheno == 1])
		mn0 <- rowMeans(exprs(eset)[, pheno == 0])
		avdif <- mn1-mn0
		avdif
	}
	DDelta <- sapply(object, averageDifference, classLabel=phenotypeLabel)
	##require(genefilter)
	tt <- rowttests(object, phenotypeLabel)
	delta <- abs(tt) > T_THRESH
	if(one.delta){
		tmp <- ifelse(rowMeans(delta) > 0.5, 1, 0)
		delta <- matrix(tmp, nrow=length(tmp), ncol=length(object), byrow=FALSE)
	}
	if(any(colSums(delta) == 0)){
		##if none of the genes are differentially expressed in
		##some of the studies, set R at 0
		R <- rep(0, choose(S,2))
	} else 	{
		R <- cor(delta*DDelta)
		##order should be 1_2, 1_3, ..., 1_P, 2_3, ..., 2_P, ...
		##R <- R[upper.tri(R)]  ##Not in the right order!
		cors <- list()
		if(nrow(R) > 1){
			for(i in 1:(nrow(R)-1)) cors[[i]] <- R[i, -(1:i)]
		} else cors[[1]] <- R
		R <- unlist(cors)
	}
	DeltaVars <- apply(DDelta*delta, 2, "var")
	C2 <- mean(DeltaVars)
	S <- length(DeltaVars)
	if(any(colSums(delta)==0)){
		tau2R <- rep(1, S)
	} else {
		tau2R <- DeltaVars/(prod(DeltaVars))^(1/S)
	}
	##0.1 seems reasonable,  or the proportion of t-statistics > X
	Xi <- apply(delta, 2, "mean")
	Sigma2 <- sapply(lapply(object, exprs), rowVars)
	DDelta <- as.numeric(t(DDelta))
	delta <- as.numeric(t(delta))
	##Should we start at zero, 1, or somewhere in the middle?
	A <- rep(1, length(object))
	B <- rep(1, length(object))
	##Gamma with mean 1
	##L and T are mean and variance, respectively
	L <- as.numeric(colMeans(Sigma2))
	T <- as.numeric(apply(Sigma2, 2, var))
	Sigma2 <- as.numeric(t(Sigma2))
	##sigma2*phi is variance of expression values for patients with binary phenotype = 0
	##sigma2/phi is variance of expression values for patients with binary phenotype = 1
	Phi <- rep(1, (length(object) * nrow(object)))
	##Lambda and theta are the mean and variance for phi (study-specific)
	Lambda <- rep(1, length(object))
	Theta <- rep(0.05, length(object))
	##Tau2 is the relative scale of sigma across platforms
	##- the product is constrained to be one
	##- tau2 is shared by both nu and Delta
	##Tau2 <- rep(1, length(object))
	##Tau2 <- tau2.Delta * tau2.Nu
	linInitialValues <- list(Nu=Nu, DDelta=DDelta,
				 A=A, B=B, C2=C2, Gamma2=Gamma2,
				 R=R, Rho=Rho, Delta=delta,
				 Xi=Xi, Sigma2=Sigma2, T=T, L=L,
				 Phi=Phi, Theta=Theta, Lambda=Lambda,
				 Tau2R=tau2R,
				 Tau2Rho=tau2Rho)
	linInitialValues
}

computeGOF <- function(object,
                       nus,
                       Delta,
                       delta,
                       sigma2,
                       phi,
                       psi,
                       firstIteration,
                       lastIteration,
                       by=2){
	results <- list()
	avgM.k <- list()
	m.k <- 0
	##already thinned by 10
	I <- seq(firstIteration, lastIteration, by=by)
	NN <- length(I)
	for(p in seq(along=object)){
		cat("Study ", p, "\n")
		for(i in I){
			if(i %% 10 == 0) cat(i, " ")
			nus.1 <- nus[i, , ]
			Delta.1 <- Delta[i, , ]
			delta.1 <- delta[i, , ]
			sigma2.1 <- sigma2[i, , ]
			phi.1 <- phi[i, , ]
			sigma.1 <- sqrt(sigma2.1/phi.1)
			sigma.0 <- sqrt(sigma2.1*phi.1)
			##	trace(goodnessOfFit, browser)
			by <- 1/round(ncol(object[[p]])^0.4, 1)
			qtls <- qnorm(seq(0, 1, by=by), mean=0, sd=1)
			results[[p]] <- .goodnessOfFit(x=exprs(object[[p]]),
						      nus=nus.1[,p],
						      delta=delta.1[,p],
						      Delta=Delta.1[,p],
						      sigma.0=sigma.0[, p],
						      sigma.1=sigma.1[, p],
						      psi=psi[[p]],
						      qtls=qtls)
			m.k <- results[[p]]$m.k + m.k
		}
		m.k <- m.k/NN
		avgM.k[[p]] <- m.k
		m.k <- 0
		cat("\n")
	}
	pvals <- R.b <- list()
	for(p in 1:4){
		n.p.k <- ncol(object[[p]])*1/ncol(avgM.k[[p]])
		tmp <- ((avgM.k[[p]] - n.p.k)/sqrt(n.p.k))^2
		R.b[[p]] <- rowSums(tmp)
		pvals[[p]] <- -log10(1-pchisq(R.b[[p]], df=ncol(tmp)-1))
	}
	goodnessOfFit_results <- list(R.b, pvals)
	goodnessOfFit_results
}

.goodnessOfFit <- function(x, nus, delta, Delta, sigma.0, sigma.1, psi, qtls){
	Psi <- matrix(psi, nrow=nrow(x), ncol=ncol(x), byrow=TRUE)
	mus <- nus + delta * (2*Psi - 1)*Delta
	sigma <- sigma.0*(1-Psi) + sigma.1 * Psi
	m.k <- matrix(NA, nrow(x), length(qtls[-1]))

	##x <- exprs(object[[p]]) - mean(as.numeric(exprs(object[[p]])))
	x <- x-mean(as.numeric(x))
	z <- (x-mus)/sigma
	##determine bins by the inverse distribution function evaluated at theta.
	##
	##because we have a different mean and variance for psi=0,
	##psi=1, I standardized the x-values by theta and then
	##calculated the inverse quantiles
	R.b <- rep(NA, nrow(z))
	for(i in 1:nrow(z)){
		m.k[i, ] <- as.numeric(table(cut(as.numeric(z[i, ]), breaks=qtls, labels=seq(along=qtls[-1]))))
		n.p.k <- ncol(z)*1/length(qtls[-1])
		R.b[i] <- sum(((m.k[i, ] - n.p.k)/sqrt(n.p.k))^2)
	}
	pvals <- -log10(1-pchisq(R.b, df=ncol(m.k)-1))
	pvals[is.infinite(pvals)] <- 16
	##require(genefilter)
	sds0 <- rowSds(z[, psi==0])
	sds1 <- rowSds(z[, psi==1])
	n0 <- sum(psi==0)
	n1 <- sum(psi==1)
	sds <- (sds0*(n0-1) + sds1*(n1-1))/(n0+n1-2)
	return(list(pvals=pvals, R.b=R.b, sds=sds, z=z, m.k=m.k))
}

makeSigma <- function(G, Q, gamma2, tau2, a, sigma2, r){
	sigma <- sqrt(gamma2*tau2*exp(a*log(sigma2)))
	Sigma <- sigma %*% t(sigma)
	Sigma <- Sigma*r
	## check diagonals
	if(FALSE){
		correct.answer <- gamma2*tau2*exp(a*log(sigma2))
		answer2 <- diag(Sigma)
		all.equal(correct.answer, answer2)
	}
	return(Sigma)
}

##getParameters <- function(hyper.parameters, one.delta=FALSE, MRF=FALSE){
##Params <- function(hyper.parameters, one.delta=FALSE, MRF=FALSE){

Try the XDE package in your browser

Any scripts or data that you put into this service are public.

XDE documentation built on Nov. 8, 2020, 5:02 p.m.