R/EBTest_ext.R
In EBSeqHMM: Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments

Documented in EBTest_ext

#' @title Extented EBTest function
#' @usage EBTest_ext(Data,NgVector=NULL,Conditions, 
#'	sizeFactors, maxround, Pool=FALSE, NumBin=1000,
#'	ApproxVal=10^-10, Alpha=NULL, Beta=NULL,
#'	PInput=NULL,RInput=NULL,PoolLower=.25, 
#'	PoolUpper=.75,OnlyCalcR=FALSE,Print=TRUE)
#' @param Data Input data, rows are genes/isoforms and columns are samples. Data should come from a two condition experiment
#' @param NgVector Ng vector; NULL for gene level data
#' @param Conditions A factor indicates the condition (time/spatial point) which each sample belongs to. Only two levels are allowed.
#' @param sizeFactors a vector indicates library size factors
#' @param maxround number of iteration
#' @param Pool While working without replicates, user could define the Pool
#'          = TRUE in the EBTest function to enable pooling.
#' @param NumBin By defining NumBin = 1000, EBSeq will group the genes with
#'          similar means together into 1,000 bins.	
#' @param PoolLower,PoolUpper With the assumption that only subset of the genes
#'          are DE in the data set, we take genes whose FC are in the
#'          PoolLower - PoolUpper quantile of the FCs as the candidate
#'					          genes (default is 25%-75%).
#' For each bin, the bin-wise variance estimation is defined as
#'          the median of the cross condition variance estimations of the
#'          candidate genes within that bin.
#'					  We use the cross condition variance estimations for the
#'					          candidate genes and the bin-wise variance estimations of the
#'										          host bin for the non-candidate genes.
#' @param ApproxVal The variances of the transcripts with mean < var will be
#'										          approximated as mean/(1-ApproxVal).
#' @param Alpha,Beta,PInput,RInput If the parameters are known and the user
#'          doesn't want to estimate them from the data, user may
#'					          specify them here.
#' @param Print Whether print the elapsed-time while running the test.					
#' @param OnlyCalcR if OnlyCalcR=TRUE, the function will only return estimation of r's.
#' @author Ning Leng
#' @examples data(GeneExampleData)
#' Data=GeneExampleData[,1:6]
#' CondVector <- rep(paste("t",1:2,sep=""),each=3)
#' Conditions <- factor(CondVector, levels=c("t1","t2"))
#' Sizes <- MedianNorm(Data[1:10,])
#' Out <- EBTest_ext(Data=Data[1:10,], sizeFactors=Sizes, Conditions=Conditions,
#'          maxround=1)
#' @details    
#' EBSeq_ext() function is an extension of EBTest() function, which is used to calculate the conditional probability P(X_g,t | X_g,t-1).
#' In EBSeqHMM, we assume the conditional distribution is Beta-Negative Binomial.
#' @return See \code{\link{EBTest}}

EBTest_ext <-
function(Data,NgVector=NULL,Conditions, sizeFactors, maxround, Pool=FALSE, NumBin=1000,ApproxVal=10^-10, Alpha=NULL, Beta=NULL,PInput=NULL,RInput=NULL,PoolLower=.25, PoolUpper=.75,OnlyCalcR=FALSE,Print=TRUE)
{
	
	
	 if (!is.factor(Conditions)) 
		         Conditions = as.factor(Conditions)
		if (!is.matrix(Data)) stop("The input Data is not a matrix")
    if (length(Conditions) != ncol(Data)) 
		         stop("The number of conditions is not the same as the number of samples! ")
   if (nlevels(Conditions) > 2) 
		         stop("More than 2 conditions! Please use EBMultiTest() function")
   if (nlevels(Conditions) < 2) 
		        stop("Less than 2 conditions - Please check your input")
    if (length(sizeFactors) != length(Data) & length(sizeFactors) != 
						         ncol(Data)) 
			     stop("The number of library size factors is not the same as the number of samples!")

	
	Vect5End <- NULL
	Vect3End <- NULL
	CI <- NULL
	CIthre <- NULL
	tau <-NULL
	Dataraw <- Data
	AllZeroNames <- which(rowMeans(Data)==0)
	NotAllZeroNames <- which(rowMeans(Data)>0)
	if(length(AllZeroNames)>0 & Print==TRUE) cat("Remove transcripts with all zero \n")
	Data <- Data[NotAllZeroNames,]
	if(!is.null(NgVector))NgVector <- NgVector[NotAllZeroNames]
	if(!length(sizeFactors)==ncol(Data))sizeFactors <- sizeFactors[NotAllZeroNames,]

	if(is.null(NgVector))NgVector <- rep(1,nrow(Data))

	#Rename Them
	IsoNamesIn <- rownames(Data)
	Names <- paste("I",c(1:dim(Data)[1]),sep="")
	names(IsoNamesIn) <- Names
	rownames(Data) <- paste("I",c(1:dim(Data)[1]),sep="")
	names(NgVector) <- paste("I",c(1:dim(Data)[1]),sep="")
	

	if(!length(sizeFactors)==ncol(Data)){
		rownames(sizeFactors) <- rownames(Data)
		colnames(sizeFactors) <- Conditions
	}
	
	NumOfNg <- nlevels(as.factor(NgVector))
	NameList <- sapply(1:NumOfNg,function(i)Names[NgVector==i],simplify=FALSE)
	names(NameList) <- paste("Ng",c(1:NumOfNg),sep="")
	NotNone <- NULL
	for (i in 1:NumOfNg) {
		if (length(NameList[[i]])!=0) 
			NotNone <- c(NotNone,names(NameList)[i])
		}
	NameList <- NameList[NotNone]
		
	NoneZeroLength <- length(NameList)
	DataList <- vector("list",NoneZeroLength)
	DataList <- sapply(1:NoneZeroLength , function(i) Data[NameList[[i]],],simplify=FALSE)
	names(DataList) <- names(NameList)
    
	NumEachGroup <- sapply(1:NoneZeroLength , function(i)dim(DataList[[i]])[1])
	# Unlist 
	DataList.unlist <- do.call(rbind, DataList)

	# Divide by SampleSize factor
	
	if(length(sizeFactors)==ncol(Data))
	DataList.unlist.dvd <- t(t( DataList.unlist)/sizeFactors)
	
	if(length(sizeFactors)!=ncol(Data))
	DataList.unlist.dvd <- DataList.unlist/sizeFactors
	
	MeanList <- rowMeans(DataList.unlist.dvd)

###############
# Input R
###############
	if (!is.null(RInput)){

	RNoZero <- RInput[NotAllZeroNames]
	names(RNoZero) <- rownames(Data)
	RNoZero.order <- RNoZero[rownames(DataList.unlist)]
	if(length(sizeFactors)==ncol(Data)){
		RMat <-  outer(RNoZero.order, sizeFactors)
	}
	if(!length(sizeFactors)==ncol(Data)){
		RMat <-  RNoZero.order* sizeFactors
	}
		
	DataListSP <- vector("list",nlevels(Conditions))
	RMatSP <- vector("list",nlevels(Conditions))

	for (lv in 1:nlevels(Conditions)){
		DataListSP[[lv]] <-  matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
		rownames(DataListSP[[lv]]) <- rownames(DataList.unlist)
		RMatSP[[lv]]= matrix(RMat[,Conditions==levels(Conditions)[lv]],nrow=dim(RMat)[1])
		rownames(RMatSP[[lv]]) <- rownames(RMat)
	
		}

	F0Log <- f0(Input=DataList.unlist, AlphaIn=Alpha, BetaIn=Beta, 
			 EmpiricalR=RMat, NumOfGroups=NumEachGroup, log=TRUE)
	F1Log <- f1(Input1=DataListSP[[1]], Input2=DataListSP[[2]], 
			 AlphaIn=Alpha, BetaIn=Beta, EmpiricalRSP1=RMatSP[[1]], 
			 EmpiricalRSP2=RMatSP[[2]], NumOfGroup=NumEachGroup, log=TRUE)
	F0LogMdf <- F0Log+600
	F1LogMdf <- F1Log+600
	F0Mdf <- exp(F0LogMdf)
	F1Mdf <- exp(F1LogMdf)
	if(!is.null(PInput)){
		z.list <- PInput*F1Mdf/(PInput*F1Mdf+(1-PInput)*F0Mdf)
		PIn <- PInput
	}
	if(is.null(PInput)){
		PIn <- .5
		PInput <- rep(NULL,maxround)
		for(i in 1:maxround){
			z.list <- PIn*F1Mdf/(PIn*F1Mdf+(1-PIn)*F0Mdf)
			zNaNName <- names(z.list)[is.na(z.list)]
			zGood <- which(!is.na(z.list))
			PIn <- sum(z.list[zGood])/length(z.list[zGood])
			PInput[i] <- PIn
		}
	
	zNaNName <- names(z.list)[is.na(z.list)]
	if(length(zNaNName)!=0){
	PNotIn <- rep(1-ApproxVal,length(zNaNName))
	MeanList.NotIn <- MeanList[zNaNName]
	R.NotIn.raw <- MeanList.NotIn*PNotIn/(1-PNotIn)
	if(length(sizeFactors)==ncol(Data))
			R.NotIn <- outer(R.NotIn.raw,sizeFactors)
	if(!length(sizeFactors)==ncol(Data))
			R.NotIn <- R.NotIn.raw*sizeFactors[zNaNName,]
	R.NotIn1 <- matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
    R.NotIn2 <- matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
	NumOfEachGroupNA <- sapply(1:NoneZeroLength, function(i)length(zNaNName%in%rownames(DataList[[i]])))
	F0LogNA <- f0(matrix(DataList.unlist[zNaNName,],ncol=ncol(DataList.unlist)),  Alpha, Beta, R.NotIn, NumOfEachGroupNA, log=TRUE)
    F1LogNA <- f1(matrix(DataListSP[[1]][zNaNName,],ncol=ncol(DataListSP[[1]])), 
			   matrix(DataListSP[[2]][zNaNName,],ncol=ncol(DataListSP[[2]])),
			   Alpha, Beta, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=TRUE)
	F0LogMdfNA <- F0LogNA+600
		F1LogMdfNA <- F1LogNA+600
		F0MdfNA <- exp(F0LogMdfNA)
		F1MdfNA <- exp(F1LogMdfNA)
		z.list.NotIn <- PIn*F1MdfNA/(PIn*F1MdfNA+(1-PIn)*F0MdfNA)
	z.list[zNaNName] <- z.list.NotIn
	F0Log[zNaNName] <- F0LogNA
	F1Log[zNaNName] <- F1LogNA
	}
	}
	RealName.Z.output <- z.list
	RealName.F0 <- F0Log
	RealName.F1 <- F1Log
	names(RealName.Z.output) <- IsoNamesIn
	names(RealName.F0) <- IsoNamesIn
	names(RealName.F1) <- IsoNamesIn


	output <- list(Alpha=Alpha,Beta=Beta,P=PInput, Z=RealName.Z.output,
			 PPDE=RealName.Z.output,f0=RealName.F0, f1=RealName.F1)
	return(output)
	
	}


	# Get FC and VarPool for pooling - Only works on 2 conditions
	if(ncol(Data)==2){
	DataforPoolSP.dvd1 <- matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[1]],nrow=dim(DataList.unlist)[1])	
	DataforPoolSP.dvd2 <- matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[2]],nrow=dim(DataList.unlist)[1])
	MeanforPoolSP.dvd1 <- rowMeans(DataforPoolSP.dvd1)
	MeanforPoolSP.dvd2 <- rowMeans(DataforPoolSP.dvd2)
	FCforPool <- MeanforPoolSP.dvd1/MeanforPoolSP.dvd2
	names(FCforPool) <- rownames(Data)
	FC_Use <- which(FCforPool>=quantile(FCforPool[!is.na(FCforPool)],PoolLower) & 
								  FCforPool<=quantile(FCforPool[!is.na(FCforPool)],PoolUpper))
	
	Var_FC_Use <- apply( DataList.unlist.dvd[FC_Use,],1,var )
	Mean_FC_Use <- (MeanforPoolSP.dvd1[FC_Use]+MeanforPoolSP.dvd2[FC_Use])/2
	MeanforPool <- (MeanforPoolSP.dvd1+MeanforPoolSP.dvd2)/2
	FC_Use2 <- which(Var_FC_Use>=Mean_FC_Use)
	Var_FC_Use2 <- Var_FC_Use[FC_Use2]
	Mean_FC_Use2 <- Mean_FC_Use[FC_Use2]
	Phi <- mean((Var_FC_Use2-Mean_FC_Use2)/Mean_FC_Use2^2)
	VarEst <- 	MeanforPool*(1+MeanforPool*Phi)
	if(Print==TRUE)cat(paste("No Replicate - estimate phi",round(Phi,5), "\n"))
	names(VarEst) = names(MeanforPoolSP.dvd1)=
	names(MeanforPoolSP.dvd2)=rownames(DataList.unlist.dvd)
	}

	#DataListSP Here also unlist.. Only two lists
	DataListSP <- vector("list",nlevels(Conditions))
	DataListSP.dvd <- vector("list",nlevels(Conditions))
	SizeFSP <- DataListSP
	MeanSP <- DataListSP
	VarSP <- DataListSP
	GetPSP <- DataListSP
	RSP <- DataListSP
	CISP <- DataListSP
	tauSP <- DataListSP
	NumSampleEachCon <- rep(NULL,nlevels(Conditions))

	for (lv in 1:nlevels(Conditions)){
		DataListSP[[lv]] <-  matrix(DataList.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist)[1])
		rownames(DataListSP[[lv]]) <- rownames(DataList.unlist)
		DataListSP.dvd[[lv]] <-  matrix(DataList.unlist.dvd[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
		NumSampleEachCon[lv] <- ncol(DataListSP[[lv]])

	if(ncol(DataListSP[[lv]])==1 & !is.null(CI)){
		CISP[[lv]] <- matrix(CI[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
		tauSP[[lv]] <- matrix(tau[,Conditions==levels(Conditions)[lv]],nrow=dim(DataList.unlist.dvd)[1])
	}
	# no matter sizeFactors is a vector or a matrix. Matrix should be columns are the normalization factors
	# may input one for each 
	if(length(sizeFactors)==ncol(Data))SizeFSP[[lv]] <- sizeFactors[Conditions==levels(Conditions)[lv]]
	if(length(sizeFactors)!= ncol(Data))SizeFSP[[lv]] <- sizeFactors[,Conditions==levels(Conditions)[lv]]
	
	
	MeanSP[[lv]] <- rowMeans(DataListSP.dvd[[lv]])
	
	if(length(sizeFactors)==ncol(Data))PrePareVar <- sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][i])
	if(length(sizeFactors)!=ncol(Data))PrePareVar <- sapply(1:ncol( DataListSP[[lv]]),function(i)( DataListSP[[lv]][,i]- SizeFSP[[lv]][,i]*MeanSP[[lv]])^2 /SizeFSP[[lv]][,i])

	if(ncol(DataListSP[[lv]])==1 & !is.null(CI))
		VarSP[[lv]] <- as.vector(((DataListSP[[lv]]/tauSP[[lv]]) * CISP[[lv]]/(CIthre*2))^2)
	if(ncol(DataListSP[[lv]])!=1){
		VarSP[[lv]] <- rowSums(PrePareVar)/ncol( DataListSP[[lv]])
		names(MeanSP[[lv]]) <- rownames(DataList.unlist)
		names(VarSP[[lv]]) <- rownames(DataList.unlist)
		GetPSP[[lv]] <- MeanSP[[lv]]/VarSP[[lv]]
		RSP[[lv]] <- MeanSP[[lv]]*GetPSP[[lv]]/(1-GetPSP[[lv]])
	}
}
	
	
	VarList <- apply(DataList.unlist.dvd, 1, var)
	if(ncol(Data)==2){
		PoolVar <- VarEst
		VarSP[[1]]=VarSP[[2]]=VarEst
		GetPSP[[1]] <- MeanSP[[1]]/VarEst
		GetPSP[[2]] <- MeanSP[[2]]/VarEst

	}
	if(!ncol(Data)==2){
		CondWithRep <- which(NumSampleEachCon>1)
		VarCondWithRep <- do.call(cbind,VarSP[CondWithRep])
		PoolVar <- rowMeans(VarCondWithRep)
	
	}
	GetP <- MeanList/PoolVar
	
    EmpiricalRList <- MeanList*GetP/(1-GetP) 
	EmpiricalRList[EmpiricalRList==Inf]	 <- max(EmpiricalRList[EmpiricalRList!=Inf])
#####################
	if(ncol(Data)!=2){
	Varcbind <- do.call(cbind,VarSP)
	VarrowMin <- apply(Varcbind,1,min)
	}

	if(ncol(Data)==2){
		Varcbind <- VarEst
		VarrowMin <- VarEst
		VarSP[[1]]=VarSP[[2]]=VarEst
		names(MeanSP[[1]]) <- names(VarSP[[1]])
		names(MeanSP[[2]]) <- names(VarSP[[2]])
	}
	# 
	# 
	GoodData <- names(MeanList)[EmpiricalRList>0 &  VarrowMin!=0 & EmpiricalRList!=Inf & !is.na(VarrowMin) & !is.na(EmpiricalRList)]
	NotIn <- names(MeanList)[EmpiricalRList<=0 | VarrowMin==0 | EmpiricalRList==Inf |  is.na(VarrowMin) | is.na(EmpiricalRList)]
	#print(paste("ZeroVar",sum(VarrowMin==0), "InfR", length(which(EmpiricalRList==Inf)), "Poi", length(which(EmpiricalRList<0)), ""))
	EmpiricalRList.NotIn <- EmpiricalRList[NotIn]
	EmpiricalRList.Good <- EmpiricalRList[GoodData]
	EmpiricalRList.Good[EmpiricalRList.Good<1] <- 1+EmpiricalRList.Good[EmpiricalRList.Good<1]
	if(length(sizeFactors)==ncol(Data)){
		EmpiricalRList.Good.mat <-  outer(EmpiricalRList.Good, sizeFactors)	
		EmpiricalRList.mat <-  outer(EmpiricalRList, sizeFactors)
	}
	if(!length(sizeFactors)==ncol(Data)){
	EmpiricalRList.Good.mat <- EmpiricalRList.Good* sizeFactors[GoodData,]
	EmpiricalRList.mat <- EmpiricalRList* sizeFactors
	}

	# Only Use Data has Good q's
	DataList.In <- sapply(1:NoneZeroLength, function(i)DataList[[i]][GoodData[GoodData%in%rownames(DataList[[i]])],],simplify=FALSE)
	DataList.NotIn <- sapply(1:NoneZeroLength, function(i)DataList[[i]][NotIn[NotIn%in%rownames(DataList[[i]])],],simplify=FALSE)
	DataListIn.unlist <- do.call(rbind, DataList.In)
	DataListNotIn.unlist <- do.call(rbind, DataList.NotIn)
	
	DataListSPIn <- vector("list",nlevels(Conditions))
	DataListSPNotIn <- vector("list",nlevels(Conditions))
	EmpiricalRList.Good.mat.SP = EmpiricalRList.mat.SP=vector("list",nlevels(Conditions))
	for (lv in 1:nlevels(Conditions)){
		DataListSPIn[[lv]] <-  matrix(DataListIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListIn.unlist)[1])
		if(length(NotIn)>0){	
			DataListSPNotIn[[lv]] <-  matrix(DataListNotIn.unlist[,Conditions==levels(Conditions)[lv]],nrow=dim(DataListNotIn.unlist)[1])
			rownames(DataListSPNotIn[[lv]]) <- rownames(DataListNotIn.unlist)
	}
		rownames(DataListSPIn[[lv]]) <- rownames(DataListIn.unlist)
		EmpiricalRList.Good.mat.SP[[lv]] <- matrix(EmpiricalRList.Good.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.Good.mat)[1])
		EmpiricalRList.mat.SP[[lv]] <- matrix(EmpiricalRList.mat[,Conditions==levels(Conditions)[lv]],nrow=dim(EmpiricalRList.mat)[1])
	}	

	NumOfEachGroupIn <- sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.In[[i]])[1]))
	NumOfEachGroupNotIn <- sapply(1:NoneZeroLength, function(i)max(0,dim(DataList.NotIn[[i]])[1]))
	

#################
# For output
#################
RealName.EmpiricalRList <- sapply(1:NoneZeroLength,function(i)EmpiricalRList[names(EmpiricalRList)%in%NameList[[i]]], simplify=FALSE)
RealName.MeanList <- sapply(1:NoneZeroLength,function(i)MeanList[names(MeanList)%in%NameList[[i]]], simplify=FALSE)
RealName.C1MeanList <- sapply(1:NoneZeroLength,function(i)MeanSP[[1]][names(MeanSP[[1]])%in%NameList[[i]]], simplify=FALSE)
RealName.C2MeanList <- sapply(1:NoneZeroLength,function(i)MeanSP[[2]][names(MeanSP[[2]])%in%NameList[[i]]], simplify=FALSE)
RealName.C1VarList <- sapply(1:NoneZeroLength,function(i)VarSP[[1]][names(VarSP[[1]])%in%NameList[[i]]], simplify=FALSE)
RealName.C2VarList <- sapply(1:NoneZeroLength,function(i)VarSP[[2]][names(VarSP[[2]])%in%NameList[[i]]], simplify=FALSE)
RealName.DataList <- sapply(1:NoneZeroLength,function(i)DataList[[i]][rownames(DataList[[i]])%in%NameList[[i]],], simplify=FALSE)



RealName.VarList <- sapply(1:NoneZeroLength,function(i)VarList[names(VarList)%in%NameList[[i]]], simplify=FALSE)
RealName.PoolVarList <- sapply(1:NoneZeroLength,function(i)PoolVar[names(PoolVar)%in%NameList[[i]]], simplify=FALSE)


RealName.QList1 <- sapply(1:NoneZeroLength,function(i)GetPSP[[1]][names(GetPSP[[1]])%in%NameList[[i]]], simplify=FALSE)
RealName.QList2 <- sapply(1:NoneZeroLength,function(i)GetPSP[[2]][names(GetPSP[[2]])%in%NameList[[i]]], simplify=FALSE)


for (i in 1:NoneZeroLength){
tmp <- NameList[[i]]
names <- IsoNamesIn[tmp]

RealName.MeanList[[i]] <- RealName.MeanList[[i]][NameList[[i]]]
RealName.VarList[[i]] <- RealName.VarList[[i]][NameList[[i]]]
RealName.QList1[[i]] <- RealName.QList1[[i]][NameList[[i]]]
RealName.QList2[[i]] <- RealName.QList2[[i]][NameList[[i]]]
RealName.EmpiricalRList[[i]] <- RealName.EmpiricalRList[[i]][NameList[[i]]]
RealName.C1MeanList[[i]] <- RealName.C1MeanList[[i]][NameList[[i]]]
RealName.C2MeanList[[i]] <- RealName.C2MeanList[[i]][NameList[[i]]]
RealName.PoolVarList[[i]] <- RealName.PoolVarList[[i]][NameList[[i]]]
RealName.C1VarList[[i]] <- RealName.C1VarList[[i]][NameList[[i]]]
RealName.C2VarList[[i]] <- RealName.C2VarList[[i]][NameList[[i]]]
RealName.DataList[[i]] <- RealName.DataList[[i]][NameList[[i]],]

names(RealName.MeanList[[i]]) <- names
names(RealName.VarList[[i]]) <- names
if(ncol(DataListSP[[1]])!=1){
	names(RealName.QList1[[i]]) <- names
	names(RealName.C1VarList[[i]]) <- names
}
if(ncol(DataListSP[[2]])!=1){
	names(RealName.QList2[[i]]) <- names
	names(RealName.C2VarList[[i]]) <- names
}

names(RealName.EmpiricalRList[[i]]) <- names
names(RealName.C1MeanList[[i]]) <- names
names(RealName.C2MeanList[[i]]) <- names
names(RealName.PoolVarList[[i]]) <- names
rownames(RealName.DataList[[i]]) <- names


}
  


#####################
# If Don need EM
#####################	
	if(!is.null(Alpha)&!is.null(Beta)){
	F0Log <- f0(Input=DataList.unlist, AlphaIn=Alpha, BetaIn=Beta, 
			 EmpiricalR=EmpiricalRList.mat, NumOfGroups=NumEachGroup, log=TRUE)
	F1Log <- f1(Input1=DataListSP[[1]], Input2=DataListSP[[2]], 
			 AlphaIn=Alpha, BetaIn=Beta, EmpiricalRSP1=EmpiricalRList.mat.SP[[1]], 
			 EmpiricalRSP2 <- EmpiricalRList.mat.SP[[2]], NumOfGroup=NumEachGroup, log=TRUE)
	F0LogMdf <- F0Log+600
	F1LogMdf <- F1Log+600
	F0Mdf <- exp(F0LogMdf)
	F1Mdf <- exp(F1LogMdf)
	if(!is.null(PInput)){
		z.list <- PInput*F1Mdf/(PInput*F1Mdf+(1-PInput)*F0Mdf)
		PIn <- PInput
	}
	if(is.null(PInput)){
		PIn <- .5
		PInput <- rep(NULL,maxround)
		for(i in 1:maxround){
			z.list <- PIn*F1Mdf/(PIn*F1Mdf+(1-PIn)*F0Mdf)
			zNaNName <- names(z.list)[is.na(z.list)]
			zGood <- which(!is.na(z.list))
			PIn <- sum(z.list[zGood])/length(z.list[zGood])
			PInput[i] <- PIn
		}
	
	zNaNName <- names(z.list)[is.na(z.list)]
	if(length(zNaNName)!=0){
	PNotIn <- rep(1-ApproxVal,length(zNaNName))
	MeanList.NotIn <- MeanList[zNaNName]
	R.NotIn.raw <- MeanList.NotIn*PNotIn/(1-PNotIn)
	if(length(sizeFactors)==ncol(Data))
			R.NotIn <-outer(R.NotIn.raw,sizeFactors)
	if(!length(sizeFactors)==ncol(Data))
			R.NotIn <- R.NotIn.raw*sizeFactors[zNaNName,]
	R.NotIn1 <- matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
    R.NotIn2 <- matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
	NumOfEachGroupNA <- sapply(1:NoneZeroLength, function(i)length(zNaNName%in%rownames(DataList[[i]])))
	
    
      
    
    F0LogNA <- f0(matrix(DataList.unlist[zNaNName,], ncol = ncol(DataList.unlist)), Alpha, Beta, R.NotIn, NumOfEachGroupNA, log=TRUE)
    F1LogNA <- f1(matrix(DataListSP[[1]][zNaNName,],ncol=ncol(DataListSP[[1]])), 
               matrix(DataListSP[[2]][zNaNName,],ncol=ncol(DataListSP[[2]])),
			   Alpha, Beta, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=TRUE)
	F0LogMdfNA <- F0LogNA+600
		F1LogMdfNA <- F1LogNA+600
		F0MdfNA <- exp(F0LogMdfNA)
		F1MdfNA <- exp(F1LogMdfNA)
		z.list.NotIn <- PIn*F1MdfNA/(PIn*F1MdfNA+(1-PIn)*F0MdfNA)
	z.list[zNaNName] <- z.list.NotIn
	F0Log[zNaNName] <- F0LogNA
	F1Log[zNaNName] <- F1LogNA
	}
	}
	RealName.Z.output <- z.list
	RealName.F0 <- F0Log
	RealName.F1 <- F1Log
	names(RealName.Z.output) <- IsoNamesIn
	names(RealName.F0) <- IsoNamesIn
	names(RealName.F1) <- IsoNamesIn


	output <- list(Alpha=Alpha,Beta=Beta,P=PInput, Z=RealName.Z.output,
			 RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, 
			 VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, 
			 C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,
			 C1EstVar=RealName.C1VarList, C2EstVar=RealName.C2VarList, 
			 PoolVar=RealName.PoolVarList , DataList=RealName.DataList,
			 PPDE=RealName.Z.output,f0=RealName.F0, f1=RealName.F1)
	return(output)
	}
   
###################
# If only need R
###################
	if(OnlyCalcR==TRUE){
	output <- list(
			 RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, 
			 VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, 
			 C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,
			 C1EstVar=RealName.C1VarList, C2EstVar=RealName.C2VarList, 
			 PoolVar=RealName.PoolVarList , DataList=RealName.DataList)
	return(output)
	}


#####################	
#Initialize SigIn & ...
#####################	
	AlphaIn <- 0.5
	BetaIn <- rep(0.5,NoneZeroLength)
	PIn <- 0.5


#####################	
# EM
#####################	
	UpdateAlpha <- NULL
	UpdateBeta <- NULL
	UpdateP <- NULL
	UpdatePFromZ <- NULL
    Timeperround <- NULL 
	for (times in 1:maxround){
    	temptime1 <- proc.time()
		UpdateOutput <- suppressWarnings(LogN(DataListIn.unlist,DataListSPIn, EmpiricalRList.Good.mat ,EmpiricalRList.Good.mat.SP,  NumOfEachGroupIn, AlphaIn, BetaIn, PIn, NoneZeroLength))
    	#cat(paste("iteration", times, "done \n",sep=" "))
		AlphaIn <- UpdateOutput$AlphaNew
    	BetaIn <- UpdateOutput$BetaNew
    	PIn <- UpdateOutput$PNew
		PFromZ <- UpdateOutput$PFromZ
    	F0Out <- UpdateOutput$F0Out
		F1Out <- UpdateOutput$F1Out
		UpdateAlpha <- rbind(UpdateAlpha,AlphaIn)
   		UpdateBeta <- rbind(UpdateBeta,BetaIn)
    	UpdateP <- rbind(UpdateP,PIn)
		UpdatePFromZ <- rbind(UpdatePFromZ,PFromZ)
		temptime2 <- proc.time()
		Timeperround <- c(Timeperround,temptime2[3]-temptime1[3])
		cat(paste("time" ,round(Timeperround[times],2),"\n",sep=" "))
		Z.output <- UpdateOutput$ZNew.list[!is.na(UpdateOutput$ZNew.list)]
   		Z.NA.Names <- UpdateOutput$zNaNName
		}
		#Remove this } after testing!!
		 
#    	if (times!=1){  
#        	if((UpdateAlpha[times]-UpdateAlpha[times-1])^2+UpdateBeta[times]-UpdateBeta[times-1])^2+UpdateR[times]-UpdateR[times-1])^2+UpdateP[times]-UpdateP[times-1])^2<=10^(-6)){ 
#           		Result=list(Sig=SigIn, Miu=MiuIn, Tau=TauIn)
#           		break
#        }
#    }
#}

##########Change Names############
## Only z are for Good Ones
GoodData <- GoodData[!GoodData%in%Z.NA.Names]
IsoNamesIn.Good <- IsoNamesIn[GoodData]
RealName.Z.output <- Z.output
RealName.F0 <- F0Out
RealName.F1 <- F1Out
names(RealName.Z.output) <- IsoNamesIn.Good
names(RealName.F0) <- IsoNamesIn.Good
names(RealName.F1) <- IsoNamesIn.Good



#########posterior part for other data set here later############
AllNA <- unique(c(Z.NA.Names,NotIn))
z.list.NotIn <- NULL
AllF0 <- c(RealName.F0)
AllF1 <- c(RealName.F1)
AllZ <- RealName.Z.output

if (length(AllNA)>0){
	Ng.NA <- NgVector[AllNA]
	AllNA.Ngorder <- AllNA[order(Ng.NA)]
	NumOfEachGroupNA <- rep(0,NoneZeroLength)
	NumOfEachGroupNA.tmp <- tapply(Ng.NA,Ng.NA,length)
	names(NumOfEachGroupNA) <- c(1:NoneZeroLength)
	NumOfEachGroupNA[names(NumOfEachGroupNA.tmp)] <- NumOfEachGroupNA.tmp
	PNotIn <- rep(1-ApproxVal,length(AllNA.Ngorder))
	MeanList.NotIn <- MeanList[AllNA.Ngorder]
	R.NotIn.raw <- MeanList.NotIn*PNotIn/(1-PNotIn) 
	if(length(sizeFactors)==ncol(Data))
	R.NotIn <- outer(R.NotIn.raw,sizeFactors)
	if(!length(sizeFactors)==ncol(Data))
	R.NotIn <- R.NotIn.raw*sizeFactors[NotIn,]
	R.NotIn1 <- matrix(R.NotIn[,Conditions==levels(Conditions)[1]],nrow=nrow(R.NotIn))
	R.NotIn2 <- matrix(R.NotIn[,Conditions==levels(Conditions)[2]],nrow=nrow(R.NotIn))
    
	DataListNotIn.unlistWithZ <- matrix(DataList.unlist[AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
	DataListSPNotInWithZ <- vector("list",nlevels(Conditions))
	for (lv in 1:nlevels(Conditions)) 
		DataListSPNotInWithZ[[lv]]  <-  matrix(DataListSP[[lv]][AllNA.Ngorder,],nrow=length(AllNA.Ngorder))
		F0Log <- f0(DataListNotIn.unlistWithZ,  AlphaIn, BetaIn, R.NotIn, NumOfEachGroupNA, log=TRUE)
    	F1Log <- f1(DataListSPNotInWithZ[[1]], DataListSPNotInWithZ[[2]], AlphaIn, BetaIn, R.NotIn1,R.NotIn2, NumOfEachGroupNA, log=TRUE)
		F0LogMdf <- F0Log+600
		F1LogMdf <- F1Log+600
		F0Mdf <- exp(F0LogMdf)
		F1Mdf <- exp(F1LogMdf)
		z.list.NotIn <- PIn*F1Mdf/(PIn*F1Mdf+(1-PIn)*F0Mdf)
#	names(z.list.NotIn)=IsoNamesIn.Good=IsoNamesIn[which(Names%in%NotIn)]
	names(z.list.NotIn) <- IsoNamesIn[AllNA.Ngorder]

	AllZ <- c(RealName.Z.output,z.list.NotIn)
	AllZ <- AllZ[IsoNamesIn]
	AllZ[is.na(AllZ)] <- 0
	F0.NotIn <- F0Log
	F1.NotIn <- F1Log
	names(F0.NotIn) <- IsoNamesIn[names(F0Log)]
    names(F1.NotIn) <- IsoNamesIn[names(F1Log)]
	AllF0 <- c(RealName.F0,F0.NotIn)
	AllF1 <- c(RealName.F1,F1.NotIn)
	AllF0 <- AllF0[IsoNamesIn]
	AllF1 <- AllF1[IsoNamesIn]
	AllF0[is.na(AllF0)] <- 0
	AllF1[is.na(AllF1)] <- 0
}
PPMatNZ <- cbind(1-AllZ,AllZ)
colnames(PPMatNZ) <- c("PPEE","PPDE")
rownames(UpdateAlpha) <- paste("iter",1:nrow(UpdateAlpha),sep="")
rownames(UpdateBeta) <- paste("iter",1:nrow(UpdateBeta),sep="")
rownames(UpdateP) <- paste("iter",1:nrow(UpdateP),sep="")
rownames(UpdatePFromZ) <- paste("iter",1:nrow(UpdatePFromZ),sep="")
colnames(UpdateBeta) <- paste("Ng",1:ncol(UpdateBeta),sep="")

CondOut <- levels(Conditions)
names(CondOut) <- paste("Condition",c(1:length(CondOut)),sep="")

PPMat <- matrix(NA,ncol=2,nrow=nrow(Dataraw))
rownames(PPMat) <- rownames(Dataraw)
colnames(PPMat) <- c("PPEE","PPDE")
if(is.null(AllZeroNames))PPMat <- PPMatNZ
if(!is.null(AllZeroNames))PPMat[names(NotAllZeroNames),] <- PPMatNZ[names(NotAllZeroNames),]


#############Result############################
Result <- list(Alpha=UpdateAlpha,Beta=UpdateBeta,P=UpdateP,
			PFromZ=UpdatePFromZ, Z=RealName.Z.output,PoissonZ=z.list.NotIn, 
			RList=RealName.EmpiricalRList, MeanList=RealName.MeanList, 
			VarList=RealName.VarList, QList1=RealName.QList1, QList2=RealName.QList2, 
			C1Mean=RealName.C1MeanList, C2Mean=RealName.C2MeanList,C1EstVar=RealName.C1VarList, 
			C2EstVar=RealName.C2VarList, PoolVar=RealName.PoolVarList , 
			DataList=RealName.DataList,PPDE=AllZ,f0=AllF0, f1=AllF1,
			AllZeroIndex=AllZeroNames,PPMat=PPMatNZ, PPMatWith0=PPMat,
			ConditionOrder=CondOut, Conditions=Conditions)
}
Any scripts or data that you put into this service are public.
EBSeqHMM documentation built on Nov. 8, 2020, 5:22 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EBSeqHMM
Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments

R/EBTest_ext.R
In EBSeqHMM: Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments

Defines functions EBTest_ext

Documented in EBTest_ext

Try the EBSeqHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBSeqHMM Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments

R/EBTest_ext.R In EBSeqHMM: Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments

Defines functions EBTest_ext

Documented in EBTest_ext

Try the EBSeqHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBSeqHMM
Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments

R/EBTest_ext.R
In EBSeqHMM: Bayesian analysis for identifying gene or isoform expression changes in ordered RNA-seq experiments