BATools: BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study

##calculate the variance explained by each window
calc.varw<-function(win,Z,SNPeff){
	len_win=length(win)
	varw=rep(0,len_win)
	start=1
	nanim=dim(Z)[1]
	for(k in 1:len_win){
		end=start+win[k]-1
		if(start==end) gw=Z[,start]*as.numeric(SNPeff[start]) else gw=Z[,start:end]%*%as.numeric(SNPeff[start:end])
		varw[k]=sum(gw*gw)/nanim-(sum(gw)/nanim)^2
		start=end+1
	}
	varw
}

##calculate the posterior probability of each window contains at least one non-zero effect for variable selection methods 
calc.phiw<-function(win,phi){
	start=1
	lenw=length(win)
	phiw=rep(0,lenw)
	for(k in 1:lenw)
	{
		end=start+win[k]-1
		phiw[k]=(sum(phi[start:end])>=1)
		if(k!=lenw) start=end+1
	}
	phiw
}


#Main function for Bayesian whole genome regression when all individuals are genotyped
BayesM <- function(op=NULL,y=NULL,Z=NULL,X=NULL,vtrain=NULL,GWA=NULL,map=NULL) 
{   # START OF FUNCTION
	pi_math = 3.14159265359	
	set.seed(op$seed)  
	whichNa=which(vtrain==FALSE)
	Z0=Z
	y0=y
	X0=X
	if(length(whichNa)>0)
	{
		y=y[-whichNa]
		Z=Z[-whichNa,]
		X=X[-whichNa,]
	}

	if(GWA=="Win") {
	  win=c()
	  for(j in 1:max(map$idw)){
	    win=append(win,sum(map$idw==j))
	  }
	  chr=map %>% distinct(chr,idw) %>% select(chr) %>% t %>% as.numeric
	}else{
	  win=NULL
	  chr=NULL
	}
	# Vectors for saved samples of hyperparameters
	if (op$update_para$df)    {defsave   =  array(0,op$run_para$niter/op$run_para$skip) }
	 {scalesave =  array(0,op$run_para$niter/op$run_para$skip) }
	if (op$update_para$pi)     {pisave    =  array(0,op$run_para$niter/op$run_para$skip) } 
	varesave = array(0,op$run_para$niter/op$run_para$skip)

	# To monitor MH acceptance ratios on degrees of freedom parameter
	alphadef_save = array(0,(op$run_para$niter-op$run_para$burnIn)/op$run_para$skip)
	# TO MONITOR MH ACCEPTANCE RATIOS ON SCALE PARAMETER (WHERE NECESSARY) POST BURN-IN
	alphascalea_save = array(0,(op$run_para$niter-op$run_para$burnIn)/op$run_para$skip)
	# input data
	

	
	
	nx=rownames(X)
	ng=rownames(Z)
	np=names(y)
	idx <- Reduce(intersect, list(nx,ng,np))
	if(dim(X)[2]==1) X=as.matrix(X[idx,],ncol=1) else X=X[idx,]
	y=y[idx]
	Z=Z[idx,]
	
	dimX=dim(X)[2]
    nSNP=dim(Z)[2]  #number of SNP
    nanim=dim(Z)[1]; #number of animals
	nrecords = length(y)
  	G1=nSNP

  	W = as.matrix(cbind(X,Z))
  	Wstacked=as.vector(W);

	# specify prior degrees of belief on genetic variance
	# specify  prior scale on genetic variance
	# inital values   

	def=op$init$df			 # Starting Bayes A df 
	scalea  = op$init$scale    	 # Starting scale parameter
	Pi_SNP = op$init$pi                   # Starting pi parameter   (probability of being a gene!)
	cdef       = op$priors$cdef		 # cdef is scale parameter for proposal density on df
	cscalea     = op$priors$cscalea
	alphacheck = 0
	alphaQTLvar= 0			 # to monitor BayesB QTLvar rejection rates 
	alphatot1= 0
	if (op$update_para$scale & op$update_para$df) alphatot2= 0
	WWdiag     = apply(W,2,crossprod) # diagonals are important

	# used to monitor Metropolis Hastings acceptance ratio for df
	dimW     = dim(W)[2]  # number of columns = # of location parameters
	# initialize a few arrays
	theta      = array(0,dimW)
	SNPeff     = array(0,nSNP)
	if(op$model=="SSVS"){
	    hratio=array(0,nSNP)
	    loghratio=array(0,nSNP)
	    hratiosave=c()
		  c=op$init$c
	}
	varq       = array(scalea,nSNP)  # initialize each SNP variance to overall scale parameter
	SNPvar     = array(0,nSNP)       # used to take running sum of SNP specific variances
	SNPvarlast = array(0,nSNP)
	postprob   = array(0,nSNP)       # indicator variable for inclusion of SNP in model
                                    # later used to determine posterior probability...unique to BayesB and SSVS
	postprob_save=array(0,nSNP)
	if(GWA=="Win" && op$model=="BayesB")
	{
	  p1=array(0,nSNP)
	  p2=array(0,nSNP)
	  pw=array(0,nSNP)
	}
	alphametrovar = array(0,nSNP)    # used to monitor MH acceptance rates for included genes.  

	# This is for computing mean and variance of each SNP effect.
	Mlast =    array(0,dimW)
	Qlast =    array(0,dimW)
	Mcurrent = array(0,dimW)
	Qcurrent = array(0,dimW)
	storeDbarj<-c()

	# adjust y
	ycorr = y - as.vector(W%*%theta)   # adjust obs by everything before starting iterations
	# so this is the residual.

	# use this to set up the starting value for the residual variance
	if(is.null(op$init$vare)) vare = crossprod(ycorr)/(nrecords-dimX) else vare=op$init$vare

	starttime<-proc.time()
	timeSNP = 0
	timetotal=0

	# mcmc sampling
	for (iter in 1:op$run_para$niter) 
	{  #"START" OF MCMC SAMPLING

     	itersave = iter-op$run_para$burnIn
     	if (itersave ==1) 
     	{
        	alphametrovar = array(0,nSNP)  # reinitialize to 0 post burn-in
        	if(op$model=="BayesB") postprob = array(0,nSNP) # reinitialize to 0 post burn-in
     	}

        #####################################################################################
        # sample intercept
        #####################################################################################
     	for(i in 1:dimX)
        {
   			ycorr    = ycorr + W[,i]*theta[i]
   			rhs=t(W[,i])%*%ycorr/vare# 1'(y-W*theta)/sigmae
   			lhs=WWdiag[i]/vare
   			invLhs=1.0/lhs		
           	meancond = rhs*invLhs                          
           	theta[i] = rnorm(1,meancond,sqrt(invLhs))       
           	ycorr    = ycorr - W[,i]*theta[i]  # take it off again to create new "residual"	
        }	

     	#####################################################################################	
     	# sample variance & effect for each SNP (BayesA)	
     	####################################################################################
		if (Pi_SNP == 1)  
		{ # "START" OF BAYESA
			#####################################################################################
			# sample variance for each SNP (Bayes A)	
			#####################################################################################			
			if(op$model=="BayesA") varq = (scalea*def + SNPeff*SNPeff)/rchisq(nSNP,def+1) else varq=rep(scalea,nSNP)
			#####################################################################################
			# sample effect for each SNP (Bayes A)	
			#####################################################################################
			startSNP = proc.time()
			BayesC<- .Call("BayesACL",nSNP,dimX,nanim,Wstacked,WWdiag,theta,ycorr,varq,vare)
			theta=BayesC[[1]]
			ycorr=BayesC[[2]]
			SNPeff=theta[-(1:dimX)]
			G1=nSNP
		}   # "END" FOR BAYESA

		#####################################################################################
		# sample effect for each SNP (Bayes B)	
		#####################################################################################
		if (Pi_SNP < 1 )  
		{   # "START" OF BAYESB
			startSNP = proc.time()
			if(op$model=="BayesB"){
				G1=0
				#####################################################################################
				# sample effect and variance for each SNP (Bayes B)	
				#####################################################################################
				BayesB<-.Call("BayesBC",nSNP,dimX,nanim,Wstacked,WWdiag,theta,ycorr,varq,vare,postprob,scalea,def,alphametrovar,iter,Pi_SNP,G1) 
				theta=BayesB[[1]]
				ycorr=BayesB[[2]]
				SNPeff=theta[-(1:dimX)]
				p1=BayesB[[3]]
				postprob=BayesB[[3]]
			  	postprob_save=postprob_save+postprob 
				postprob   = array(0,nSNP) 
				alphametrovar=BayesB[[4]]
				G1=BayesB[[5]][1] #add G1
				alpha1=BayesB[[6]][1]
				alphacheck=BayesB[[7]][1]	
			}
			if(op$model=="SSVS"){
				#####################################################################################
				# sample effect and variance for each SNP (Bayes C)	
				#####################################################################################
		        BayesC<- .Call("BayesCC",nSNP,dimX,nanim,Wstacked,WWdiag, theta, ycorr,rep(scalea,nSNP),vare,Pi_SNP,postprob,hratio,c)
		        theta=BayesC[[1]]
		        ycorr=BayesC[[2]]
		        SNPeff=theta[-(1:dimX)]
		        postprob=BayesC[[3]]
		        if(itersave>0) postprob_save=postprob_save+postprob
			}
	 		 	
		} # "END" OF BAYESB/SSVS

		if (itersave>1) 
		{     # "START" OF COMPUTE POSTERIOR MEANS & VARIANCES
			Mcurrent = Mlast + (theta-Mlast)/itersave
			Qcurrent = Qlast + (itersave-1)*((theta-Mlast)^2)/itersave
			Mlast = Mcurrent
			Qlast = Qcurrent 

			SNPvar=SNPvarlast + (varq-SNPvarlast)/itersave
			SNPvarlast=SNPvar
			if (iter%%op$run_para$skip == 0) {
			  if(op$model %in% c("rrBLUP","BayesA")){
			    count=count+(SNPeff>0)
			    if(GWA=="Win"){
			      g_total<-Z%*%SNPeff
			      var_total=sum(g_total*g_total)/nanim-(sum(g_total)/nanim)^2
			      varw=calc.varw(win,Z,SNPeff)
			      qw=varw/var_total
			      countqw=countqw+(qw>0.01)
			    }
			  }
			 if(op$model %in% c("SSVS","BayesB") && GWA=="Win"){
			   if(op$model=="SSVS") Wpostprob=Wpostprob+calc.phiw(win,postprob) else Wpostprob=Wpostprob+calc.phiw(win,p1)
			   n_saved_sample=n_saved_sample+1
			   #cat("\n p2",p2,"\n Wpb",Wpostprob,"\n")
			  } 
			} 
		}  # "END" OF COMPUTE POSTERIOR MEANS & VARIANCES
		if (itersave==1) {
			Mlast = theta;SNPvarlast=varq;
		
			  if(op$model %in% c("rrBLUP","BayesA")){
			    count=rep(0,nSNP)
			    if(GWA=="Win") countqw=rep(0,length(win))
			  }
			  if(op$model %in% c("SSVS","BayesB") && GWA=="Win"){
			    Wpostprob=rep(0,length(win))
			    n_saved_sample=0
			  } 
			
		} 

		#####################################################################################	
		#   sample residual variance
		#####################################################################################
     if(op$update_para$vare) vare = ( crossprod(ycorr) +op$priors$tau2_e )/rchisq(1,nrecords+op$priors$nu_e)   # prior on vare used
		#  scale parameter should be e`e + Se,
		#  degrees of freedom should be n + ve

    	#####################################################################################
     	# sample degrees of freedom
     	#####################################################################################
     	if (op$update_para$df)  
		{  #"START" OF SAMPLE THE DEGREES OF FREEDOM
			SNPeff2=SNPeff*SNPeff
			for (cycle in 1:10){
		        mhdf=metropDf(def, scalea, G1=nSNP, SNPeff2, priordf = 1, cdef , max = Inf,pi_math=pi_math)
		        def=as.numeric(mhdf[[1]])
		        alpha1=as.numeric(mhdf[[2]])
		        alphatot1=alphatot1+alpha1
				if (cycle==10 & iter <= op$run_para$burnIn)
				{
				 	alphacheck1 = alphatot1/10;
				 	if (alphacheck1 > .70) { cdef = cdef*1.2} 
					else if(alphacheck1  < .20) {cdef = cdef*.7}     # tune Metropolis sampler #
					alphatot1 = 0;
				}
				else if (cycle==10 & iter > op$run_para$burnIn) 
				{
				 	alphacheck1 = alphatot1/10;
				 	alphadef_save[itersave/op$run_para$skip] = alphacheck1
				 	alphatot1 = 0
				}
			}
     	}   # "END" OF SAMPLE THE DEGREES OF FREEDOM  

        if(op$model=="SSVS"){
	        #####################################################################################
	        #  sample scale#
	        #  based on Gelman Uniform(0,A) prior on scale parameter for "large" A.
	        #####################################################################################
	        if (op$update_para$scale) 
	         {  #"START" OF SAMPLE THE SCALE PARAMETER 
	              #shape_g = 0.5*(nSNP+5)
	   	   	   	  tmp=SNPeff*SNPeff/((1-postprob)/c+postprob)
	              #scale_g = 0.5*(sum(tmp)+0.04)
	              scalea =  (sum(tmp))/rchisq(1,nSNP)
	   	    	  #rgamma(1,shape=shape_g,scale=scale_g)
	         } #"END" OF SAMPLE THE SCALE PARAMETER 
        }else if(op$model=="rrBLUP"){
	        if (op$update_para$scale) 
	         {  #"START" OF SAMPLE THE SCALE PARAMETER 
	              #shape_g = 0.5*(nSNP+5)
	   	   	   	  tmp=SNPeff*SNPeff
	              #scale_g = 0.5*(sum(tmp)+0.04)
	              scalea =  (sum(tmp))/rchisq(1,nSNP)
	   	    	  #rgamma(1,shape=shape_g,scale=scale_g)
	         } #"END" OF SAMPLE THE SCALE PARAMETER 
        }else if(op$model%in%c("BayesA","BayesB")){
	        #####################################################################################
	        #  sample scale#
	        #  based on Gelman Uniform(0,A) prior on scale parameter for "large" A.
	        #####################################################################################
	        if (op$update_para$scale & !(op$update_para$df)) 
	         {  #"START" OF SAMPLE THE SCALE PARAMETER 
	              shape_g = 0.5*(G1*def)+op$priors$shape_scale
	   		        scale_g = 0.5*def*sum(1/varq[which(varq>0)])+op$priors$rate_scale
	              scalea = rgamma(1,shape=shape_g,rate=scale_g)
	         } #"END" OF SAMPLE THE SCALE PARAMETER   
		     
	    	#####################################################################################
	     	#  sample scale usning UNIMH#
	     	#####################################################################################
			if (op$update_para$scale & op$update_para$df)       	  
			{  #"START" OF SAMPLE THE SCALE PARAMETER 
		 		SNPeff2=SNPeff*SNPeff
				for (cycle in 1:10){
					mhscale=metropScale(def, scalea, G1=G1, SNPeff2, cscalea,op$priors$shape_scale, op$priors$rate_scale, pi_math=pi_math)
					scalea=as.numeric(mhscale[[1]])
					alpha2=as.numeric(mhscale[[2]])
					alphatot2=alphatot2+alpha2
					if (cycle==10 & iter <= op$run_para$burnIn)
					{
						alphacheck2 = alphatot2/10;
						if (alphacheck2 > .70) { cscalea = cscalea*1.2}
						else if(alphacheck2  < .20) {cscalea = cscalea*.7}     # tune Metropolis sampler #
						alphatot2 = 0;
					}
					else if (cycle==10 & iter > op$run_para$burnIn) 
					{
						alphacheck2 = alphatot2/10;
						alphascalea_save[itersave/op$run_para$skip] = alphacheck2
						alphatot2 = 0
					}
				}
	      	} #"END" OF SAMPLE THE SCALE PARAMETER 
		}
		#####################################################################################
		#      sample pi
		#####################################################################################
		if (op$update_para$pi) 
		{ 
			if(op$model=="BayesB") 	Pi_SNP = rbeta(1,op$priors$alphapi+G1,op$priors$betapi+nSNP-G1) 
			if(op$model=="SSVS") {
		 	   m1=sum(postprob)
		       Pi_SNP=rbeta(1,op$priors$alphapi+m1,op$priors$betapi+nSNP-m1)
			}	
		} #"END" OF SAMPLE THE PI PARAMETER 

		###########################################################################################
		#store MCMC samples at every "skip"
		###########################################################################################
		if (iter%%op$run_para$skip == 0) 
		{
			varesave[iter/op$run_para$skip] = vare
			scalesave[iter/op$run_para$skip] = scalea
			if(op$update_para$df) {defsave[iter/op$run_para$skip] = def}
			
			if(op$update_para$pi) {pisave[iter/op$run_para$skip] = Pi_SNP}
			
		}

		endSNP = proc.time()
		timeSNP = timeSNP + (endSNP[1]-startSNP[1]) 
		endtime = proc.time()
		timetotal = endtime[1]-starttime[1]
		
		if((op$print_mcmc$piter!=0) & iter%%op$print_mcmc$piter==0)
		{
		  if(op$model %in% c("rrBLUP","BayesA")){
		    tmp=paste("iter= ",iter," vare= ",round(vare,6), "scale= ", round(scalea,8),
		              "\ntimepercycle= ", round(timeSNP/iter,3), "estimated time left=", 
		              round(timeSNP/iter*(op$run_para$niter-iter),2),"seconds \n",sep=" ")
		  }
		  if(op$model %in% c("SSVS","BayesB")){
		    tmp=paste("iter= ",iter," vare= ",round(vare,6), "scale= ", round(scalea,6),"pi= ",round(Pi_SNP,6),
		              "\ntimepercycle= ", round(timeSNP/iter,3), "estimated time left=", 
		              round(timeSNP/iter*(op$run_para$niter-iter),2),"seconds \n",sep=" ")
		  } 
		 	if(op$print_mcmc$print_to=="screen")
			{
				cat(tmp)
			}else{
			write(tmp, file = paste("log",op$seed,".txt",sep=""),append = TRUE)
			}
		}
		
    if (iter%%1000==0) 
    {
			#############################################################################################
			#    PROCESSING MCMC SAMPLES
			#############################################################################################
      	  	hyperparameters=c()
      		counth=0
       	 	hyperparameters = cbind(hyperparameters,varesave);counth=counth+1
			hyperparameters = cbind(hyperparameters,scalesave) ;counth=counth+1
			if (op$update_para$df) {hyperparameters = cbind(hyperparameters,defsave);counth=counth+1}
			if (op$update_para$pi) {hyperparameters = cbind(hyperparameters,pisave) ;counth=counth+1}
			if(counth>0) rownames(hyperparameters) = seq(op$run_para$skip,op$run_para$niter,op$run_para$skip)
			save(iter,hyperparameters,file=paste(op$save.at, "Hyperparameters",op$seed,".RData",sep="")) 
			SNPtimepercycle = timeSNP/iter
			timepercycle=timetotal/iter
			save(iter,timepercycle,SNPtimepercycle,file=paste(op$save.at, "time",op$seed,".RData",sep=""))

    }# "END" OF PROCESSING MCMC SAMPLING
               
		if (iter%%1000==0 & itersave>1 ) 
		{
		#############################################################################################
		#    PROCESSING MCMC SAMPLES
		#############################################################################################
		  if(op$model %in% c("rrBLUP","BayesA")){
		    tmp=count/(itersave/run_para$skip)
		    bpvalue=2*apply(cbind(tmp,1-tmp),1,min)
		    if(GWA=="Win") Wprob=countqw/(itersave/run_para$skip) else Wprob=NULL
		  }
		  if(op$model %in% c("SSVS","BayesB")){
		    #if(op$model=="SSVS") postprob_est   = postprob_save/itersave else postprob_est=postprob/itersave
		    postprob_est   = postprob_save/itersave
		    if(GWA=="Win") Wpostprob_est =Wpostprob/n_saved_sample else Wpostprob_est=NULL
		    names(postprob_est)=colnames(Z)
		  } 
		  meanmu=Mcurrent[1:dimX]
			names(meanmu)=colnames(X)
			meang= Mcurrent[(dimX+1):dimW]
			names(meang)=colnames(Z)
			varg = Qcurrent[(dimX+1):dimW]/itersave
			names(varg)=colnames(Z)
			
 	    save(iter,alphadef_save,alphascalea_save,file=paste(op$save.at, "alphadef",op$seed,".RData",sep=""))
 			
 			if(op$model %in% c("BayesB","SSVS")) save(iter,meanmu,meang,postprob_est,varg,SNPvar,file = paste(op$save.at,"EffectsResults",op$seed,".RData",sep=""))
 				else save(iter,meanmu,meang,varg,SNPvar,bpvalue,Wprob,file = paste(op$save.at,"EffectsResults",op$seed,".RData",sep=""))
 			
		} # "END" OF PROCESSING MCMC SAMPLING

  } # "END" OF MCMC SAMPLING



	names_hypers=c()
	 names_hypers=c(names_hypers,"vare")
	 names_hypers = c(names_hypers,"scale") 
	if (op$update_para$df) {names_hypers = c(names_hypers,"df") }
	if (op$update_para$pi) {names_hypers = c(names_hypers,"pi") }

	sample_idx=c((op$run_para$burnIn/op$run_para$skip+1):(op$run_para$niter/op$run_para$skip))
	if(length(names_hypers)>1) hyper_est=apply(hyperparameters[sample_idx,],2,mean)
	else if (length(names_hypers)==1) hyper_est=mean(hyperparameters[sample_idx,])
	else hyper_est=c(vare,scale)
	
	colnames(hyperparameters)=names_hypers
	if(length(names_hypers)>1) names(hyper_est)=names_hypers  


	if(op$model %in% c("BayesB","SSVS")) BAout<-list(bhat=meanmu,ahat=meang, yhat=X0%*%meanmu+Z0%*%meang,
	                                                 prob=postprob_est,Wprob=Wpostprob_est,eff_sample=effectiveSize(hyperparameters),
	                                                 hypers=hyperparameters,idx=idx,hyper_est=hyper_est,train=vtrain,y=y0,map=map,win=win,Wchr=chr,GWA=GWA,model=op$model)
    else BAout<-list(bhat=meanmu,ahat=meang, yhat=X0%*%meanmu+Z0%*%meang,
                     bpvalue=bpvalue,Wprob=Wprob,eff_sample=effectiveSize(hyperparameters),hypers=hyperparameters,
                     idx=idx,hyper_est=hyper_est,train=vtrain,y=y0,map=map,win=win,Wchr=chr,GWA=GWA,model=op$model)
	
	class(BAout)="ba"
   	return(BAout)
}  # END OF FUNCTION

#######################################################################################
#######################################################################################
chenchunyu88/BATools documentation built on May 19, 2019, 8:21 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
chenchunyu88/BATools
BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study

R/BayesM.R
In chenchunyu88/BATools: BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study

R Package Documentation

Browse R Packages

We want your feedback!

chenchunyu88/BATools BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study

R/BayesM.R In chenchunyu88/BATools: BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study

R Package Documentation

Browse R Packages

We want your feedback!

chenchunyu88/BATools
BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study

R/BayesM.R
In chenchunyu88/BATools: BATools: An R package that extends 'Bayesian Alphabet' for Whole Genome Prediction and Genome-wide association study