R/sampling.r
In predmixcor: Classification rule based on Bayesian mixture models with feature selection bias corrected

Documented in bound countN gen.alpha.IGamma lf.phi lf.r prob.sch

##############################################################################

training <- function (
     data,alpha,p,
     iter,iters.phi,ratio.phi,iters.lblphi,
     phi.range,r.shape,r.rate,no.r.bank,
     correction,no.phi.cor,commonr,approxim,
     no.r1=50)
              
{   #preliminary
    #the possible value of ``r'' can take
    rspace <- gen.alpha.IGamma(no.r.bank,r.shape,r.rate) 
    rspace1 <- gen.alpha.IGamma(no.r1,r.shape,r.rate)
    oiter <- 1
    I1 <- I2 <- phi <-  matrix(0, iter + oiter, ncol(data))
    label <- matrix (0, nrow( data ), iter + oiter)
    N1 <- N2 <- rep(0, iter + oiter)

    #r is a matrix used to store the value of r and r1(for only response) 
    #in the paper
    r <- rep (0, iter + oiter)
    r1 <- r

   
    r1[1] <- r[1] <- 10
    phi[1,] <- runif( ncol(data),0.1,0.9 )
    
    label [, 1] <- sample( x = c(1, 2), size = nrow( data ), replace = TRUE )
    n.obs <- nrow( data )
    N1[1] <- sum ( label[, 1] == 1 )
    N2[1] <- n.obs - N1[1]
    I1[1,] <- apply ( data[label[, 1 ] == 1,,drop=FALSE ], 2, sum )
    I2[1,] <- apply ( data[label[, 1 ] == 2,,drop=FALSE ], 2, sum )
    
    record <- matrix(0, 1, 6) 
    if(correction){
        
         #calculating B_alpha, which records the bound of 
	 #n11(col 2 and col3) 
	 #for each
         #n01(col1)
         indexalpha <- bound(alpha,sum(data[, 1] == 0),
	                     sum(data[,1] == 1))
         
         prob.cor.sum.sch <- function(indexalpha,N,phi.range,r,n)
         {             
             #Searching from the past values of r   
             pp <- prob.sch(N = N,r = r,record = record)
             if ( pp == FALSE )
             {
                 pp <- prob.cor.sum(indexalpha = indexalpha,N = N,
                                    phi.range = phi.range, r = r, 
                                    n = n)            
                 record <<- rbind( record, c(r, pp) ) 
             }
             pp
         }
         #the function used to calculate the correction factor for 
	 #updating ``r'',
         #the output is like that of fr, a vector containing the 
	 #likelihood value at
         #each possible ``r'' in ``rspace''
         lf.r.md <- function(rspace,p,indexalpha,phi.range,N,n)
         {
             logp <- rep (0, length( rspace ) )
             for ( i in 1:length( rspace ) )
                logp [i] <- p * log( prob.cor.sum.sch( 
		                     indexalpha = indexalpha, 
                                     N = N, r = rspace[i], 
				     phi.range = phi.range,
                                     n = n))
             logp
         }
    } 
    if(commonr) ix.r <- -1 else ix.r <- 1:ncol(data)  
    ######################################################################
    #starting markov chain
    for (i in oiter+(1:iter))
    {
     	label[, i] <- label[, i - 1]
     	N1[i] <- N1[i - 1]
     	N2[i] <- N2[i - 1]
     	I1[i,] <- I1[i - 1, ]
     	I2[i,] <- I2[i - 1, ]
     	r[i] <- r[i-1]
     	r1[i] <- r1[i-1]
     	
	for(i_lbl in 1:iters.lblphi){
      		#updating phi 
      		for ( j in 1:ncol(data) )
      		{    if(j > 1) thisr <- r[i] else thisr <- r1[i]
        	       phi[i,j] <- mh1.unif( f = lf.phi, x0 = phi[i - 1, j], 
	                         iter = iters.phi,
                                 r = thisr, I1 = I1[i,j],I2 = I2[i,j], 
                                 N1 = N1[i], N2 = N2[i],x.range=phi.range,
                                 ratio = ratio.phi)
           
      	        }
      		#updating labels
      		for (nr in 1:nrow(data))
      		{
        		if( label[nr,i] == 1) {
             		N1[i] <- N1[i] - 1 
	     		I1[i,] <- I1[i,] - data[nr,] 
	 		}
         		else{ 
	     		N2[i] <- N2[i] - 1 
	     		I2[i,] <- I2[i,] - data[nr,] 
	 		}
         		theta2 <- theta1 <- rep(0,ncol(data))
      
         		theta1[-1] <- ( r[i] * phi[i,-1] + I1[i,-1] ) / ( r[i] + N1[i] )
         		theta2[-1] <- ( r[i] * phi[i,-1] + I2[i,-1] ) / ( r[i] + N2[i] )
         		theta1[1] <- (r1[i] * phi[i,1] + I1[i,1]) / (r1[i] + N1[i])
         		theta2[1] <- (r1[i] * phi[i,1] + I2[i,1]) / (r1[i] + N2[i])

         		#counting n0.c1,n1.c1,n0.c2,n1.c2
         		label[nr, i] <- 1
         		N <- countN( data[, 1], label[, i] )	 
         		if(!approxim & correction)
           		prob.z1 <- prob.cor.sum.sch( indexalpha = indexalpha, N = N, 
                        	            r = r[i], phi.range = phi.range,
                                	    n = no.phi.cor)
				    
         		else prob.z1 <- 1			    
                                 
         		log.prob1 <- sum(dbinom( data[nr, ],1,theta1,log = TRUE)) +
                        				     p * log( prob.z1 ) + 
			     				     log((N1[i] + 1)/(N1[i] + N2[i] + 2))

         		label[nr, i] <- 2
         		N <- countN( data[, 1], label[, i] )
	 		if(!approxim & correction)
           		prob.z2 <- prob.cor.sum.sch( indexalpha = indexalpha, N = N, 
                                            r = r[i], phi.range = phi.range,
                                            n = no.phi.cor)
         		else prob.z2 <- 1                             
         		log.prob2 <- sum(dbinom( data[nr, ], 1, theta2, log = TRUE )) +
                        			p * log( prob.z2 ) + 
		        			log( ( N2[i] + 1 ) / ( N1[i] + N2[i] + 2 ) )
				
         		probratio <- exp( log.prob2 - log.prob1 )
         		label[nr, i] <- 1 + as.numeric( runif(1) > 1/(1 + probratio) )

         		if( label[nr,i] == 1){
            	 		 N1[i] <- N1[i] + 1
	    	  		I1[i,] <- I1[i,] + data[nr,] 
	 		}
         		else { 
	   	  		N2[i] <- N2[i] + 1
	    	  		I2[i,] <- I2[i,] + data[nr,] 
			}
	 
      		}
        }
     	#updating r
     	N <- countN(data[, 1], label[, i])

     	log.prob.r <- lf.r(rspace,phi[i,ix.r],I1[i,ix.r],I2[i,ix.r], 
                        N1[i], N2[i]) 
     	if(correction) {
        	log.prob.r <- log.prob.r + 
        	              lf.r.md(rspace = rspace, p = p, 
	                      indexalpha = indexalpha,
                              N = N,phi.range = phi.range,
		              n = no.phi.cor) 
         
    	}
      
     	r[i] <- sample(rspace, 1, 
                    prob = exp(log.prob.r - LOG.sum.exp(log.prob.r)))
     	#updating r1
     	if(commonr) r1[i] <- r[i]
     	else{
             log.prob.r1 <- lf.r(rspace1,phi[i,1],I1[i,1],I2[i,1],N1[i],N2[i]) 
             r1[i] <- sample(rspace1,1,
	                 prob = exp(log.prob.r1 - LOG.sum.exp(log.prob.r1)))
        }
   }
   list(label=label,I1=I1,I2=I2,N1=N1,N2=N2,phi=phi,r=r,r1=r1,alpha_set= 
   rspace,alpha0_set=rspace1)
}

#############################################################################
#this function generates no.alpha alpha's from Inverse Gamma distribution 
#with shape and rate parameters, by taking the quantiles corresponding 
#to the the probabilities spaced equally from w to 1-w, 
#where w = 1/no.alpha/2
gen.alpha.IGamma <- function(no.alpha,shape,rate)
{    w <- 1/no.alpha/2
     1/qgamma(seq(1 - w, w, length = no.alpha),shape,rate)

}

#########################################################################
#this function is used to search whether the probability we want has been
#calculated previously to save time
prob.sch = function(N,r,record)
{
    N1 = N
    N2 = c(N[3:4],N[1:2])
    k = nrow(record)
    found = FALSE
    while ( (!found) & k >= 1 )
    {
        if ( all( record[k,1:4] == N1, r == record[k,5] )  ||
             all( record[k,1:4] == N2, r == record[k,5] )
           )
        {
            found <- record[k,6]
            break
        }
        else k <- k - 1
    }   
    found
}

##########################################################################
#The function for calculating the bound of n11 given n01
#n01 is stored in the first column of the output matrix, the corresponding
#starting and ending points of n11 are stored in the second and the 
#third column.
bound <- function(alpha,n0,n1)
{

   n <- n0+n1
   x.bar <- n1 / n
   x.std <- sqrt(x.bar^2*n0 + (1- x.bar)^2 * n1)
   #the function used to calculate the correlation for 
   #given n01 and n11, in
   #case that the bound determined by equation is possible to 
   #have an error of 1
   calcorl=function(n01,n11)
   {
      if(n01+n11 == 0 || n01+n11 == n)
      0
      else
      abs((-x.bar*n01+(1-x.bar)*n11) / 
          ( x.std*sqrt((n01+n11)-(n01+n11)^2/n) ) )

   }

   indexalpha=matrix(0,n0+1,3)
   indexalpha[,1]=0:n0
   #specially for n01=0
   indexalpha[1,2]=0
   indexalpha[1,3] <- min(
        floor( 1 / ( 1/n + ( (1-x.bar) / ( alpha*x.std ) )^2 ) ) , n1)

   for(n01 in 1:n0)
   {
    tmp.m=(1-x.bar)/n01
    tmp.v=(alpha*x.std/n01)^2
    upper=min(floor ( 1/( tmp.m+0.5*tmp.v -
            sqrt( (1/4*tmp.v+tmp.m-1/n)*tmp.v ) )-n01 + 1e-4 ),
              n1)

    indexalpha[n01+1,3] <- upper - ( calcorl(n01,upper) > alpha )

    lower=max(
                ceiling ( 1/( tmp.m+0.5*tmp.v +
                sqrt( (1/4*tmp.v+tmp.m-1/n)*tmp.v ) )-n01-1e-4)
            ,0)
    indexalpha[n01+1,2] <- lower + ( calcorl(n01,lower) > alpha )
   }

    indexalpha
}
#########################################################################
#This function counts n0.c1,n1.c1,n0.c2,n1.c2
countN <- function(data,label)
{
    c ( sum( (label == 1)*(data == 0) ),
        sum( (label == 1)*(data == 1) ),
        sum( (label == 2)*(data == 0) ),
        sum( (label == 2)*(data == 1) )
      )
}

#########################################################################
#the function used to calculate the log-likelidhood on each ``r'' 
#in ``rspace''
lf.r <- function(rspace,phi,I1,I2,N1,N2)
{
    logp <- rep(0,length(rspace))

    for( i in 1:length(rspace) )
       logp[i] <- sum(
            lgamma( rspace[i] * phi + I1 ) +
            lgamma( rspace[i] * phi + I2 ) +
            lgamma( rspace[i] * ( 1 - phi ) + N1 - I1 ) +
            lgamma( rspace[i] * ( 1 - phi ) + N2 - I2 ) -
            2 * lgamma( rspace[i] * phi ) -
            2 * lgamma( rspace[i] * (1-phi) )
            ) + length(phi) * ( 2 * lgamma( rspace[i] ) -
                lgamma( N1 + rspace[i] ) - lgamma( N2 + rspace[i] ) )
    logp
}
########################################################################
#defining the function of ``p'' for M-H sampling
lf.phi <- function( phi,r,I1,I2,N1,N2 )
{
    
    lgamma( r * phi + I1 ) +
    lgamma( r * phi + I2 ) +
    lgamma( r * (1-phi) + N1 - I1 ) +
    lgamma( r * (1-phi) + N2 - I2 ) -
    2 * lgamma( r * phi ) - 
    2 * lgamma( r * ( 1-phi ) )
}

Any scripts or data that you put into this service are public.

predmixcor documentation built on May 1, 2019, 11:18 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

predmixcor
Classification rule based on Bayesian mixture models with feature selection bias corrected

R/sampling.r
In predmixcor: Classification rule based on Bayesian mixture models with feature selection bias corrected

Defines functions gen.alpha.IGamma prob.sch bound countN lf.r lf.phi

Documented in bound countN gen.alpha.IGamma lf.phi lf.r prob.sch

Try the predmixcor package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

predmixcor Classification rule based on Bayesian mixture models with feature selection bias corrected

R/sampling.r In predmixcor: Classification rule based on Bayesian mixture models with feature selection bias corrected

Defines functions gen.alpha.IGamma prob.sch bound countN lf.r lf.phi

Documented in bound countN gen.alpha.IGamma lf.phi lf.r prob.sch

Try the predmixcor package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

predmixcor
Classification rule based on Bayesian mixture models with feature selection bias corrected

R/sampling.r
In predmixcor: Classification rule based on Bayesian mixture models with feature selection bias corrected