weierstrass: Combining posterior samples from multiple subsets.

Documented in logitTest

#' A test function provide logistic regression (require package "BayesLogit") for testing Weierstrass rejection sampling (and comparing to the fullset posterior/averaging/weighted averaging combiners).
#'
#' @param n The total sample size. Default is 10000.
#' @param samp The posterior sample size on each subset. Default is 20000.
#' @param p The number of predictors (intercept is always included, so the total number of coefficients will be p+1). Default is 5.
#' @param m Number of subsets. Default is 20.
#' @param r The correlation between predictors. Default is 0.3.
#' @param draw Indicate whether the result should be plotted.
#' @param accept The acceptance rate for the Weierstrass rejection sampling.
#'
#' @return A list containing several components.
#' \enumerate{
#' \item Samples: a list containing all subset posterior samples. Input for weierstrass rejection samping.
#' \item true.posterior: a matrix containing posterior samples drawn with full data set.
#' \item CombSample.weight: a matrix containing combined samples generated by the Weierstrass rejection sampler
#' \item CombSample.unweight: a matrix containing combined samples generated by the unweighted Weierstrass rejection sampler
#' \item weight.ave: a matrix containing combined samples via inverse-variance weighted averaging.
#' \item ave: a matrix containing combined samples via simple averaging.
#' }
#'
#' @details The function generates data from a logistic regression and make use of Weierstrass rejection sampler, weighted average and averge to combine the subset posterior samples. The coefficients of the model is generated by the following formula
#'\deqn{\beta_i \sim (-1)^{ber(0.6)}|N(0,4)|}
#' Both the weighted and unweighted weierstrass sampler will be used, and the results are saved for further processing.
#'
#' @seealso \code{\link{weierstrass}} for the details of weierstrass rejection sampling. \code{\link{BinTest}} for another test on the binomial data.
#' 
#' @examples
#' \dontrun{logitTest()}
#' \dontrun{logitTest(n = 20000, p = 10, m = 50, r = 0.4)}
#' 
logitTest <- function(n = 20000, samp = 20000, p = 5, m = 20, r = 0.3, draw = TRUE, accept = 0.1){
    require('BayesLogit')

    #generate the data following logistic regression
    V = n/m
    beta = c(2*((runif(p)>0.6)-1/2)*(abs(rnorm(n = p,mean = 0,sd=2))))
    beta = c(1,beta)

    X = matrix(rnorm(n*p),n,p)
    z = rnorm(n)
    X = X+r*outer(z,rep(1,p))
    X = cbind(rep(1,n),X)

    Y1 = -X%*%beta
    Y = 1/(1+exp(Y1))>runif(n)

   
    logit<-BayesLogit::logit
    
    #obtain the subset posterior
    cat('\n Starting sampling subset posteriors\n\n')
    Samples<-list()
    for(i in 1:m){
        Samples[[i]] = logit(Y[(V*(i-1)+1):(V*i)],X[(V*(i-1)+1):(V*i),],samp = samp,burn = samp/2, m0 = rep(0,p+1), P0 = diag(rep(0.01,p+1))/m)$beta
        cat('subset',toString(i),'completed.\n')
    }

    #obtain the real posterior by BayesLogit
    cat('\n Starting sampling fullset posteriors\n\n')
    true.posterior = logit(Y,X,samp = samp, burn = samp/2, m0 = rep(0,p+1),P0 = diag(0.01,p+1,p+1))$beta
    
    #combine via (weighted) averaging
    ave1 = ave2 = matrix(0, samp, p+1)
    icov = list()
    bigc = matrix(0,p+1,p+1)
    for(i in 1:m){
        icov[[i]] = solve(cov(Samples[[i]]))
        bigc = bigc + icov[[i]]
    }
    for(i in 1:m){
        ave1 = ave1 + Samples[[i]]%*%icov[[i]]%*%solve(bigc)
        ave2 = ave2 + Samples[[i]]
    }
    ave2 = ave2/m

    #combine via Weierstrass rejection sampler
    cat(paste('starting the weierstrass rejection sampling, acceptance rate is',toString(accept),'\n\n'))
    CombSample1 = weierstrass(Samples, accept = accept)
    
    cat('completed.\n\n')
    cat(paste('starting the unweighted weierstrass rejection sampling, acceptance rate is',toString(accept),'\n\n'))
    CombSample2 = weierstrass(Samples, weight = FALSE, accept = accept)
    cat('completed.\n\n')

    #plot the result
    if(draw == TRUE){
        rows = floor(sqrt(p+1))
        cols = ceiling((p+1)/rows)
        par(mfrow = c(rows, cols))
        for(i in 1:(p+1)){
            plot(density(true.posterior[,i]),col='red',main = paste('Posterior for beta',toString(i)))
            lines(density(CombSample1[,i]),col = 'blue')
            lines(density(CombSample2[,i]),col = 'yellow',lty = 2)
            lines(density(ave1[,i]),col = 'green')
            lines(density(ave2[,i]),col = 'pink')
            abline(v = beta[i], col = 'black',lty = 2)
            legend('topleft',c('Fullset','Weierstrass', 'Unweight weiers' ,'Weighted ave','average','true value'),col = c('red','blue','yellow','green','pink','black'),lty = c(1,1,2,1,1,2))
        }
    }
    return(list(Samples = Samples, true.posterior = true.posterior, CombSample.weight = CombSample1, CombSample.unweight = CombSample2, weight.ave = ave1, ave = ave2))
}