R/drawMixture.R
In scalablebayesm: Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing

Documented in drawMixture

#' Gibbs Sampler Inference for a Mixture of Multivariate Normals
#' 
#' \code{drawMixture} implements a Gibbs sampler to conduct inference on draws from a multivariate normal mixture.
#' 
#' @usage drawMixture(out, N, Z, Prior, Mcmc, verbose)
#' 
#' @param out A list containing compdraw, probdraw, and (optionally) Deltadraw.
#' @param N An integer specifying the number of observational units to sample
#' @param Z An \eqn{(nreg) \times nz} or \eqn{(nlgt) \times nz} matrix of unit characteristics 
#' @param Prior A list with one required parameter: `ncomp`, and optional parameters: `mubar`, `Amu`, `nu`, `V`, `Ad`, `deltaBar`, and `a`.
#' @param Mcmc A list with one required parameter: 'R' - number of iterations, and optional parameters: `s`, `w`, `keep`, `nprint`, and `drawcomp`.
#' @param verbose If \code{TRUE}, enumerates model parameters and timing information.
#'
#' @return A list containing the following elements:
#' \itemize{
#'   \item \strong{nmix}: A list with the following components:
#'   \itemize{
#'     \item \strong{probdraw}: A matrix of size \code{(R/keep) x (ncomp)}, containing the probability draws at each Gibbs iteration.
#'     \item \strong{compdraw}: A list containing the drawn mixture components at each Gibbs iteration.
#'     }
#'   \item \strong{Deltadraw} (optional): A matrix of size \code{(R/keep) x (nz * nvar)}, containing the delta draws, if \code{Z} is not \code{NULL}. If \code{Z} is \code{NULL}, this element is not included.
#' }
#'
#' @references
#' Bumbaca, Federico (Rico), Sanjog Misra, and Peter E. Rossi (2020), "Scalable Target Marketing:
#' Distributed Markov Chain Monte Carlo for Bayesian Hierarchical Models", Journal of Marketing
#' Research, 57(6), 999-1018.
#' 
#'
#' Chapter 5, \emph{Bayesian Statistics and Marketing} by Rossi, Allenby, and McCulloch.
#'
#' @author
#' Federico Bumbaca, Leeds School of Business, University of Colorado Boulder, \email{federico.bumbaca@colorado.edu}
#' 
#' @seealso 
#' \code{\link{rhierLinearDPParallel}}, 
#' \code{\link{rhierMnlDPParallel}}, 
#' 
#' @examples
#' 
#' \donttest{
#' 
#' # Linear DP
#'## Generate single component linear data with Z
#'R = 1000
#'nreg = 1000
#'nobs = 5 #number of observations
#'nvar = 3 #columns
#'nz = 2
#'
#'Z=matrix(runif(nreg*nz),ncol=nz) 
#'Z=t(t(Z)-apply(Z,2,mean))
#'Delta=matrix(c(1,0,1,0,1,2),ncol=nz) 
#'tau0=1
#'iota=c(rep(1,nobs)) 
#'
#'## create arguments for rmixture
#'tcomps=NULL
#'a = diag(1, nrow=3)
#'tcomps[[1]] = list(mu=c(1,-2,0),rooti=a) 
#'tpvec = 1                            
#'ncomp=length(tcomps)
#'
#'regdata=NULL
#'betas=matrix(double(nreg*nvar),ncol=nvar) 
#'tind=double(nreg) 
#'
#'for (reg in 1:nreg) { 
#'  tempout=bayesm::rmixture(1,tpvec,tcomps)
#'  if (is.null(Z)){
#'    betas[reg,]= as.vector(tempout$x)  
#'  }else{
#'    betas[reg,]=Delta%*%Z[reg,]+as.vector(tempout$x)} 
#'  tind[reg]=tempout$z
#'  X=cbind(iota,matrix(runif(nobs*(nvar-1)),ncol=(nvar-1))) 
#'  tau=tau0*runif(1,min=0.5,max=1) 
#'  y=X%*%betas[reg,]+sqrt(tau)*rnorm(nobs)
#'  regdata[[reg]]=list(y=y,X=X,beta=betas[reg,],tau=tau) 
#'}
#'
#'Prior1=list(ncomp=ncomp) 
#'keep=1
#'Mcmc1=list(R=R,keep=keep)
#'Data1=list(list(regdata=regdata,Z=Z))
#'
#'#subsample data
#'N = length(Data1[[1]]$regdata)
#'
#'s=1
#'
#'#Partition data into s shards
#'Data2 = partition_data(Data = Data1, s = s)
#'
#'#Run distributed first stage
#'timing_result1 = system.time({
#'  out_distributed = parallel::mclapply(Data2, FUN = rhierLinearDPParallel, 
#'  Prior = Prior1, Mcmc = Mcmc1, mc.cores = s, mc.set.seed = FALSE)
#'})
#'
#'Z = matrix(unlist(Z), ncol = nz, byrow = TRUE)
#'
#'# Conduct inference on first-stage draws
#'draws = parallel::mclapply(out_distributed, FUN = drawMixture, 
#'Prior=Prior1, Mcmc=Mcmc1, N=N, Z = Z,
#'                           mc.cores = s, mc.set.seed = FALSE) 
#'
#' #Generate single component multinomial data with Z
#'##parameters
#'R = 1000
#'p = 3 # number of choice alternatives                            
#'ncoef = 3
#'nlgt=1000
#'nz = 2
#'
#'# Define Z matrix
#'Z = matrix(runif(nz*nlgt),ncol=nz)
#'Z = t(t(Z)-apply(Z,2,mean))          # demean Z
#'Delta=matrix(c(1,0,1,0,1,2),ncol=2)
#'
#'tcomps=NULL
#'a = diag(1, nrow=3)
#'tcomps[[1]] = list(mu=c(-1,2,4),rooti=a) 
#'tpvec = 1                             
#'ncomp=length(tcomps)
#'
#'simmnlwX= function(n,X,beta){
#'  k=length(beta)
#'  Xbeta=X %*% beta
#'  j=nrow(Xbeta)/n
#'  Xbeta=matrix(Xbeta,byrow=TRUE,ncol=j)
#'  Prob=exp(Xbeta)
#'  iota=c(rep(1,j))
#'  denom=Prob %*% iota
#'  Prob=Prob/as.vector(denom)
#'  y=vector("double",n)
#'  ind=1:j
#'  for (i in 1:n) { 
#'    yvec = rmultinom(1, 1, Prob[i,])
#'    y[i] = ind%*%yvec
#'  }
#'  return(list(y=y,X=X,beta=beta,prob=Prob))
#'}
#'
#'## simulate data
#'simlgtdata=NULL
#'ni=rep(5,nlgt) 
#'for (i in 1:nlgt) 
#'{
#'  if (is.null(Z))
#'  {
#'    betai=as.vector(bayesm::rmixture(1,tpvec,tcomps)$x)
#'  } else {
#'    betai=Delta %*% Z[i,]+as.vector(bayesm::rmixture(1,tpvec,tcomps)$x)
#'  }
#'  Xa=matrix(runif(ni[i]*p,min=-1.5,max=0),ncol=p)
#'  X=bayesm::createX(p,na=1,nd=NULL,Xa=Xa,Xd=NULL,base=1)
#'  outa=simmnlwX(ni[i],X,betai)
#'  simlgtdata[[i]]=list(y=outa$y,X=X,beta=betai)
#'}
#'
#'## set parms for priors and Z
#'Prior1=list(ncomp=ncomp) 
#'keep=1
#'Mcmc1=list(R=R,keep=keep) 
#'Data1=list(list(lgtdata=simlgtdata, p=p, Z=Z))
#'
#'N = length(Data1[[1]]$lgtdata)
#'
#'s=1
#'
#'#Partition data into s shards
#'Data2 = partition_data(Data = Data1, s = s)
#'
#'#Run distributed first stage
#'timing_result1 = system.time({
#'  out_distributed = parallel::mclapply(Data2, FUN = rhierMnlDPParallel, 
#'  Prior = Prior1, Mcmc = Mcmc1, mc.cores = s, mc.set.seed = FALSE)
#'})
#'
#' #Conduct inference on first-stage draws
#'draws = parallel::mclapply(out_distributed, FUN = drawMixture, 
#'Prior=Prior1, Mcmc=Mcmc1, N=N, Z = Z, mc.cores = s, mc.set.seed = FALSE) 
#'
#' }
#' 
#' 
#' @export

drawMixture = function(out,
                        N, #Needs to be less than/equal length of data
                        Z=NULL, #optional, must have N rows
                        Prior,
                        Mcmc,
                        verbose = FALSE)
{
  #
  # Revision History: 
  #   P. Rossi 3/05
  #   add check to see if Mubar is a vector  9/05
  #   fixed bug in saving comps draw comps[[mkeep]]=  9/05
  #   fixed so that ncomp can be =1; added check that nobs >= 2*ncomp   12/06
  #   3/07 added classes
  #   added log-likelihood  9/08
  #   W. Taylor 4/15 - added nprint option to MCMC argument
  #
  # purpose: do Gibbs sampling inference for a mixture of multivariate normals
  #
  # arguments:
  #     Data is a list of y which is an n x k matrix of data -- each row
  #       is an iid draw from the normal mixture
  #     Prior is a list of (Mubar,A,nu,V,a,ncomp)
  #       ncomp is required
  #       if elements of the prior don't exist, defaults are assumed
  #     Mcmc is a list of R, keep (thinning parameter), and nprint
  # Output:
  #     list with elements
  #     pdraw -- R/keep x ncomp array of mixture prob draws
  #     zdraw -- R/keep x nobs array of indicators of mixture comp identity for each obs
  #     compsdraw -- list of R/keep lists of lists of comp parm draws
  #        e.g. compsdraw[[i]] is ith draw -- list of ncomp lists
  #             compsdraw[[i]][[j]] is list of parms for jth normal component
  #             if jcomp=compsdraw[[i]][j]]
  #                        ~N(jcomp[[1]],Sigma), Sigma = t(R)%*%R, R^{-1} = jcomp[[2]]
  #
  # Model:
  #        y_i ~ N(mu_ind,Sigma_ind)
  #        ind ~ iid multinomial(p)  p is a 1x ncomp vector of probs
  # Priors:
  #        mu_j ~ N(mubar,Sigma (x) A^-1)
  #        mubar=vec(Mubar)
  #        Sigma_j ~ IW(nu,V)
  #        note: this is the natural conjugate prior -- a special case of multivariate 
  #              regression
  #        p ~ Dirchlet(a)
  #
  #  check arguments
  #
  #
  # -----------------------------------------------------------------------------------------
  llnmix=function(Y,z,comps){
    #
    # evaluate likelihood for mixture of normals
    #
    zu=unique(z)
    ll=0.0
    for(i in 1:length(zu)){
      Ysel=Y[z==zu[i],,drop=FALSE]
      ll=ll+sum(apply(Ysel,1,lndMvn,mu=comps[[zu[i]]]$mu,rooti=comps[[zu[i]]]$rooti))
    }
    return(ll)
  }
  # -----------------------------------------------------------------------------------------
  #  if(missing(Data)) {message("Requires Data argument -- list of y")}
  #  if(is.null(Data$y)) {message("Requires Data element y")}
  #  y=Data$y
  #
  # check data for validity
  #
  
  PredCompDraws = out$compdraw
  
  if(is.null(out$Deltadraw)){
    PredDeltaDraws = matrix(0, nrow = 1, ncol = 1)
  } else {
    PredDeltaDraws = out$Deltadraw 
  }
  nvar = length(PredCompDraws[[1]][[1]]$mu)
  
  drawdelta=TRUE
  if(is.null(Z)) {
    if(verbose){message("Z not specified")}; drawdelta=FALSE; Z = matrix(0, nrow = 1, ncol = 1)
  } else {
    nz = ncol(Z)
    Z = matrix(Z, ncol = nz, byrow = TRUE)
  }
  
  # check for Prior
  #
  
  # Want to adjust s.th. Prior can be NULL; let parms take default values
  if(missing(Prior)) {message("requires Prior argument ")} else{
    if(is.null(Prior$ncomp)) {stop("requires number of mix comps -- Prior$ncomp")}
    else {ncomp=Prior$ncomp}
    if(is.null(Prior$mubar)) {mubar=matrix(rep(0,nvar),nrow=1)} 
    else {mubar=Prior$mubar; if(is.vector(mubar)) {mubar=matrix(mubar,nrow=1)}}
    if(is.null(Prior$Amu)) {Amu=matrix(ScalableBayesmConstant.A,ncol=1)} 
    else {Amu=Prior$Amu}
    if(is.null(Prior$nu)) {nu=nvar+ScalableBayesmConstant.nuInc} 
    else {nu=Prior$nu}
    if(is.null(Prior$V)) {V=nu*diag(nvar)} 
    else {V=Prior$V}
    if(is.null(Prior$Ad) & drawdelta) {Ad=ScalableBayesmConstant.A*diag(nvar*nz)} else {Ad=Prior$Ad}
    if(drawdelta) {if(ncol(Ad) != nvar*nz | nrow(Ad) != nvar*nz) {message("Ad must be nvar*nz x nvar*nz")}}
    if(is.null(Prior$deltabar)& drawdelta) {deltabar=rep(0,nz*nvar)} else {deltabar=Prior$deltabar}
    if(drawdelta) {if(length(deltabar) != nz*nvar) {message("deltabar must be of length nvar*nz")}}
    if(is.null(Prior$a)) {a=c(rep(ScalableBayesmConstant.a,ncomp))}
    else {a=Prior$a}
  }
  
  #
  # check for adequate no. of observations
  #
  #  if(nobs<2*ncomp)
  #  {message("too few obs, nobs should be >= 2*ncomp")}
  #
  # check dimensions of Priors
  #
  if(ncol(Amu) != nrow(Amu) || ncol(Amu) != 1)
  {message(paste("bad dimensions for Amu",dim(Amu)))}
  if(!is.matrix(mubar))
  {message("mubar must be a matrix")}
  if(nrow(mubar) != 1 || ncol(mubar) != nvar) 
  {message(paste("bad dimensions for mubar",dim(mubar)))}
  if(ncol(V) != nrow(V) || ncol(V) != nvar)
  {message(paste("bad dimensions for V",dim(V)))}
  if(length(a) != ncomp)
  {message(paste("'a' wrong length, length= ",length(a)))}
  bada=FALSE
  for(i in 1:ncomp){if(a[i] < 0) bada=TRUE}
  if(bada) message("invalid values in 'a' vector")
  #
  # check MCMC argument
  #
  
  if(missing(Mcmc)) {stop("requires Mcmc argument")}else{
    if(is.null(Mcmc$R)) 
    {message("requires Mcmc element R")} else {R=Mcmc$R}
    if(is.null(Mcmc$keep)) {keep=ScalableBayesmConstant.keep} else {keep=Mcmc$keep}
    if(is.null(Mcmc$nprint)) {nprint=ScalableBayesmConstant.nprint} else {nprint=Mcmc$nprint}
    if(nprint<0) {message('nprint must be an integer greater than or equal to 0')}
    if(is.null(Mcmc$LogLike)) {LogLike=FALSE} else {LogLike=Mcmc$LogLike}
    if(!is.null(Mcmc$olddelta)) {olddelta=Mcmc$olddelta} else {olddelta = rep(0,nz*nvar)}
  }
  
  if(verbose){
  #
  # Print out the problem
  #
  cat("Starting Gibbs Sampler for Mixture of Normals")
  cat(paste(N, "observations on", nvar, "dimensional data"))
  cat(paste("Using", ncomp, "mixture components"))
  cat(" ")
  
  cat("Prior Parameters:")
  cat("mu_j ~ N(mubar, Sigma (x) A^-1)")
  cat("mubar = ")
  print(mubar)  # Use print() for matrices or vectors
  cat("Precision parameter for prior variance of mu vectors (A) = ", Amu)
  cat("Sigma_j ~ IW(nu, V) nu = ", nu)
  cat("V = ")
  print(V)  # Use print() for matrices or vectors
  cat("Dirichlet parameters")
  print(a)  # Use print() for matrices or vectors
  
  if (drawdelta) {
    cat("deltabar")
    print(deltabar)  # Use print() for matrices or vectors
    cat("Ad")
    print(Ad)  # Use print() for matrices or vectors
  }
  
  cat(" ")
  cat(paste("MCMC Parameters: R =", R, "keep =", keep, "nprint =", nprint, "LogLike =", LogLike))
  }
  compsd=list()
  if(LogLike) ll=double(floor(R/keep))
  
  #
  #initialize delta
  #
  if (!drawdelta){
    olddelta = 0
    Z = matrix(0)
    deltabar = 0
    Ad = matrix(0)
  }
  
  #
  # set initial values of ind
  #
  ind=rep(c(1:ncomp),(floor(N/ncomp)+1))
  ind=ind[1:N]
  if(verbose){
  cat(" ",fill=TRUE)
  cat("starting value for ind",fill=TRUE)
  cat(table(ind))
  cat(" ",fill=TRUE)}
  oldprob=c(rep(1,ncomp))/ncomp # note this is not used
  #fsh()
  
  #Wayne Taylor 8/18/14#####################################################
  out = drawMixture_rcpp_loop(predcompdraws = PredCompDraws, preddeltadraws = PredDeltaDraws,
                              Z = Z, drawdelta = drawdelta, 
                              olddelta = as.matrix(olddelta), deltabar = deltabar, Ad = Ad,
                              mubar = mubar, Amu = Amu, nu = nu, 
                              V = V, a = a, oldprob = oldprob, ind = ind, R = R,
                              keep = keep, nprint = nprint, N = N, verbose = verbose);
  ##########################################################################
  
  attributes(out$nmix)$class="bayesm.nmix"

  #return(list(probdraw=out$probdraw, zdraw=NULL, compdraw=out$compdraw, 
  #            Deltadraw=out$Deltadraw))
  return(out)
}
Any scripts or data that you put into this service are public.
scalablebayesm documentation built on April 3, 2025, 7:55 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
scalablebayesm
Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing

R/drawMixture.R
In scalablebayesm: Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing

Defines functions drawMixture

Documented in drawMixture

Try the scalablebayesm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

scalablebayesm Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing

R/drawMixture.R In scalablebayesm: Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing

Defines functions drawMixture

Documented in drawMixture

Try the scalablebayesm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

scalablebayesm
Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing

R/drawMixture.R
In scalablebayesm: Distributed Markov Chain Monte Carlo for Bayesian Inference in Marketing