drawMixture | R Documentation |
drawMixture
implements a Gibbs sampler to conduct inference on draws from a multivariate normal mixture.
drawMixture(out, N, Z, Prior, Mcmc, verbose)
out |
A list containing compdraw, probdraw, and (optionally) Deltadraw. |
N |
An integer specifying the number of observational units to sample |
Z |
An |
Prior |
A list with one required parameter: 'ncomp', and optional parameters: 'mubar', 'Amu', 'nu', 'V', 'Ad', 'deltaBar', and 'a'. |
Mcmc |
A list with one required parameter: 'R' - number of iterations, and optional parameters: 's', 'w', 'keep', 'nprint', and 'drawcomp'. |
verbose |
If |
A list containing the following elements:
nmix: A list with the following components:
probdraw: A matrix of size (R/keep) x (ncomp)
, containing the probability draws at each Gibbs iteration.
compdraw: A list containing the drawn mixture components at each Gibbs iteration.
Deltadraw (optional): A matrix of size (R/keep) x (nz * nvar)
, containing the delta draws, if Z
is not NULL
. If Z
is NULL
, this element is not included.
Federico Bumbaca, Leeds School of Business, University of Colorado Boulder, federico.bumbaca@colorado.edu
Bumbaca, Federico (Rico), Sanjog Misra, and Peter E. Rossi (2020), "Scalable Target Marketing: Distributed Markov Chain Monte Carlo for Bayesian Hierarchical Models", Journal of Marketing Research, 57(6), 999-1018.
Chapter 5, Bayesian Statistics and Marketing by Rossi, Allenby, and McCulloch.
rhierLinearDPParallel
,
rhierMnlDPParallel
,
# Linear DP
## Generate single component linear data with Z
R = 1000
nreg = 1000
nobs = 5 #number of observations
nvar = 3 #columns
nz = 2
Z=matrix(runif(nreg*nz),ncol=nz)
Z=t(t(Z)-apply(Z,2,mean))
Delta=matrix(c(1,0,1,0,1,2),ncol=nz)
tau0=1
iota=c(rep(1,nobs))
## create arguments for rmixture
tcomps=NULL
a = diag(1, nrow=3)
tcomps[[1]] = list(mu=c(1,-2,0),rooti=a)
tpvec = 1
ncomp=length(tcomps)
regdata=NULL
betas=matrix(double(nreg*nvar),ncol=nvar)
tind=double(nreg)
for (reg in 1:nreg) {
tempout=bayesm::rmixture(1,tpvec,tcomps)
if (is.null(Z)){
betas[reg,]= as.vector(tempout$x)
}else{
betas[reg,]=Delta%*%Z[reg,]+as.vector(tempout$x)}
tind[reg]=tempout$z
X=cbind(iota,matrix(runif(nobs*(nvar-1)),ncol=(nvar-1)))
tau=tau0*runif(1,min=0.5,max=1)
y=X%*%betas[reg,]+sqrt(tau)*rnorm(nobs)
regdata[[reg]]=list(y=y,X=X,beta=betas[reg,],tau=tau)
}
Prior1=list(ncomp=ncomp)
keep=1
Mcmc1=list(R=R,keep=keep)
Data1=list(list(regdata=regdata,Z=Z))
#subsample data
N = length(Data1[[1]]$regdata)
s=1
#Partition data into s shards
Data2 = partition_data(Data = Data1, s = s)
#Run distributed first stage
timing_result1 = system.time({
out_distributed = parallel::mclapply(Data2, FUN = rhierLinearDPParallel,
Prior = Prior1, Mcmc = Mcmc1, mc.cores = s, mc.set.seed = FALSE)
})
Z = matrix(unlist(Z), ncol = nz, byrow = TRUE)
# Conduct inference on first-stage draws
draws = parallel::mclapply(out_distributed, FUN = drawMixture,
Prior=Prior1, Mcmc=Mcmc1, N=N, Z = Z,
mc.cores = s, mc.set.seed = FALSE)
#Generate single component multinomial data with Z
##parameters
R = 1000
p = 3 # number of choice alternatives
ncoef = 3
nlgt=1000
nz = 2
# Define Z matrix
Z = matrix(runif(nz*nlgt),ncol=nz)
Z = t(t(Z)-apply(Z,2,mean)) # demean Z
Delta=matrix(c(1,0,1,0,1,2),ncol=2)
tcomps=NULL
a = diag(1, nrow=3)
tcomps[[1]] = list(mu=c(-1,2,4),rooti=a)
tpvec = 1
ncomp=length(tcomps)
simmnlwX= function(n,X,beta){
k=length(beta)
Xbeta=X %*% beta
j=nrow(Xbeta)/n
Xbeta=matrix(Xbeta,byrow=TRUE,ncol=j)
Prob=exp(Xbeta)
iota=c(rep(1,j))
denom=Prob %*% iota
Prob=Prob/as.vector(denom)
y=vector("double",n)
ind=1:j
for (i in 1:n) {
yvec = rmultinom(1, 1, Prob[i,])
y[i] = ind%*%yvec
}
return(list(y=y,X=X,beta=beta,prob=Prob))
}
## simulate data
simlgtdata=NULL
ni=rep(5,nlgt)
for (i in 1:nlgt)
{
if (is.null(Z))
{
betai=as.vector(bayesm::rmixture(1,tpvec,tcomps)$x)
} else {
betai=Delta %*% Z[i,]+as.vector(bayesm::rmixture(1,tpvec,tcomps)$x)
}
Xa=matrix(runif(ni[i]*p,min=-1.5,max=0),ncol=p)
X=bayesm::createX(p,na=1,nd=NULL,Xa=Xa,Xd=NULL,base=1)
outa=simmnlwX(ni[i],X,betai)
simlgtdata[[i]]=list(y=outa$y,X=X,beta=betai)
}
## set parms for priors and Z
Prior1=list(ncomp=ncomp)
keep=1
Mcmc1=list(R=R,keep=keep)
Data1=list(list(lgtdata=simlgtdata, p=p, Z=Z))
N = length(Data1[[1]]$lgtdata)
s=1
#Partition data into s shards
Data2 = partition_data(Data = Data1, s = s)
#Run distributed first stage
timing_result1 = system.time({
out_distributed = parallel::mclapply(Data2, FUN = rhierMnlDPParallel,
Prior = Prior1, Mcmc = Mcmc1, mc.cores = s, mc.set.seed = FALSE)
})
#Conduct inference on first-stage draws
draws = parallel::mclapply(out_distributed, FUN = drawMixture,
Prior=Prior1, Mcmc=Mcmc1, N=N, Z = Z, mc.cores = s, mc.set.seed = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.