R/rhierLinearMixtureParallel.R

Defines functions rhierLinearMixtureParallel

Documented in rhierLinearMixtureParallel

# -*- coding: UTF-8 -*-
#' MCMC Algorithm for Hierarchical Multinomial Linear Model with Mixture-of-Normals Heterogeneity
#'
#' \code{rhierLinearMixtureParallel} implements a MCMC algorithm for hierarchical linear model with a mixture of normals heterogeneity distribution.
#'
#' @usage rhierLinearMixtureParallel(Data, Prior, Mcmc, verbose = FALSE)
#'
#' @param Data A list containing: `regdata` - A \eqn{nreg} size list of multinomial regdata, and optional `Z`- \eqn{nreg \times nz} matrix of unit characteristics.
#' @param Prior A list with one required parameter: `ncomp`, and optional parameters: `deltabar`, `Ad`, `mubar`, `Amu`, `nu`, `V`, `nu.e`, and `ssq`.
#' @param Mcmc A list with one required parameter: `R`-number of iterations, and optional parameters: `keep` and `nprint`.
#' @param verbose If \code{TRUE}, enumerates model parameters and timing information.
#'
#' @details
#' \subsection{Model and Priors}{
#'     \code{nreg} regression equations with \code{nvar} as the number of \eqn{X} vars in each equation \cr
#'     \eqn{y_i = X_i\beta_i + e_i} with \eqn{e_i} \eqn{\sim}{~} \eqn{N(0, \tau_i)}  
#'
#'     \eqn{\tau_i} \eqn{\sim}{~} \eqn{nu.e*ssq_i/\chi^2_{nu.e}} where  \eqn{\tau_i} is the variance of \eqn{e_i}\cr
#'     \eqn{B = Z\Delta + U} or \eqn{\beta_i = \Delta' Z[i,]' + u_i} \cr
#'     \eqn{\Delta} is an \eqn{nz \times nvar} matrix \cr
#'
#'     \eqn{Z} should \emph{not} include an intercept and should be centered for ease of interpretation. 
#'     The mean of each of the \code{nreg} \eqn{\beta}s is the mean of the normal mixture. 
#'     Use \code{summary()} to compute this mean from the \code{compdraw} output.
#'
#'     \eqn{u_i} \eqn{\sim}{~} \eqn{N(\mu_{ind}, \Sigma_{ind})}\cr
#'     \eqn{ind} \eqn{\sim}{~} \eqn{multinomial(pvec)} \cr
#'
#'     \eqn{pvec} \eqn{\sim}{~} \eqn{dirichlet(a)}\cr
#'     \eqn{delta = vec(\Delta)} \eqn{\sim}{~} \eqn{N(deltabar, A_d^{-1})}\cr
#'     \eqn{\mu_j} \eqn{\sim}{~} \eqn{N(mubar, \Sigma_j(x) Amu^{-1})}\cr
#'     \eqn{\Sigma_j} \eqn{\sim}{~} \eqn{IW(nu, V)} \cr
#'    
#'     Be careful in assessing the prior parameter \code{Amu}: 0.01 can be too small for some applications. 
#'     See chapter 5 of Rossi et al for full discussion.\cr    
#' }
#'
#' \subsection{Argument Details}{
#' \emph{\code{Data  = list(regdata, Z)} [\code{Z} optional]}
#' \tabular{ll}{
#'     \code{regdata:        } \tab A \eqn{nreg} size list of regdata \cr
#'     \code{regdata[[i]]$X: } \tab \eqn{n_i \times nvar} design matrix for equation \eqn{i}  \cr
#'     \code{regdata[[i]]$y: } \tab \eqn{n_i \times 1} vector of observations for equation \eqn{i} \cr
#'     \code{Z:              } \tab An \eqn{(nreg) \times nz} matrix of unit characteristics 
#'     }
#'  
#' \emph{\code{Prior = list(deltabar, Ad, mubar, Amu, nu.e, V, ssq, ncomp)} [all but \code{ncomp} are optional]}
#' \tabular{ll}{
#'     \code{deltabar:       } \tab \eqn{(nz \times nvar) \times 1} vector of prior means (def: 0) \cr
#'     \code{Ad:             } \tab prior precision matrix for vec(D) (def: 0.01*I) \cr
#'     \code{mubar:          } \tab \eqn{nvar \times 1} prior mean vector for normal component mean (def: 0) \cr
#'     \code{Amu:            } \tab prior precision for normal component mean (def: 0.01) \cr
#'     \code{nu.e:             } \tab d.f. parameter for regression error variance prior (def: 3) \cr
#'     \code{V:              } \tab PDS location parameter for IW prior on normal component Sigma (def: nu*I) \cr
#'     \code{ssq:            } \tab scale parameter for regression error variance prior (def: \code{var(y_i)}) \cr
#'     \code{ncomp:          } \tab number of components used in normal mixture \cr
#'     }
#'  
#' \emph{\code{Mcmc  = list(R, keep, nprint)} [only \code{R} required]}
#' \tabular{ll}{
#'     \code{R:              } \tab number of MCMC draws \cr
#'     \code{keep:           } \tab MCMC thinning parameter -- keep every \code{keep}th draw (def: 1) \cr
#'     \code{nprint:         } \tab print the estimated time remaining for every \code{nprint}'th draw (def: 100, set to 0 for no print) \cr
#'     } 
#' }
#'
#' @return A list of sharded partitions where each partition contains the following: 
#' \item{compdraw}{A list (length: R/keep) where each list contains `mu` (vector, length: `ncomp`) and `rooti` (matrix, shape: ncomp \eqn{\times} ncomp)}
#' \item{probdraw}{A \eqn{(R/keep) \times (ncomp)} matrix that reports the probability that each draw came from a particular component}
#' \item{Deltadraw}{A \eqn{(R/keep) \times (nz \times nvar)} matrix of draws of Delta, first row is initial value. Deltadraw is NULL if Z is NULL} 
#'
#' @references
#' Bumbaca, Federico (Rico), Sanjog Misra, and Peter E. Rossi (2020), "Scalable Target Marketing:
#' Distributed Markov Chain Monte Carlo for Bayesian Hierarchical Models", Journal of Marketing
#' Research, 57(6), 999-1018.
#'
#' Chapter 5, \emph{Bayesian Statistics and Marketing} by Rossi, Allenby, and McCulloch.
#'
#' @author
#' Federico Bumbaca, Leeds School of Business, University of Colorado Boulder, \email{federico.bumbaca@colorado.edu}
#'
#' @seealso 
#' \code{\link{partition_data}}, 
#' \code{\link{drawPosteriorParallel}}, 
#' \code{\link{combine_draws}}, 
#' \code{\link{rheteroLinearIndepMetrop}}
#'
#' @examples
#' 
#' 
#'######### Single Component with rhierLinearMixtureParallel########
#'R = 500
#'
#'nreg=1000
#'nobs=5 #number of observations
#'nvar=3 #columns
#'nz=2
#'
#'Z=matrix(runif(nreg*nz),ncol=nz) 
#'Z=t(t(Z)-apply(Z,2,mean))
#'
#'Delta=matrix(c(1,-1,2,0,1,0),ncol=nz)
#'tau0=.1
#'iota=c(rep(1,nobs)) 
#'
#'#Default
#'tcomps=NULL
#'a=matrix(c(1,0,0,0.5773503,1.1547005,0,-0.4082483,0.4082483,1.2247449),ncol=3) 
#'tcomps[[1]]=list(mu=c(0,-1,-2),rooti=a) 
#'tpvec=c(1)                               
#'
#'regdata=NULL						  
#'betas=matrix(double(nreg*nvar),ncol=nvar) 
#'tind=double(nreg) 
#'
#'for (reg in 1:nreg) {
#'  tempout=bayesm::rmixture(1,tpvec,tcomps) 
#'  if (is.null(Z)){
#'    betas[reg,]= as.vector(tempout$x)  
#'  }else{
#'    betas[reg,]=Delta %*% Z[reg,]+as.vector(tempout$x)} 
#'  tind[reg]=tempout$z
#'  X=cbind(iota,matrix(runif(nobs*(nvar-1)),ncol=(nvar-1))) 
#'  tau=tau0*runif(1,min=0.5,max=1) 
#'  y=X %*% betas[reg,]+sqrt(tau)*rnorm(nobs)
#'  regdata[[reg]]=list(y=y,X=X,beta=betas[reg,],tau=tau)
#'}
#'
#'
#'Data1=list(list(regdata=regdata,Z=Z))
#'s = 1
#'Data2=scalablebayesm::partition_data(Data1,s=s)
#'
#'Prior1=list(ncomp=1)
#'Mcmc1=list(R=R,keep=1)
#'
#'out2 = parallel::mclapply(Data2, FUN = rhierLinearMixtureParallel, Prior = Prior1,
#'Mcmc = Mcmc1, mc.cores = s, mc.set.seed = FALSE)
#'
#'######### Multiple Components with rhierLinearMixtureParallel########
#'R = 500
#'
#'set.seed(66)
#'nreg=1000
#'nobs=5 #number of observations
#'nvar=3 #columns
#'nz=2
#'
#'Z=matrix(runif(nreg*nz),ncol=nz) 
#'Z=t(t(Z)-apply(Z,2,mean))
#'
#'Delta=matrix(c(1,-1,2,0,1,0),ncol=nz)
#'tau0=.1
#'iota=c(rep(1,nobs)) 
#'
#'#Default
#'tcomps=NULL
#'a=matrix(c(1,0,0,0.5773503,1.1547005,0,-0.4082483,0.4082483,1.2247449),ncol=3) 
#'tcomps[[1]]=list(mu=c(0,-1,-2),rooti=a) 
#'tcomps[[2]]=list(mu=c(0,-1,-2)*2,rooti=a)
#'tcomps[[3]]=list(mu=c(0,-1,-2)*4,rooti=a)
#'tpvec=c(.4,.2,.4)                                   
#'
#'regdata=NULL						  
#'betas=matrix(double(nreg*nvar),ncol=nvar) 
#'tind=double(nreg) 
#'
#'for (reg in 1:nreg) {
#'  tempout=bayesm::rmixture(1,tpvec,tcomps) 
#'  if (is.null(Z)){
#'    betas[reg,]= as.vector(tempout$x)  
#'  }else{
#'    betas[reg,]=Delta %*% Z[reg,]+as.vector(tempout$x)} 
#'  tind[reg]=tempout$z
#'  X=cbind(iota,matrix(runif(nobs*(nvar-1)),ncol=(nvar-1))) 
#'  tau=tau0*runif(1,min=0.5,max=1) 
#'  y=X %*% betas[reg,]+sqrt(tau)*rnorm(nobs)
#'  regdata[[reg]]=list(y=y,X=X,beta=betas[reg,],tau=tau)
#'}
#'
#'
#'Data1=list(list(regdata=regdata,Z=Z))
#'s = 1
#'Data2=scalablebayesm::partition_data(Data1, s=s)
#'
#'Prior1=list(ncomp=3)
#'Mcmc1=list(R=R,keep=1)
#'
#'set.seed(1)
#'out2 = parallel::mclapply(Data2, FUN = rhierLinearMixtureParallel, Prior = Prior1,
#'Mcmc = Mcmc1, mc.cores = s, mc.set.seed = FALSE)
#'
#' @rdname rhierLinearMixtureParallel
#' @export

rhierLinearMixtureParallel=function(Data,Prior,Mcmc, verbose = FALSE){
  # Boyang 2023 
  # revision history:
  #   changed 12/17/04 by rossi to fix bug in drawdelta when there is zero/one unit
  #   in a mixture component
  #   adapted to linear model by Vicky Chen 6/06
  #   put in classes 3/07
  #   changed a check 9/08
  #   W. Taylor 4/15 - added nprint option to MCMC argument
  #
  # purpose: run hierarchical linear model with mixture of normals 
  #
  # Arguments:
  #   Data contains a list of (regdata, and possibly Z)
  #      regdata is a list of lists (one list per unit)
  #          regdata[[i]]=list(y,X)
  #             y is a vector of observations
  #             X is a length(y) x nvar matrix of values of
  #               X vars including intercepts
  #             Z is an nreg x nz matrix of values of variables
  #               note: Z should NOT contain an intercept
  #   Prior contains a list of (nu.e,ssq,deltabar,Ad,mubar,Amu,nu,V,ncomp,a) 
  #      ncomp is the number of components in normal mixture
  #           if elements of Prior (other than ncomp) do not exist, defaults are used
  #   Mcmc contains a list of (s,c,R,keep,nprint)
  #
  # Output:  as list containing
  #   taudraw is R/keep x nreg  array of error variances for each regression
  #   Deltadraw R/keep  x nz*nvar matrix of draws of Delta, first row is initial value
  #   betadraw is nreg x nvar x R/keep array of draws of betas
  #   probdraw is R/keep x ncomp matrix of draws of probs of mixture components
  #   compdraw is a list of list of lists (length R/keep)
  #      compdraw[[rep]] is the repth draw of components for mixtures
  #
  # Priors:
  #    tau_i ~ nu.e*ssq_i/chisq(nu.e)  tau_i is the variance of epsilon_i
  #    beta_i = delta %*% z[i,] + u_i
  #       u_i ~ N(mu_ind[i],Sigma_ind[i])
  #       ind[i] ~multinomial(p)
  #       p ~ dirichlet (a)
  #           a: Dirichlet parameters for Prior on p
  #       delta is a k x nz array
  #          delta= vec(D) ~ N(deltabar,A_d^-1)
  #    mu_j ~ N(mubar,A_mu^-1(x)Sigma_j)
  #    Sigma_j ~ IW(nu,V^-1)
  #    ncomp is number of components
  #
  # MCMC parameters
  #   R is number of draws
  #   keep is thinning parameter, keep every keepth draw
  #   nprint - print estimated time remaining on every nprint'th draw
  #
  #  check arguments
  #
  #--------------------------------------------------------------------------------------------------
  #  create functions needed
  #
  append=function(l) { l=c(l,list(XpX=crossprod(l$X),Xpy=crossprod(l$X,l$y)))}
  #
  getvar=function(l) { 
    v=var(l$y)
    if(is.na(v)) return(1)
    if(v>0) return (v) else return (1)}
  #
  if(missing(Data) || !is.list(Data)) {stop("Requires Data argument -- list of p,lgtdata, and (optionally) Z", .call=FALSE)}
  if(missing(Prior) || !is.list(Prior)) {stop("Requires Prior list argument (at least ncomp)", .call=FALSE)}
  if(missing(Mcmc) || !is.list(Mcmc)) {stop("Requires Mcmc list argument", .call=FALSE)}
  
  if(is.null(Data$regdata)) {stop("Requires Data element regdata (list of Data for each unit)")}
  nreg=length(Data$regdata)
  nvar=ncol(Data$regdata[[1]]$X)
  nz = ncol(Data$Z) 
  
  
  drawdelta=TRUE
  if(is.null(Data$Z)) {
    message("Z not specified",fill=TRUE); #fsh() ; 
    drawdelta=FALSE
  } else {
    if (nrow(Data$Z) != nreg) {
      stop(paste("Nrow(Z) ",nrow(Data$Z),"ne number regressions ",nreg),.call=FALSE)
    } else {
      z=Data$Z
    }
  }
  
  # check regdata for validity
  dimfun=function(l) {c(length(l$y),dim(l$X))}
  dims=sapply(Data$regdata,dimfun) #a matrix of nrow=3, ncol=nreg, where each column includes length of X, length of y, and number of ncol
  dims=t(dims) 
  nvar=quantile(dims[,3],prob=.5) #return number of columns and 50%
  
  for (i in 1:nreg) 
  {
    if(dims[i,1] != dims[i,2]  || dims[i,3] !=nvar) 
    {stop(paste("Bad Data dimensions for unit ",i," dims(y,X) =",dims[i,]))}
  }
  
  # check prior
  if(missing(Prior)) 
  {stop("Requires Prior list argument (at least ncomp)", .call=FALSE)} 
  if(is.null(Prior$nu.e)) {
    nu.e=ScalableBayesmConstant.nu.e
  } else {
    nu.e=Prior$nu.e
  }
  if(is.null(Prior$ssq)) {
    ssq=sapply(Data$regdata,getvar)
  } else {
    ssq=Prior$ssq
  }
  
  if(is.null(Prior$ncomp)) {
    stop("Requires Prior element ncomp (num of mixture components)", 
         .call=FALSE)
  } else {
    ncomp=Prior$ncomp
  }
  
  if(is.null(Prior$mubar)) {
    mubar=matrix(rep(0,nvar),nrow=1)
  } else {
    mubar=matrix(Prior$mubar,nrow=1)
  }
  
  if(ncol(mubar) != nvar) {stop(paste("mubar must have ncomp cols, ncol(mubar)= ",
                                      ncol(mubar)), .call=FALSE)}
  
  if(is.null(Prior$amu)) {
    amu=matrix(ScalableBayesmConstant.A,ncol=1)
  } else {
    amu=matrix(Prior$amu,ncol=1)
  }
  
  if(ncol(amu) != 1 | nrow(amu) != 1) {stop("Am must be a 1 x 1 array", 
                                            .call=FALSE)}
  
  if(is.null(Prior$nu)) {
    nu=nvar+ScalableBayesmConstant.nuInc
  } else {
    nu=Prior$nu
  }
  
  if(nu < 1) {stop("invalid nu value", .call=FALSE)}
  
  if(is.null(Prior$v)) {
    v=nu*diag(nvar)
  } else {
    v=Prior$v
  }
  
  if(sum(dim(v)==c(nvar,nvar)) !=2) {stop("Invalid v in Prior", .call=FALSE)}
  
  if(is.null(Prior$ad) & drawdelta) {
    ad=ScalableBayesmConstant.A*diag(nvar*nz)
  } else {
    ad=Prior$ad
  }
  
  if(drawdelta) {
    if(ncol(ad) != nvar*nz | nrow(ad) != nvar*nz) {
      stop("ad must be nvar*nz x nvar*nz", .call=FALSE)
    }
  }
  
  
  if(is.null(Prior$deltabar)& drawdelta) {
    deltabar=rep(0,nz*nvar)
  } else {
    deltabar=Prior$deltabar
  }
  
  #Check dimension of delta
  if(drawdelta) {
    if(length(deltabar) != nz*nvar) {stop("deltabar must be of length nvar*nz", 
                                          .call=FALSE)}
  }
  
  if(is.null(Prior$a)) {
    a=rep(ScalableBayesmConstant.a,ncomp)
  } else {
    a=Prior$a
  }
  
  if(length(a) != ncomp) {stop("Requires dim(a)= ncomp (no of components)", 
                               .call=FALSE)}
  
  bada=FALSE
  
  for(i in 1:ncomp) { if(a[i] < 0) bada=TRUE}
  if(bada) {stop("invalid values in a vector", .call=FALSE)}
  # check on Mcmc
  if(missing(Mcmc)) 
  {stop("Requires Mcmc list argument", .call=FALSE)}
  else 
  { 
    if(is.null(Mcmc$keep)) {keep=ScalableBayesmConstant.keep} else {keep=Mcmc$keep}
    if(is.null(Mcmc$R)) {stop("Requires MCMC element 'R' - number of iterations", .call=FALSE)} else {r=Mcmc$R}
    if(is.null(Mcmc$nprint)) {nprint=ScalableBayesmConstant.nprint} else {nprint=Mcmc$nprint}
    if(nprint<0) {stop('nprint must be an integer greater than or equal to 0', 
                       .call=FALSE)}
  }
  
  if(verbose){

  message("Starting MCMC Inference for Hierarchical Linear Model:")
  message(paste("Normal Mixture with", ncomp, "components for first stage prior"))
  message(paste("For", nreg, "cross-sectional units"))
  message(" ")
  
  message("Prior Parameters:")
  message("nu.e = ", nu.e)
  message("nu = ", nu)
  message("v")
  print(v) 
  message("mubar")
  print(mubar)  
  message("amu", amu)
  print(amu)  
  message("a")
  print(a) 
  
  if (drawdelta) {
    message("deltabar")
    print(deltabar) 
    message("ad")
    print(ad)  
  }
  
  message(" ")
  message("MCMC Parameters:")
  message(paste("r =", r, "keep =", keep, "nprint =", nprint))
  message("")
  
  }
  
  #  initialize values
  #
  #  Create XpX elements of regdata and initialize tau
  #
  regdata=lapply(Data$regdata,append) #includes y,x, beta,tau, XpX and Xpy
  tau=sapply(regdata,getvar)
  #
  # set initial values for the indicators
  #     ind is of length(nreg) and indicates which mixture component this obs
  #     belongs to.
  #
  ind=NULL
  ninc=floor(nreg/ncomp)
  for (i in 1:(ncomp-1)) {ind=c(ind,rep(i,ninc))}
  if(ncomp != 1) {ind = c(ind,rep(ncomp,nreg-length(ind)))} else {ind=rep(1,nreg)}
  #
  #initialize delta
  #
  if (drawdelta){
    olddelta = rep(0,nz*nvar)
  } else { #send placeholders to the _loop function if there is no Z matrix
    olddelta = 0
    z = matrix(0)
    deltabar = 0
    ad = matrix(0)
  }
  #
  # initialize probs
  #
  oldprob=rep(1/ncomp,ncomp)

  
  ###################################################################
  # Wayne Taylor
  # 09/19/2014
  ###################################################################
  draws =  rhierLinearMixtureParallel_rcpp_loop(regdata, z,
                                                deltabar, ad, mubar, amu,
                                                nu, v, nu.e, ssq,
                                                r, keep, nprint, as.matrix(olddelta),  a, 
                                                oldprob, ind, tau,  
                                                drawdelta, verbose
  )
  ####################################################################
  
  if(drawdelta){
    attributes(draws$Deltadraw)$class=c("bayesm.mat","Mcmc")
    attributes(draws$Deltadraw)$mcpar=c(1,r,keep)
  }

  
  return(draws)
}

Try the scalablebayesm package in your browser

Any scripts or data that you put into this service are public.

scalablebayesm documentation built on April 3, 2025, 7:55 p.m.