#' Fit Mixtures of Perturbed Gaussians
#'
#' This function generates a sample from the posterior of Mixtures of Perturbed Gaussians.
#'
#' @param Y Matrix of the data. Each row represents an observation.
#' @param C Vector of the group label of each observation. Labels are integers starting from 1.
#' @param prior A list giving the prior information. If unspecified, a default prior is used.
#' The list includes the following hyparameters:
#' \code{K_0} Number of mixture components with shared weights.
#' \code{K_1} Number of mixture components with independent weights.
#' \code{epsilon_range} Vector with minimum and maximum values for \code{epsilon}.
#' \code{merge_step} Introduce step to merge mixture components with small KL divergence. Default is \code{merge_step = TRUE}.
#' \code{merge_par} Parameter controlling merging radius. Default is \code{merge_par = 0.1}.
#' @param mcmc A list giving the MCMC parameters. If unspecified, default parameters are used.
#' The list includes the following parameters: \code{nburn} indicates the number of burn-in scans,
#' \code{nsave} indicates the number of scans to be saved,
#' \code{nskip} indicates the thinning interval,
#' \code{ndisplay} indicates the number of scans to be displayed on screen.
#' The total number of scans is equal to \code{nburn + nsave*nskip}.
#' @param state Initial state of the chain. At the moment only the latent variables Z can be initialized.
#' @return A \code{MPG} object.
#' @details
#' \deqn{y_{i,j} = \sum_{k=1}^{K_0+K_1}\pi_{j,k}N(y_{i,j} | \mu_{j,k}, \Sigma_k ) }
#' where \eqn{i = 1, \ldots, n_j} and \eqn{j = 1, \ldots, J}.
#' The weights are defined as follows:
#' \deqn{ \pi_{j,k} = \rho w_{0,k} \;\;\; k = 1, \ldots, K_0,}
#' \deqn{ \pi_{j,k} = (1 - \rho) w_{j,k + K_0} \;\;\; k = 1, \ldots, K_1, }
#' where
#' \deqn{(w_{0,1}, \ldots, w_{0,K_0}) \sim Dirichlet(\alpha_0/K_0) }
#' \deqn{(w_{j,1}, \ldots, w_{j,K_1}) \sim Dirichlet(\alpha_j/K_1) }
#' @examples
#' n = c(250, 250)
#' p = 4
#'
#' Y1 = rbind( matrix( rnorm( n[1]*p), ncol = p), matrix( rnorm(n[2]*p) + 3, ncol = p))
#' Y2 = rbind( matrix( rnorm( n[1]*p), ncol = p), matrix( rnorm(n[2]*p) + 4, ncol = p))
#' Y = rbind(Y1, Y2)
#' C = c( rep(1,sum(n)), rep(2,sum(n)))
#'
#' ans = mpg(Y, C)
mpg <- function(Y, C, prior = NULL, mcmc = NULL, state = NULL)
{
Y = as.matrix(Y)
p = ncol(Y)
if(is.null(prior))
{
prior = list( K_0 = 10,
K_1 = 10,
epsilon_range = c(10^-10, 1),
m_2 = colMeans(Y),
nu_2 = p+2,
nu_1 = p+2,
Psi_2 = cov(Y),
S_2 = 100*cov(Y),
tau_k0 = c(4,4),
tau_alpha = c(1,1),
tau_rho = c(0.5,0.5),
point_masses_rho = c(0.0, 0.0),
tau_varphi = c(0.5,0.5),
point_masses_varphi = c(0.0, 0.0),
merge_step = TRUE,
merge_par = 0.1
)
}
else
{
if(is.null(prior$K_0))
prior$K_0 = 10
if(is.null(prior$K_1))
prior$K_1 = 10
if(is.null(prior$epsilon_range))
prior$epsilon_range = c(10^-10, 1)
if(is.null(prior$m_2))
prior$m_2 = colMeans(Y)
if(is.null(prior$nu_2))
prior$nu_2 = p+2
if(is.null(prior$nu_1))
prior$nu_1 = p+2
if(is.null(prior$Psi_2))
prior$Psi_2 = cov(Y)
if(is.null(prior$S_2))
prior$S_2 = 100*cov(Y)
if(is.null(prior$tau_k0))
prior$tau_k0 = c(4,4)
if(is.null(prior$tau_alpha))
prior$tau_alpha = c(1,1)
if(is.null(prior$tau_rho))
prior$tau_rho = c(0.5, 0.5)
if(is.null(prior$point_masses_rho))
prior$point_masses_rho = c(0,0)
if(is.null(prior$tau_varphi))
prior$tau_varphi = c(0.5,0.5)
if(is.null(prior$point_masses_varphi))
prior$point_masses_varphi = c(0.0, 0.0)
if(is.null(prior$merge_step))
prior$merge_step = TRUE
if(is.null(prior$merge_par))
prior$merge_par = 0.1
}
if(is.null(mcmc))
{
mcmc = list(nburn = 5000, nsave = 1000, nskip = 1, ndisplay = 1000, seed = 42)
}
else
{
if(is.null(mcmc$nburn))
mcmc$nburn = 5000
if(is.null(mcmc$nsave))
mcmc$nsave = 1000
if(is.null(mcmc$nskip))
mcmc$nskip = 1
if(is.null(mcmc$ndisplay))
mcmc$ndisplay = 100
if(is.null(mcmc$seed))
mcmc$seed = 42
}
# set initial random seed for R here, it will propagate down to Rcpp
# but Armadillo's RNG will get set in MCMC class in gibbs.h
# NOTE: Rstudio seems to touch Armadillo's underlying RNG, so
# differing results may be seen when run from Rstudio, see comments
# from Dirk Eddelbuettel here:
# https://github.com/RcppCore/RcppArmadillo/issues/11
set.seed(mcmc$seed)
J = length(unique(C))
if( sum( sort(unique(C)) == 1:J ) != J )
{
print("ERROR: unique(C) should look like 1, 2, ...")
return(0);
}
C = C - 1
if(is.null(state))
{
state = list( Z = (kmeans(Y, prior$K_0 + prior$K_1, iter.max = 100 )$cluster - 1) )
}
ans = fit(Y, C, prior, mcmc, state)
colnames(ans$data$Y) = colnames(Y)
ans$data$C = ans$data$C + 1
ans$chain$Z = ans$chain$Z + 1
class(ans) = "MPG"
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.