#' Hierarchical Pitman-Yor Gibbs sampler
#'
#' @param Y Matrix of taxon counts (rows are populations, columns are species)
#' @param n.iter Number of MCMC iterations (after burn-in)
#' @param n.burn Number of burn-in iterations
#' @param p.shape Shape parameter for gamma prior on top-level concentration parameter
#' @param p.scale Scale parameter for gamma prior on top-level concentration parameter
#' @param p.shape.pop Vector with length equal to number of populations containing shape parameters for gamma priors on population-level concentration parameter
#' @param p.scale.pop Vector with length equal to number of populations containing scale parameters for gamma priors on population-level concentration parameter
#' @param type Type of hierarchical process: "py" for Pitman-Yor process, "dp" for Dirichlet process
#' @param quiet Set to TRUE to suppress console output
#'
#' @return Returns a list containing the following elements:
#' \itemize{
#' \item gamma, alpha, theta, sigma - Vectors containing posterior samples for each of the HPY parameters.
#' \item tab - List with table indicators for each population.
#' \item t.c - Table counts (and species corresponding to each table)
#' \item n.tab - Number of tables in each population
#' \item n.s.tab - Number of tables for a given species in each population
#' }
#' @export
#'
#' @import gStirling
#'
#' @examples
hpySampler <- function(Y, n.iter, n.burn, p.shape, p.scale, p.shape.pop=NULL, p.scale.pop=NULL, type=c("py", "dp"), quiet=FALSE) {
if (!is.numeric(Y) | !is.matrix(Y) |
any(Y<0) | any(Y!=floor(Y))) stop("Y must be a numeric matrix of positive counts")
type <- match.arg(type)
J <- NROW(Y)
K <- NCOL(Y) # Number of distinct species in joint sample
if (is.null(p.shape.pop)) {
p.shape.pop <- rep(p.shape, J)
}
if (is.null(p.scale.pop)) {
p.scale.pop <- rep(p.scale, J)
}
# Initializing PY parameters
gamma <- 5
alpha <- ifelse(type=="dp", 0, 0.5)
theta <- rep(1, J)
if(type=="dp") {
sigma <- rep(0, J)
} else {
sigma <- rep(0.5, J)
}
# Initializing table info
sp.vec <- vector("list", length=J)
n <- rowSums(Y) # The number of individuals in each population
tab <- vector("list", length=J) # List to hold table indicators for each population
t.c <- vector("list", length=J) # Table counts (and species corresponding to each table)
n.tab <- rep(0, J) # Number of tables in each population
#n.s.tab <- matrix(1, J, K) # Number of tables for a given species in each population
n.s.tab <- 1*(Y>0) # Number of tables for a given species in each population
for (j in 1:J) {
# Initially one table for each species
sp.vec[[j]] <- rep(1:K, Y[j,])
tab[[j]] <- rep(1:sum(Y[j,]>0), c(Y[j,])[Y[j,]>0])
t.c[[j]] <- cbind((1:K)[Y[j,]>0], c(Y[j,])[Y[j,]>0])
n.tab[j] <- NROW(t.c[[j]])
}
gamma.s <- rep(0, n.iter)
alpha.s <- rep(0, n.iter)
theta.s <- matrix(0, n.iter, J)
sigma.s <- matrix(0, n.iter, J)
n.tab.s <- matrix(0, n.iter, J)
n.s.tab.s <- array(0, dim=c(n.iter, J, K))
t.c.s <- vector("list", length=n.iter)
# Loop over MCMC iterations
for (i in 1:(n.burn+n.iter)) {
idx <- i - n.burn
if (!quiet & i==1) cat("Beginning burn-in:", "\n")
if (!quiet & i==n.burn+1) cat("Beginning sampling:", "\n")
if (!quiet & i%%100==0) cat(" ", i, "\n")
# Loop over populations
for (j in 1:J) {
#Loop over individuals in a population
for (p in 1:n[j]){
sp.cur <- sp.vec[[j]][p]
# Remove current individual from its table
t.cur <- tab[[j]][p]
t.c[[j]][t.cur,2] <- t.c[[j]][t.cur,2] - 1
if (t.c[[j]][t.cur,2]==0) {
# Drop table and make adjustments
t.c[[j]] <- t.c[[j]][-t.cur,]
tab[[j]][tab[[j]]>t.cur] <- tab[[j]][tab[[j]]>t.cur] - 1
n.tab[j] <- n.tab[j] - 1
n.s.tab[j, sp.cur] <- n.s.tab[j, sp.cur] - 1
}
# Reassign individual to new or existing table
npart <- (gamma+sum(n.tab))*(Y[j,sp.cur]-1-n.s.tab[j, sp.cur]*sigma[j])
dpart <- (theta[j]+n.tab[j]*sigma[j])*(sum(n.s.tab[, sp.cur])-alpha)
prob.new <- ifelse(n.s.tab[j, sp.cur]==0, 1, 1/(1+npart/dpart))
is.new <- rbinom(1, 1, prob.new)
if (is.new) {
# Allocate new table
t.c[[j]] <- rbind(t.c[[j]], c(sp.cur, 1))
n.tab[j] <- n.tab[j] + 1
n.s.tab[j, sp.cur] <- n.s.tab[j, sp.cur] + 1
tab[[j]][p] <- n.tab[j]
} else {
wh.c.sp <- which(t.c[[j]][,1]==sp.cur)
wh.t.new <- sample(1:length(wh.c.sp), 1, prob=(t.c[[j]][wh.c.sp,2]-sigma[j]))
wh.t <- wh.c.sp[wh.t.new]
t.c[[j]][wh.t, 2] <- t.c[[j]][wh.t, 2] + 1
tab[[j]][p] <- wh.t
}
}
if (type=="py") {
sigma[j] <- gStirling::sampDsct(sigma[j], K, n.tab[j],Y[j,,drop=FALSE], n.s.tab[j,,drop=FALSE], theta[j])
}
theta[j] <- gStirling::sampConc(theta[j], n[j], n.tab[j], p.shape.pop[j], p.scale.pop[j], sigma[j])
}
for (j in 1:J) {
if (i>n.burn) {
theta.s[idx,j] <- theta[j]
sigma.s[idx,j] <- sigma[j]
n.tab.s[idx,j] <- n.tab[j]
}
}
# Sample top-level PY parameters
# Discount
sp.tab.totals <- matrix(apply(n.s.tab, 2, sum), ncol=K)
if (type=="py") {
alpha <- gStirling::sampDsct(alpha, K, K, sp.tab.totals, t(rep(1, K)), gamma) # Chris
}
# Concentration
gamma <- gStirling::sampConc(gamma, sum(n.tab), K, p.shape, p.scale, alpha)
if (i>n.burn) {
gamma.s[idx] <- gamma
alpha.s[idx] <- alpha
n.s.tab.s[idx,,] <- n.s.tab
t.c.s[[idx]] <- t.c
}
}
return(list(gamma=gamma.s, alpha=alpha.s, theta=theta.s, sigma=sigma.s,
tab=tab, t.c=t.c.s, n.tab=n.tab.s, n.s.tab=n.s.tab.s))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.