Nothing
#' Cluster time segments into behavioral states
#'
#' This function performs a Gibbs sampler within the Latent Dirichlet Allocation
#' (LDA) model to estimate proportions of each behavioral state for all time
#' segments generated by \code{\link{segment_behavior}}. This is the second
#' stage of the two-stage Bayesian model that estimates proportions of
#' behavioral states by first segmenting individual tracks into relatively
#' homogeneous segments of movement.
#'
#' The LDA model analyzes all animal IDs pooled together, thereby providing
#' population-level estimates of behavioral states.
#'
#' @param dat A data frame returned by \code{\link{summarize_tsegs}} that
#' summarizes the counts of observations per bin and movement variable for all
#' animal IDs.
#' @param gamma1 numeric. A hyperparameter for the truncated stick-breaking
#' prior for estimating the \code{theta} matrix.
#' @param alpha numeric. A hyperparameter for the Dirichlet distribution when
#' estimating the \code{phi} matrix.
#' @param ngibbs numeric. The total number of iterations of the MCMC chain.
#' @param nmaxclust numeric. A single number indicating the maximum number of
#' clusters to test.
#' @param nburn numeric. The length of the burn-in phase.
#' @param ndata.types numeric. A vector of the number of bins used to discretize
#' each movement variable. These must be in the same order as the columns
#' within \code{dat}.
#'
#' @return A list of model results is returned where elements include the
#' \code{phi} matrix for each data stream, \code{theta} matrix, log likelihood
#' estimates for each iteration of the MCMC chain \code{loglikel}, and
#' matrices of the latent cluster estimates for each data stream \code{z.agg}.
#'
#'
#' @examples
#' \donttest{
#' #load data
#' data(tracks.seg)
#'
#' #select only id, tseg, SL, and TA columns
#' tracks.seg2<- tracks.seg[,c("id","tseg","SL","TA")]
#'
#' #summarize data by track segment
#' obs<- summarize_tsegs(dat = tracks.seg2, nbins = c(5,8))
#'
#' #cluster data with LDA
#' res<- cluster_segments(dat = obs, gamma1 = 0.1, alpha = 0.1, ngibbs = 1000,
#' nburn = 500, nmaxclust = 7, ndata.types = 2)
#' }
#'
#' @importFrom stats "rmultinom"
#' @export
cluster_segments=function(dat, gamma1, alpha, ngibbs, nmaxclust, nburn, ndata.types){
ntsegm=nrow(dat)
#separate variables
y=list()
nbins=rep(NA, ndata.types)
for (i in 1:ndata.types){
nome=paste0('y', i)
ind=grep(nome, colnames(dat))
y[[i]]=data.matrix(dat[,ind])
nbins[i]=length(ind)
}
#initial values
phi=z.agg=list()
for (i in 1:ndata.types){
phi[[i]]=matrix(1/nbins[i], nmaxclust, nbins[i])
z.agg[[i]]=array(NA, dim=c(ntsegm, nbins[i], nmaxclust))
}
theta=matrix(1/nmaxclust, ntsegm, nmaxclust)
for (j in 1:ndata.types){
for (i in 1:ntsegm){
for (k in 1:nbins[j]){
z.agg[[j]][i,k,]=rmultinom(1, size=y[[j]][i,k], prob=rep(1/nmaxclust, nmaxclust))
}
}
}
#prepare for gibbs
store.phi=zeroes=list()
for (i in 1:ndata.types){
store.phi[[i]]=matrix(NA, ngibbs, nmaxclust*nbins[i])
zeroes[[i]]=array(0, c(ntsegm, nbins[i], nmaxclust))
}
store.theta=matrix(NA, ngibbs, ntsegm*nmaxclust)
store.loglikel=rep(NA, 1)
#progress bar
pb <- progress::progress_bar$new(
format = " iteration (:current/:total) [:bar] :percent [Elapsed: :elapsed, Remaining: :eta]",
total = ngibbs, clear = FALSE, width = 100)
#run gibbs sampler
for (i in 1:ngibbs){
pb$tick() #create progress bar
#re-order clusters
if (i < nburn & i%%50==0){
med=apply(theta, 2, mean)
ordem=order(med, decreasing=T)
theta=theta[,ordem]
for (j in 1:ndata.types){
z.agg[[j]]=z.agg[[j]][,,ordem]
phi[[j]]=phi[[j]][ordem,]
}
}
#sample from FCD's
z.agg=sample.z(ntsegm=ntsegm, nbins=nbins, y=y, nmaxclust=nmaxclust,
phi=phi, ltheta=log(theta), zeroes=zeroes, ndata.types=ndata.types)
v=sample.v(z.agg=z.agg, gamma1=gamma1,
ntsegm=ntsegm, ndata.types=ndata.types, nmaxclust=nmaxclust)
theta=get.theta(v=v, nmaxclust=nmaxclust, ntsegm=ntsegm)
phi=sample.phi(z.agg=z.agg, alpha=alpha, nmaxclust=nmaxclust,
nbins=nbins, ndata.types=ndata.types)
#calculate log-likelihood
p1=0
for (j in 1:ndata.types){
prob1=theta %*% phi[[j]]
p1=p1+sum(y[[j]]*log(prob1))
}
#store results
for (j in 1:ndata.types){
store.phi[[j]][i,]=phi[[j]]
}
store.theta[i,]=theta
store.loglikel[i]=p1
}
list(phi=store.phi, theta=store.theta, loglikel=store.loglikel, z.agg=z.agg)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.