# initializations:
#' Randomly generate a Q matrix within the identifying conditions
#'
#' This function initializes Q (if unknown) for MCMC sampling within identifiability
#' constraints
#'
#' @param M latent state dimension
#' @param L dimension of the binary responses
#' @param p Bernoulli probability of 1 in the Q matrix (except two diagonal matrices)
#' @examples
#'
#' simulate_Q(3,100)
#'
#' @return a binary matrix of dimension M by L
#' @export
simulate_Q <- function(M,L,p=0.1){
Q_col_ordered <-cbind(diag(1,M),diag(1,M),
matrix(stats::rbinom(M*(L-2*M),1,p),nrow=M))
m_only_two <- which(rowSums(Q_col_ordered[,-(1:(2*M))]) == 0)
Q_col_ordered[cbind(m_only_two, sample((2*M+1):(L),length(m_only_two)))] <- 1
# if a row sums to two, add an extra "1".
Q_col_ordered[,sample(1:L)]
}
#'Randomly generate a Q matrix within the identifying condition (data-driven)
#'
#'This function initializes Q (if unknown) during MCMC sampling chain within identifiability
#'constraints. It is a warm start - because it will not assign an "1" to dimension "l" with
#'few ones in the data. NB: harder to get 1 to zero? easy to get zero to one?
#'
#' @param M latent state dimension
#' @param dat binary data matrix (rows for observations, columns for dimensions)
#' @param p Bernoulli probability of 1 in the Q matrix (except two diagonal matrices)
#' @param frac A threshold - this function only initializes the dimensions with at least
#' \code{frac}*100 percent observed frequencies.
#'
#' @examples
#' # simulate data:
#' L0 <- 100
#' options_sim0 <- list(N = 200, # sample size.
#' M = 3, # true number of machines.
#' L = L0, # number of antibody landmarks.
#' K = 8, # number of true components.,
#' theta = rep(0.8,L0), # true positive rates
#' psi = rep(0.01,L0), # false positive rates
#' alpha1 = 1 # half of the people have the first machine.
#')
#'
#' #image(simulate_data(options_sim0,SETSEED = TRUE)$datmat)
#' simu <- simulate_data(options_sim0, SETSEED=TRUE)
#' simu_dat <- simu$datmat
#' simulate_Q_dat(5,simu_dat)
#'
#' @return a binary matrix of dimension M by L
#' @export
#'
#'
simulate_Q_dat <- function(M,dat,p=0.1,frac=1/4){
ind_all <- which(colSums(dat)>nrow(dat)*frac) # this initial value helps MCMC sampling!.
res <- matrix(0,nrow=M,ncol=ncol(dat))
for (m in 1:M){
res[m,sample(ind_all,ceiling(p*length(ind_all)))] <- 1
}
res
}
# compute the likelihood (see src\bfa_cpp.cpp).
# update parameters:
# eta_star <- as.matrix(expand.grid(rep(list(0:1), options_sim0$M)),ncol=options_sim0$M)
# sum(abs(log_full0(Y,eta_star,Q, p, theta, psi)-
# log_full(Y,eta_star,Q, p, theta, psi))) # perhaps write this into a unit test.
#' update true and false positive rates
#'
#' This function samples the true (theta) and false (psi) positive rates;
#' Constraint: \code{theta[l]> 0.5 > psi[l]}
#'
#' @param Y binary data matrix
#' @param H a binary matrix: rows for all individuals, columns for latent states
#' @param Q Q matrix
#' @param a_theta,a_psi Beta hyperparameters for priors on \code{theta} and
#' \code{psi}, respectively. 2 by L matrices.
#' @param old_theta_vec sampled \code{theta} parameters from the previous iteration (vector
#' of length L); to sample \code{psi} given the constraint
#' @return a list of true \code{theta} and false \code{psi} positive rates, each of length L.
#' @export
update_positive_rate <- function(Y,H,Q,a_theta,a_psi,old_theta_vec=NULL){
xi <- (H%*%Q>0.5)+0 # <-- NB: for DINO model. The design matrix
#in the manuscript. Should be modified for general restricted LCM.
psi_a1 <- colSums((1-xi)*Y)+a_psi[1,]
psi_a2 <- colSums((1-xi)*(1-Y))+a_psi[2,]
theta_a1 <- colSums(xi*Y)+a_theta[1,]
theta_a2 <- colSums(xi*(1-Y))+a_theta[2,]
if (is.null(old_theta_vec)){
psi_samp <- sapply(1:ncol(Q),function(i) {stats::rbeta(1,psi_a1[i],psi_a2[i])})
} else{
psi_samp <- sapply(1:ncol(Q),function(i) {#stats::rbeta(1,theta_a1[i],theta_a2[i])
trunc_rbeta(1,psi_a1[i],psi_a2[i],upper=min(0.49,old_theta_vec[i]))
})
}
# theta_all_a1 <- sum(colSums(xi*Y))+a_theta[1,1]
# theta_all_a2 <- sum(colSums(xi*(1-Y)))+a_theta[2,1]
# theta_samp <- rep(stats::rbeta(1,theta_all_a1,theta_all_a2), ncol(Q))
#theta_samp <- sapply(1:ncol(Q),function(i) {stats::rbeta(1,theta_a1[i],theta_a2[i])})
theta_samp <- sapply(1:ncol(Q),function(i) {#stats::rbeta(1,theta_a1[i],theta_a2[i])
trunc_rbeta(1,theta_a1[i],theta_a2[i],lower=max(0.51,psi_samp[i]))
})
list(theta=theta_samp,psi=psi_samp)
}
#' sample alpha_s - hyperparameter for positive rates theta_l, psi_l
#'
#' Function to sample the parameters from a grid. NB: can just fix it at
#' a particular value.
#'
#'@param theta_vec vector of positive rates
#'@param a_theta_one scalar between 0 and 1.
#'@param e,f hyperparameter for Gamma distribution over original alpha_s (mean
#'e/f, variance e/f^2).
#'@param show_density show the full conditional density of alpha_s
#'given other unknown parameters; default is \code{FALSE}.
#'
#'@return an updated alpha_s value (positive)
#'
#'@examples
#' s <- 10#rgamma(1,0.1,0.01)
#' a1 <- 0.9
#' n <- 20
#' p <- rbeta(n, s*a1,s*(1-a1))
#'
#' par(mfrow=c(1,3))
#' plot(p,main="raw data: probabilities")
#'
#' update_pseudo_n_PR(p,a1,e = 0.1,f=0.01,show_density =TRUE)
#' abline(v=s/(1+s),col="blue")
#' legend("topleft",c("True s/(1+s)","simulated s/(1+s)"),
#' col=c("blue","red"),lty=c(1,2))
#' mtext(paste0("true s: ",as.character(s)),3)
#'
#' update_mu_PR(p,s,a=2,b=1,show_density=TRUE)
#' abline(v=a1,col="blue")
#' legend("topleft",c("True a_theta1","simulated a_theta1"),
#' col=c("blue","red"),lty=c(1,2))
#' mtext(paste0("true a_theta1: ",as.character(a1)),3)
#'
#' \dontrun{
#' par(mfrow=c(1,2))
#' a10 <- 0.9
#' s0 <- 5
#' miter <- 100
#' res <- matrix(NA,nrow=2,ncol=miter)
#' res[,1] <- c(a10,s0)
#' for (iter in 2:miter){
#' res[2,iter] <- update_pseudo_n_PR(p,res[1,iter-1],e = 0.1,f=0.01,show_density =!TRUE)
#' res[1,iter] <- update_mu_PR(p,res[2,iter],a=2,b=1,show_density=!TRUE)
#' }
#' plot(res[1,],type="l",main="mu")
#' abline(h=c(a1),col=c(2))
#' plot(res[2,],type="l",main="s")
#' abline(h=c(s),col=c(2))
#' }
#'@export
update_pseudo_n_PR <- function(theta_vec,a_theta_one,e=1,f=0.1,show_density=FALSE) {
th <- seq(0.001,.999,length=5000) # grid over (0,1) after reparametrization.
part1 <- 0
L <- length(theta_vec)
for (m in 1:L){
part1 <- part1+ (th/(1-th)*a_theta_one-1)*log(theta_vec[m])+
(th/(1-th)*(1-a_theta_one)-1)*log(1-theta_vec[m])
}
tmp <- stats::dgamma(th/(1-th),e,f,log=T)-2*log(1-th)+L*lgamma(th/(1-th))+part1-
L*lgamma(th/(1-th)*a_theta_one)-L*lgamma(th/(1-th)*(1-a_theta_one))
tmp <- exp(tmp-max(tmp))
rtheta <- sample(th,1,prob=tmp)
if (show_density){
graphics::plot(th,tmp,type="l")
graphics::abline(v=rtheta,col="red",lty=2)
}
(rtheta)/(1-rtheta)
}
#' sample a_theta_one - hyperparameter (mean) for positive rates theta_l, psi_l
#'
#' Function to sample the parameters from a grid
#'
#'@param theta_vec vector of positive rates that share a population mean
#'@param s positive scalar; represents the current sampled value of pseudo-sample size
#'@param a,b hyperparameter for Beta distribution over original a_theta_one.
#'@param show_density Show the full conditional density
#'given other unknown parameters; Default to \code{FALSE}
#'
#'@seealso \code{\link{update_pseudo_n_PR}} for examples.
#'@return an updated alpha_s value (positive)
#'
#'@export
update_mu_PR <- function(theta_vec,s,a=2,b=1,show_density=TRUE){
th <- seq(0.001,.999,length=5000) # grid over (0,1).
part1 <- 0
L <- length(theta_vec)
for (m in 1:L){
part1 <- part1 + (th*s-1)*log(theta_vec[m])+
(s*(1-th)-1)*log(1-theta_vec[m])
}
tmp <- stats::dbeta(th,a,b,log=T)+L*lgamma(s)+part1-
L*lgamma(s*th)-L*lgamma(s*(1-th))
tmp <- exp(tmp-max(tmp))
rtheta <- sample(th,1,prob=tmp)
if (show_density){
graphics::plot(th,tmp,type="l")
graphics::abline(v=rtheta,col="red",lty=2)
}
rtheta
}
#' sample alpha - hyperparameter for latent state prevalences p_m
#'
#' This function samples the parameters from a grid (this is used only for model
#' with pre-specified latent state dimension M);
#' NB: currently it uses uniform prior or Beta prior for the reparameterized
#' hyperparameter; check if this is can be replaced by Gamma.
#'
#'@param H_star binary matrix of latent state profiles (row for clusters, column for
#'latent states)
#'@param t number of clusters
#'@param M latent state dimension
#'@param a,b hyperparameter for Beta distribution over reparameterized alpha.
#'@param show_density show the full conditional density of alpha
#'given other unknown parameters; \code{FALSE} by default.
#'
#'@return an updated alpha value (positive)
#'
#'@export
update_alpha <- function(H_star,t,M,a=1,b=1,show_density=FALSE) {
th <- seq(0.001,.999,length=5000) # grid over (0,1) after reparametrization.
sm <- apply(H_star,2,sum,na.rm=T)
part1 <- 0
for (m in 1:M){
part1 <- part1+lgamma(th/(1-th)/M+sm[m])
}
tmp <- stats::dbeta(th,a,b,log=T)+M*log(th/(1-th))+part1-M*lgamma(th/(1-th)/M+t+1)
tmp <- exp(tmp-max(tmp))
rtheta <- sample(th,1,prob=tmp)
if (show_density){
graphics::plot(th,tmp,type="l")
graphics::abline(v=rtheta,col="red",lty=2)
}
(rtheta)/(1-rtheta)
}
#log_full(simu_dat,as.matrix(expand.grid(c(0,1),c(0,1),c(0,1),c(0,1),c(0,1))),
# Q,p,theta,psi)
#' update latent state prevalences
#'
#' This function updates the prevalence parameters
#'
#' @param H_star latent state profiles for all clusters
#' @param alpha hyperparameter
#' @param M dimension of the latent state vector (number of columns for H_star)
#'
#' @return vector of length M (each between 0 and 1).
#' @export
update_prevalence <- function(H_star,alpha,M){
n1_star <- apply(H_star,2,sum,na.rm=T)
n0_star <- apply(1-H_star,2,sum,na.rm=T)
sapply(1:ncol(H_star),function(i) {stats::rbeta(1,n1_star[i]+alpha/M, n0_star[i]+1)})
# based on finite dimension IBP construction.
}
#' Update the current element of Q_ml or not
#'
#' Function to test whether an update of the current element of Q_ml is needed.
#' This function must be used for the constrained Gibbs sampler.
#'
#' @param Q a matrix with rows corresponding to latent states and columns
#' for number of binary measurements
#' @param k,l row and column indices for checking whether to update Q_kl
#' @return A logical value. TRUE for updating in constrained Gibbs sampler, FALSE
#' for skipping the Gibbs update.
#' @references Chen, Y., Culpepper, S. A., Chen, Y., and Douglas, J. (2017). Bayesian estimation of the DINA Q matrix.
#' @export
do_update_Q_one <- function(Q,k,l){ # this saves lots of speed:
test <- rep(NA,3)
ind_unit_Q <- colSums(Q)==1
test[1] <- equal_unit_vec(Q[,l],k)
test[2] <- (sum(Q[k,])==3) && (Q[k,l]==1)
Q_cand <- Q[,ind_unit_Q,drop=FALSE]
test[3] <- (Q[k,l]==0) && sum(Q[,l])==1 &&
(sum(sapply(1:ncol(Q_cand), function(c) equal_unit_vec(Q_cand[,c],which(Q[,l]==1))))==2)
!any(test)
}
#' Compute the full conditional probability of Q_ml given the rest of parameters
#'
#' Function to compute this log full conditional density, which will be used
#' in \link{update_Q}
#'
#' @param Yl binary vector (e.g., observed values at dimension l)
#' @param eta_star a matrix of latent state profiles, rows for clusters,
#' columns for M latent states
#' @param Ql l-th column of the Q matrix
#' @param thetal,psil true, false positive rate (both between 0 and 1)
#'
#' @return log conditional probability of Q_ml given other unknown parameters
#' @export
log_pr_Qml_cond <- function(Yl,eta_star,Ql,thetal,psil){
n1 <- sum(Yl)
n0 <- sum(1-Yl)
xil <- (eta_star%*%matrix(Ql,ncol=1) > 0.5)+0 # NB: works for DINO model;
# need revision for general restricted LCMs.
PR_mat <- (1-xil)*(n1*log(psil)+n0*log(1-psil))+
xil*(n1*log(thetal)+n0*log(1-thetal)) # conditional distribution given the
# design matrix.
}
#' update the Q matrix element-wise
#'
#' This function updates Q matrix by Gibbs sampler (with option to do constrained
#' updates within the identifiability constraint). Only used when Q is not known.
#' NB: - do we need M here? do we modify M after collapsing partner machines.
#' - need to speed up.
#'
#' @param Y binary data matrix
#' @param Q_old the Q matrix from the last scan in Gibbs sampler (of dimension M by L)
#' @param H matrix of latent state profiles, rows for N subjects,
#' columns for M latent states
#' @param z a vector of individual cluster indicators
#' @param t the number of clusters at a particular iteration
#' @param mylist the ordered list that keeps the cluster ids in increasing order
#' @param p Latent state prevalences; a vector of length identical to the columns of H.
#' @param theta,psi True and false positive rates. Both are vectors of length L
#' @param constrained Default to FALSE; Set to TRUE if doing constrained Gibbs sampling
#' within identifiability constraints.
#'
#' @return A Q matrix with all its elements updated.
#' @export
update_Q <- function(Y,Q_old,H,z,t,mylist,p,theta,psi,constrained=FALSE){
M <- nrow(Q_old)
N <- nrow(Y)
L <- ncol(Y)
H_star_enumerate <- as.matrix(expand.grid(rep(list(0:1), M)),ncol=M)
if (constrained){ # constrained Gibbs sampling:
for (k in sample(1:M,replace=FALSE)){ # begin iteration over elements of Q.
for (l in sample(1:L,replace=FALSE)){
if (do_update_Q_one(Q_old,k,l)){ # begin an update if needed.
L0 <- L1 <- 0
q_now <- Q_old[k,l]
Q_old[k,l] <- 0
#interact <- - 0.1 # <--- negative value to encourage sparse patterns per dimension.
#vec_interact <- c(Q_old[k,])
#vec_interact <- c(Q_old[k,],Q_old[-k,l])
#vec_interact <- c(Q_old[,l])
#ising_tmp <- sum(outer(vec_interact,vec_interact,"*")[upper.tri(outer(vec_interact,vec_interact,"*"))])
for (j in 1:t){ #Yl,eta_star,Ql,thetal,psil
#L0 <- L0 + log_marginal(Y[z==mylist[j],l,drop=FALSE],
# H_star_enumerate,Q_old[,l,drop=FALSE],p,theta[l],psi[l]) # <--- integrate over H.
L0 <- L0 + log_pr_Qml_cond(Y[z==mylist[j],l,drop=FALSE],
H[j,,drop=FALSE],Q_old[,l],theta[l],psi[l])
}
#L0 <- L0 #+ dbinom(sum(vec_interact),L-2*M,0.1,log=TRUE) # <-- encourage sparse columns.
#L0 <- L0 - log(1+exp(-interact*ising_tmp)) # <-- encourage sparse columns.
Q_old[k,l] <- 1
#vec_interact <- c(Q_old[k,])
#vec_interact <- c(Q_old[k,],Q_old[-k,l])
#vec_interact <- Q_old[,l]
#ising_tmp <- sum(outer(vec_interact,vec_interact,"*")[upper.tri(outer(vec_interact,vec_interact,"*"))])
for (j in 1:t){
#L1 <- L1 + log_marginal(Y[z==mylist[j],l,drop=FALSE],
# H_star_enumerate,Q_old[,l,drop=FALSE],p,theta[l],psi[l]) # <---- integrate over H.
L1 <- L1 + log_pr_Qml_cond(Y[z==mylist[j],l,drop=FALSE],
H[j,,drop=FALSE],Q_old[,l],theta[l],psi[l])
}
#L1 <- L1 #+ dbinom(sum(vec_interact),L-2*M,0.1,log=TRUE)# <-- encourage sparse columns.
#L1 <- L1 - log(1+exp(-interact*ising_tmp)) # <-- encourage sparse columns.
curr_prob <- exp(L1- matrixStats::logSumExp(c(L0,L1)))
#print(curr_prob)
Q_old[k,l] <- stats::rbinom(1,1,prob = curr_prob)
#Q_old[k,l] <- metrop_flip(q_now,curr_prob) # <-- if doing metroplized flipping.
}# end an update if needed.
}
} #end iteration over elements of Q.
} else { # Gibbs update without constraints of Q within a set.
for (k in sample(1:M,replace=FALSE)){
for (l in sample(1:L,replace=FALSE)){ # begin iteration over elements.
L0 <- L1 <- 0
q_now <- Q_old[k,l]
Q_old[k,l] <- 0
for (j in 1:t){ #Yl,eta_star,Ql,thetal,psil
L0 <- L0 + log_pr_Qml_cond(Y[z==mylist[j],l,drop=FALSE],
H[j,,drop=FALSE],Q_old[,l],theta[l],psi[l])
}
Q_old[k,l] <- 1
for (j in 1:t){
L1 <- L1 + log_pr_Qml_cond(Y[z==mylist[j],l,drop=FALSE],
H[j,,drop=FALSE],Q_old[,l],theta[l],psi[l])
}
curr_prob <- exp(L1- matrixStats::logSumExp(c(L0,L1)))
#print(curr_prob)
#Q_old[k,l] <- metrop_flip(q_now,curr_prob)
Q_old[k,l] <- stats::rbinom(1,1,prob = curr_prob)
}
} #end iteration over elements.
}
Q_old
}
#' Compute the full conditional probability of column Q_l given other unknown parameters
#'
#' Function to compute the log full conditional density for all patterns of the
#' l-th column of Q. This function is used in \link{update_Q_col_block}.
#'
#' @param Yl a column of the binary data matrix
#' @param eta_star a matrix of machine usage indicators, rows for clusters, columns for M machines
#' @param Ql_enumerate l-th column of the Q matrix
#' @param thetal,psil True and false positive rates. Both are vectors of length L
#'
#' @return a vector of log conditional probability of column Q_l taking each of
#' \code{2^nrow(Ql_enumerate)} given other unknown parameters,
#' the dimension will be identical to \code{2^nrow(Ql_enumerate)}.
#' @export
log_pr_Qml_cond_enum <- function(Yl,eta_star,Ql_enumerate,thetal,psil){
n1 <- sum(Yl)
n0 <- sum(1-Yl)
xil <- (eta_star%*%matrix(Ql_enumerate,nrow=ncol(eta_star)) > 0.5)+0 # NB: only works for DINO model; need to revise
# for other general restricted LCMs.
PR_mat <- (1-xil)*(n1*log(psil)+n0*log(1-psil))+
xil*(n1*log(thetal)+n0*log(1-thetal))
}
#' Block update the Q matrix by column
#'
#' This function updates Q matrix by Gibbs sampler (without identifiability
#' constraints)
#'
#' @param Y binary data matrix
#' @param Q_old the Q matrix from the last scan in Gibbs sampler (of dimension M by L)
#' @param H matrix of latent state profiles, rows for N subjects, columns for M latent states
#' @param z a vector of individual cluster indicators
#' @param t the number of clusters at an iteration
#' @param mylist the ordered list that keeps the cluster ids in increasing order
#' @param p latent state prevalences; a vector of length identical to the columns of H.
#' @param theta,psi True and false positive rates, each of length L
#'
#' @return A Q matrix with all elements updated.
#' @export
update_Q_col_block <- function(Y,Q_old,H,z,t,mylist,p,theta,psi){
M <- nrow(Q_old)
N <- nrow(Y)
L <- ncol(Y)
Ql_enum <- t(as.matrix(expand.grid(rep(list(0:1), M)),ncol=M))
for (l in sample(1:L,replace=FALSE)){ # begin iteration over columns:
L_enum <- rep(0,2^M)
for (j in 1:t){
L_enum <- L_enum + log_pr_Qml_cond_enum(Y[z==mylist[j],l,drop=FALSE],
H[j,,drop=FALSE],Ql_enum,theta[l],psi[l])
# for all patterns of each column, we compute the likelihood and sum over clusters.
}
curr_prob <- exp(L_enum-matrixStats::logSumExp(L_enum))
Q_old[,l] <- Ql_enum[,sample(1:2^M,1,prob = curr_prob)]
}
Q_old
}
#' Main function for model estimation
#'
#' Estimating restricted latent class models (RLCM) by MCMC sampling
#' given pre-specified upper bound for (M) latent state dimension. This function performs MCMC sampling
#' with user-specified options.
#' NB:
#' 1) add flexibility to specify other parameters as fixed (or partially
#' fixed such as the rows of Q).
#'
#' @param dat binary data matrix (row for observations and column for dimensions)
#' @param model_options Specifying assumed model options:
#' \itemize{
#' \item \code{n} The number of observations
#' \item \code{t_max} The maximum (guessed) number of clusters in the data during
#' the posterior inference
#' \item \code{m_max} For a model with pre-specified number of states, \code{m_max};
#' In an infinite dimension model, \code{m_max} is
#' the maximum (guessed) latent state dimension during the posterior inference
#' (see slice_sampler to come); one can increase this number if
#' this package recommends so in the printed message;
#' \item \code{a_theta, a_psi} hyperparameters for true and false positive rates;
#' \code{a_theta} and \code{a_psi} are both 2 by L. If provided with these
#' parameter values, the algorithm does not pool information across measurements.
#' It assumes independent priors for all true and false positive rates.
#'
#' \item \code{pooling_pr} Set to \code{TRUE} if to pool theta and psi (must NOT
#' set \code{a_theta} or \code{a_psi}; must set \code{a0_TPR},
#' \code{a0_FPR},\code{e0_TPR},\code{f0_TPR},\code{e0_FPR},\code{f0_FPR} that are
#' hyperparameters of the priors for TPR or FPR:
#'
#' \code{theta[1] <--Beta-- a_theta[1:2,l] <--deterministic--
#' (a0_TPR, hyper_pseudo_n_TPR <--Gamma-- (e0_TPR, f0_TPR))
#' --deterministic--> a_theta[1:2,l] --Beta-->theta[2]}; (a_theta has duplicated
#' columns, hence an arbitary column index l)
#'
#' \item \code{hyper_pseudo_n_TPR},\code{hyper_pseudo_n_FPR} pseudo sample
#' size in \code{Beta(hyper_pseudo_n_TPR*a0_TPR,hyper_pseudo_n_TPR*(1-a0_TPR))};
#' usually works well by setting this to a large number, say 10 or 50. Each
#' is assigned a \code{Gamma(e0_XPR, f0_XPR)} prior
#'
#' \item \code{a0_TPR} mean of a Beta prior TPR
#' \item \code{a0_FPR} mean of a Beta prior FPR
#'
#' The following four parameters are not needed if \code{hyper_pseudo_n_TPR},\code{hyper_pseudo_n_FPR}
#' are specified.
#' \item \code{e0_TPR,f0_TPR} the first, second Gamma parameter for the TPR
#' pseudo sample size hyperparameter: \code{hyper_pseudo_n_TPR}
#' \item \code{e0_FPR,f0_FPR} the first, second Gamma parameter for the FPR
#' pseudo sample size hyperparameter: \code{hyper_pseudo_n_FPR}
#' \item \code{log_v} The character string representing the prior
#' distribution for the number of true clusters, e.g.,
#' \code{"function(k) {log(0.1) + (k-1)*log(0.9)}"}. We pre-computed
#' log of the coefficients in Mixture of Finite Mixtures
#' (Miller and Harrison, 2017, JASA). Use this code:
#' \code{mfm_coefficients(eval(parse(text=model_options0$log_pk)),
#' model_options0$gamma,
#' model_options0$n,
#' model_options0$t_max+1)}
#' \item \code{the_partition} Put a vector of z here if not to update
#' the partition/clustering. The z must be 1) from 1 to t, which is
#' the number of groups, 2) consecutive from 1 to t.
#' \item \code{a_alpha, b_alpha} Just for infinite latent state dimension model -
#' Gamma hyperparameter for the hyperprior on \code{alpha}.
#' (see slice_sampler to come)
#' }
#' The following are not updated if provided with fixed values
#' \itemize{
#' \item \code{Q} Q matrix
#' \item \code{theta} a vector of true positive rates
#' \item \code{psi} a vector of false positive rates
#' \item \code{p} a vector of latent state prevalences
#' \item \code{alpha} The hyperparameter
#' for \code{Beta(alpha/m_max,1)} (can set to \code{m_max});
#' For infinite dimension model, the hyperparameter for IBP (can set to 1).
#' }
#'
#'
#'
#' @param mcmc_options Options for MCMC algorithm:
#' \itemize{
#' \item \code{n_total} total number of MCMC iterations
#' \item \code{n_keep} the number of MCMC samples kept for posterior inference.
#' \item \code{n_burnin} the number of burnin iterations to be discarded
#' for drawing the posterior inference.
#' \item \code{n_split} the number of restricted Gibbs scan to arrive at a launch state;
#' see \code{\link{split_merge}} and \code{\link{restricted_gibbs}}
#' \item \code{print_mod} print intermediate model fitting information
#' \item \code{constrained} update the Q matrix with identifiability constraints (if \code{TRUE});
#' otherwise, set to \code{FALSE}.
#' \item \code{block_update_H} update rows of H (if \code{TRUE}) or not
#' (if \code{NULL} or \code{FALSE} - must be so for slice_sampler to come).
#' \item \code{block_update_Q} update columns of Q (if \code{TRUE}) or not
#' (if \code{NULL} or \code{FALSE} - must be so for slice_sampler to come).
#' Then no identifiability constraint is imposed upon Q at any iterations.
#' \item \code{ALL_IN_ONE_START} \code{TRUE} for putting all subjects in one cluster,
#' \code{FALSE} by starting from a hierarchical agglomerative clustering (complete linkage) and cut
#' to produce \code{floor(t_max/4)} clusters. Consider this as a warm start.
#' \item \code{MORE_SPLIT} when getting to launch state of the partition,
#' \code{TRUE} For biasing towards split when reaching a random launch state;
#' \code{FALSE} for choosing a pair (i,j) uniformly and then merge (if they belong to distinct clusters)
#' or split (if they belong to the identical cluster).
#' }
#'
#' @example /inst/example/simulation_fixed_M.R
#'
#' @return posterior samples for quantities of interest.
#' It is a list comprised of the following elements:
#' \itemize{
#' \item \code{t_samp}
#' \item \code{z_samp}
#' \item \code{N_samp}
#' \item \code{keepers} indices of MCMC samples kept for inference; will be used
#' to see which ones to use for inference after \code{n_burnin}.
#' \item \code{H_star_samp} \code{t_max+3} by \code{m_max} binary matrix
#' \item \code{alpha_samp}
#' } The following are recorded if they are not fixed in a priori:
#' \itemize{
#' \item \code{Q_samp}
#' \item \code{theta_samp}
#' \item \code{psi_samp}
#' \item \code{p_samp}
#' }
#' @export
sampler <- function(dat,model_options,mcmc_options){
# # <<<<------ uncomment for testing:
# dat <- simu_dat
# model_options <- model_options0
# mcmc_options <- mcmc_options0
# # <<<<------ end of testing data.
# # <<<<------ uncomment for testing:
# dat <- simu_dat
# model_options <- model_options_refit_given_partition
# mcmc_options <- mcmc_options_refit_given_partition
# # <<<<------ end of testing data.
#
# read in data:
#
n <- nrow(dat) # number of observations
L <- ncol(dat) # number of dimensions (e.g.,protein landmarks).
#hmcols <- package_env$hmcols
YlGnBu5 <- c('#ffffd9','#c7e9b4','#41b6c4','#225ea8','#081d58',"#092d94")
hmcols <- colorRampPalette(YlGnBu5)(256)
#
# posterior sampling options:
#
ALL_IN_ONE_START <- mcmc_options$ALL_IN_ONE_START # start the chain with all individuals in one cluster.
n_total <- mcmc_options$n_total # total number of mcmc iterations.
n_keep <- mcmc_options$n_keep # toral number of samples kept for posterior inference.
keepers <- seq(ceiling(n_total/n_keep),n_total,len=n_keep)
keep_index <- 0 # the index to keep during MCMC inference.
n_split <- mcmc_options$n_split # number of intermediate GS scan to arrive at launch state.
init_frac <- mcmc_options$init_frac
block_update_Q <- !is.null(mcmc_options$block_update_Q) && mcmc_options$block_update_Q # TRUE for updating columns of Q. FALSE otherwise.
block_update_H <- !is.null(mcmc_options$block_update_H) && mcmc_options$block_update_H # TRUE for updating rows of H. FALSE otherwise.
constrained <- !is.null(mcmc_options$constrained) && mcmc_options$constrained
PPD <- !is.null(mcmc_options$predictive_samp) && mcmc_options$predictive_samp
#
# working model options: (call it "working" because one can use m_max larger
# than its true value)
#
t_max <- model_options$t_max # maximum allowable number of clusters.
m_max <- model_options$m_max # maximum number of machines (truncated to m_max).
b <- model_options$b # Gamma parameter in MFM - hyperparameter for Dirichlet distn.
log_v <- model_options$log_v # coefficients for MFM.
do_update_partition <- is.null(model_options$the_partition)
POOL_PR <- !is.null(model_options$pooling_pr) && model_options$pooling_pr
is_identity_Q <- NULL
if (is.null(model_options$Q)){
Q_samp <- array(0,c(m_max,L,n_keep))
Q_merge_samp <- Q_samp
Q <- simulate_Q_dat(m_max,dat,0.1,min(max(colMeans(dat)),init_frac[1])) # random initialization; warm start.
}else{
Q <- model_options$Q # if Q is given.
is_identity_Q <- (nrow(Q)==ncol(Q)) && sum(abs(Q-diag(nrow(Q))))<1e-3
}
if (is.null(model_options$Q) | (!is.null(is_identity_Q) && !is_identity_Q) | block_update_H){ # <-- if Q may be non-diagonal; or if we block update H.
H_star_enumerate <- as.matrix(expand.grid(rep(list(0:1), m_max)),ncol=m_max) # all binary patterns for latent state profiles. 2^m_max of them.
}
# initialize clustering:
if (ALL_IN_ONE_START){
t <- 1 # number of clusters.
z <- rep(1,n) # z[i] is the cluster ID for observation i.
mylist <- rep(0,t_max+3); mylist[1] <- 1 # mylist[1:t] is the list of active cluster IDs.
# mylist is maintained in increasing order for 1:t, and
# is 0 after that.
c_next <- 2 # an available cluster ID to be used next.
N <- rep(0,t_max+3); N[1] <- n # N[c] is the size of cluster c. Note that N is different from n.
log_p <- rep(0,n+1) # probability for sampling z; n+1 because at most there could be n clusters;
# and sometimes one needs to assign an observation to a new cluster - by current bookkeeping,
# we are not efficient in memory and create a new cluster ID - hence +1.
zs <- rep(1,n) # temporary cluster indicators for split-merge assignments.
S <- rep(0,n) # temporary variable for indicies used during split-merge step.
} else{ # improve initial single cluster by warm start from hierarchical clustering:
hc <- hclust(dist(dat),"complete")
t <- floor(t_max/4)
z <- cutree(hc,k = t)
mylist <- rep(0,t_max+3); mylist[1:t] <- 1:t # mylist[1:t] is the list of active cluster IDs.
c_next <- t+1
N <- rep(0,t_max+3); N[1:t] <- table(z)
log_p <- rep(0,n+1)
zs <- z
S <- rep(0,n)
}
# deal with fixed clusters (this is to initialize):
if (!is.null(model_options$partition_partial)){
partition_partial <- model_options$partition_partial
force_cl <- fix_cluster(partition_partial,z,N,t,mylist)
z <- force_cl$z
N <- force_cl$N
t <- force_cl$t
mylist <- force_cl$mylist
c_next <- force_cl$c_next
zs <- z
S <- rep(0,n)
}
log_Nb <- log(1:n)+b # the multipliers needed when assigning an observation to an existing cluster. Restaurant process.
#
# create placeholders for storing posterior samples + initialize the values at the first iteration:
#
t_samp <- rep(0,n_keep) # posterior samples # of cluster (pseudo).
N_samp <- matrix(0,nrow=t_max+3,ncol=n_keep) # posterior samples of cluster sizes.
z_samp <- matrix(0,nrow=n,ncol=n_keep) # posterior samples of cluster indicators.
H_star_samp <- array(0,c(t_max+3,m_max,n_keep))# component-specific machine profiles.
mylist_samp <- matrix(0,nrow=length(mylist),ncol=n_keep) # need it to retrieve the correct latent state vector.
# true positive rates:
if (is.null(model_options$theta)){
theta_samp <- matrix(0,nrow=L,ncol=n_keep)
if (POOL_PR){ # <-- If pool PRs:
if (is.null(model_options$hyper_pseudo_n_TPR)){
hyper_pesudo_n_TPR_samp <- rep(0,n_keep)
hyper_pseudo_n_TPR <- 10 # <--- initialization.
} else{
hyper_pseudo_n_TPR <- model_options$hyper_pseudo_n_TPR # <--- read in.
}
if (is.null(model_options$a0_TPR)){
a0_TPR_samp <- rep(0,n_keep)
a0_TPR <- 0.9 # <--- initialization.
} else{
a0_TPR <- model_options$a0_TPR
}
theta <- trunc_rbeta(L,hyper_pseudo_n_TPR*a0_TPR,hyper_pseudo_n_TPR*(1-a0_TPR),0.8,1)
} else{ # <--- not pooling PRs:
a_theta <- model_options$a_theta # <--------------------------- 2 by L matrix.
theta <- sapply(1:L,function(i){stats::rbeta(1,a_theta[1,i],a_theta[2,i])}) # initialization.
#theta <- rep(0.9,L)
}
} else{
theta <- model_options$theta # use specified true positive rates.
}
# false positive rates:
if (is.null(model_options$psi)){
psi_samp <- matrix(0,nrow=L,ncol=n_keep)
if (POOL_PR){ # if pooling positive rates:
if (is.null(model_options$hyper_pseudo_n_FPR)){
hyper_pesudo_n_FPR_samp <- rep(0,n_keep)
hyper_pseudo_n_FPR <- 10 # <--- initialization.
} else{
hyper_pseudo_n_FPR <- model_options$hyper_pseudo_n_FPR # <--- read in.
}
if (is.null(model_options$a0_FPR)){
a0_FPR_samp <- rep(0,n_keep)
a0_FPR <- 0.1 # <--- initialization.
} else{
a0_FPR <- model_options$a0_FPR
}
psi <- trunc_rbeta(L,hyper_pseudo_n_FPR*a0_FPR,hyper_pseudo_n_FPR*(1-a0_FPR),0.001,0.2)
} else{ # if not pooling positive rates:
a_psi <- model_options$a_psi # <--------------------------- 2 by L matrix.
psi <- sapply(1:L,function(i){stats::rbeta(1,a_psi[1,i],a_psi[2,i])}) # initialization.
#psi <- rep(0.1, L) # initialization.
}
} else{
psi <- model_options$psi # use specified false positive rates.
}
# probability of a latent state being positive:
if (is.null(model_options$p0)){
p_samp <- matrix(0,nrow=m_max,ncol=n_keep)
p <- rep(0.5,m_max) # initialization.
} else{
p <- model_options$p0 # fix prevalence as specified.
}
if (is.null(model_options$alpha) && is.null(model_options$p0)){
alpha_samp <- rep(0,n_keep) # hyperparameter for latent state prevalence; Beta(alpha/M,1) for IBP.
alpha <- m_max # initialization.
}else{
alpha <- model_options$alpha # fix alpha.
}
# when estimating Q, one can fix a "best" clustering estimate obtained
# from an initial fit:
if (!do_update_partition){
z <- model_options$the_partition #<-- must be from 1 to the total number of clusters.
t <- length(unique(z))
mylist[1:t] <- 1:t
}
# if to do posterior predictive sampling:
if (PPD){Y_pred_samp <- array(NA,c(n,L,n_keep))}
#
# begin MCMC iteration:
#
cat("[rewind] Begin MCMC for model with #latent states (M =", m_max, ")\n")
if(!is.null(model_options$Q) && is_identity_Q){cat("> identity Q provided.\n")}
if(!is.null(model_options$Q) && !is_identity_Q){cat("> Q provided; Not identity.\n")}
for (iter in 1:n_total){ # <---- BEGING MCMC ITERATIONS:
VERBOSE <- iter%%mcmc_options$print_mod==0
#
# Assign individuals to clusters:
#
if (!do_update_partition){ # BEGIN choosing to update partition or not:
N <- sapply(1:t,function(uu){sum(z==uu)})
}else{
# update cluster indicators z for all subjects - one complete Gibbs scan to refine clusters:
if (!is.null(model_options$partition_partial)){
set_change <- sample((1:n)[-unlist(model_options$partition_partial)])
} else{
set_change <- sample(1:n)
}
for (i in set_change){ # iterate over subjects for cluster assignment:
# remove obs i from its current cluster:
c <- z[i] # <--- get ith person's cluster ID.
N[c] <- N[c]-1 # <--- remove it from cluster ID=c.
if (N[c]>0){ # if there are observations left after removal of i, use c_next:
c_prop <- c_next # <-- c_next next available cluster id to use.
} else{# if i was by itself, use its cluster ID again.
c_prop <- c
mylist <- ordered_remove(c,mylist,t)
t <- t-1 # total number of clusters is now reduced by one.
}
#
# compute probability for Gibbs updating - the probability of assigning a subject
# to a cluster.
#
if (!is.null(model_options$Q) && is_identity_Q){ # <--- if Q is specified and identity:
for (j in 1:t){
cc <- mylist[j]
log_p[j] <- log_Nb[N[cc]]+log_marginal_Q_identity(rbind(dat[(z==cc)[-i],,drop=FALSE],dat[i,]),p,theta,psi)-
log_marginal_Q_identity(dat[(z==cc)[-i],,drop=FALSE],p, theta,psi) # existing cluster.
}
log_p[t+1] <- log_v[t+1]-log_v[t] + log(b) + log_marginal_Q_identity(dat[i,,drop=FALSE],p,theta,psi) # new cluster.
} else{ # <--- if Q is not specified or specified but not identity:
for (j in 1:t){
cc <- mylist[j]
log_p[j] <- log_Nb[N[cc]]+log_marginal(rbind(dat[(z==cc)[-i],,drop=FALSE],dat[i,]),H_star_enumerate,Q,p,theta,psi)-
log_marginal(dat[(z==cc)[-i],,drop=FALSE],H_star_enumerate,Q,p, theta,psi) # existing cluster.
}
log_p[t+1] <- log_v[t+1]-log_v[t] + log(b) + log_marginal(dat[i,,drop=FALSE],H_star_enumerate,Q,p,theta,psi) # new cluster.
}
j <- sample(t+1,1,prob = exp(log_p[1:(t+1)]))
# add obs i to its new cluster:
if (j<=t){ # if not on its own:
c <- mylist[j]
} else{ # if on its own:
c <- c_prop
mylist <- ordered_insert(c,mylist,t)
t <- t+1
c_next <- ordered_next(mylist)
if(t>t_max) {stop("[rewind] Sampled t has exceeded t_max. Increast t_max and retry.")}
}
z[i] <- c
N[c] <- N[c] + 1
}# END cluster assignment iteration over subjects.
# After iterate through other subjects not in partition_partial,
# isolate the partition_partial (because other subjects may join
# the clusters in partition_partial)
if (!is.null(model_options$partition_partial)){
partition_partial <- model_options$partition_partial
force_cl <- fix_cluster(partition_partial,z,N,t,mylist)
z <- force_cl$z
N <- force_cl$N
t <- force_cl$t
mylist <- force_cl$mylist
c_next <- force_cl$c_next
}
}# END IFELSE for doing partition update or not.
if (do_update_partition){
# if (use_splitmerge){}
MORE_SPLIT <- mcmc_options$MORE_SPLIT
if (!is.null(model_options$partition_partial)){
res_split_merge <- split_merge(dat,z,zs,S,mylist,N,t,b,log_v,n_split,Q,p,theta,psi,MORE_SPLIT,model_options$partition_partial)
} else{
res_split_merge <- split_merge(dat,z,zs,S,mylist,N,t,b,log_v,n_split,Q,p,theta,psi,MORE_SPLIT)
}
t <- res_split_merge$t
z <- res_split_merge$z
N <- res_split_merge$N
mylist <- res_split_merge$mylist
c_next <- ordered_next(mylist)
}
if(t>t_max) {stop("[rewind] Sampled t has exceeded t_max. Increast t_max and retry.")}
## Simulate xi for posterior predictive checking:
# #if (is.null(model_options$Q)){
# xi_samp <- array(0,c(t_max+3,L,n_total)) # add this to the results.
# for (j in 1:t){
# xi_samp[mylist[j],,iter] <- update_xi(dat[z==mylist[j],,drop=FALSE],p,theta,psi)
# }
# xi_star <- xi_samp[mylist[1:t],,iter]
# #}
## TODO: determine a good frac threshold.
if (iter>1 && sum(colSums(H_star)==0)>0){ # this is added..... useful or not??
Q[colSums(H_star)==0,] <- simulate_Q_dat(sum(colSums(H_star)==0),dat,frac=init_frac[2])
}
#image(f(Q),col=hmcols)
#
# update H: latent state profile given clusters: (check here for sampling H)
#
H_star_redun <- matrix(0,nrow=t_max+3,ncol=m_max) # prior to merging redundant rows or columns.
for (j in 1:t){
if (block_update_H){
curr_pattern_log_p <- log_full(dat[z==mylist[j],,drop=FALSE],H_star_enumerate,Q,p,theta,psi)
curr_pattern_p <- exp(curr_pattern_log_p-matrixStats::logSumExp(curr_pattern_log_p))
curr_ind <- sample(1:(2^m_max),1,prob=curr_pattern_p)
H_star_redun[mylist[j],] <- H_star_enumerate[curr_ind,]
} else{
for (m in 1:m_max){
H_star_redun[mylist[j],m] <- 0
L0 <- log_full(dat[z==mylist[j],,drop=FALSE],
H_star_redun[mylist[j],,drop=FALSE],Q,p,theta,psi)
H_star_redun[mylist[j],m] <- 1
L1 <- log_full(dat[z==mylist[j],,drop=FALSE],
H_star_redun[mylist[j],,drop=FALSE],Q,p,theta,psi)
curr_eta_p <- exp(L1-matrixStats::logSumExp(c(L0,L1)))
H_star_redun[mylist[j],m] <-stats::rbinom(1,1,curr_eta_p)
#H_star_samp[mylist[j],m,iter] <- metrop_flip(H_star_samp[mylist[j],m,iter],curr_eta_p)
}
}
}
H_star <- H_star_redun[mylist[1:t],,drop=FALSE] # don't use Z to subset H_star!!! Do so for H_star_samp.
#
# update Q:
#
if (is.null(model_options$Q)){
if (block_update_Q && constrained){
stop("[rewind] 'block_update_Q' and 'constrained' in 'model_options' must NOT be TRUE at the same time. Set one to FALSE and retry.")}
if (block_update_Q){
Q <- update_Q_col_block(dat,Q,H_star,z,t,mylist,p,theta,psi)
} else{
Q <- update_Q(dat,Q,H_star,z,t,mylist,p,theta,psi,constrained)
}
}
if (VERBOSE){
cat("\n[rewind] iteration ", iter, ":\n");
cat("------------ \n");
if (do_update_partition && !is.null(MORE_SPLIT) && MORE_SPLIT){cat("> Biasing towards splitting while reaching the launch state.\n")}
cat("> Latent state profiles for t=",t," pseudo-clusters of sizes: ",N[N!=0],"\n")
cat("> Merging identical rows (pseudo-clusters) and columns (partner latent states):\n")
cat(">> H^* Before:\n")
print_mat <- H_star; rownames(print_mat) <- paste(c("pseudo-cluster",rep("",t-1)),1:t,sep=" ");
colnames(print_mat) <- paste(c("state(machine)",rep("",ncol(print_mat)-1)),1:ncol(print_mat),sep=" ")
print(print_mat) # <-- removed all zero columns.
merged_res <- merge_H_Q(H_star,1:t,t,Q,TRUE,skip_Q=!is.null(model_options$Q))
if (is.null(model_options$alpha) && is.null(model_options$p0)){
cat("> Finite IBP hyperparameter: alpha = ", alpha,"\n")
}
if (!is.null(mcmc_options$print_Q) && mcmc_options$print_Q) {image(f(Q),col=hmcols)}
}
#
# update true/false positive rates - theta/psi:
#
if (is.null(model_options$theta) && is.null(model_options$psi)){
# if theta and psi are not specified:
#xi_temp <- (H_star%*%Q>0.5)+0; ind_eff <- which(colSums(xi_temp)>0)
if (POOL_PR){
if (is.null(model_options$hyper_pseudo_n_TPR)){
hyper_pseudo_n_TPR <- update_pseudo_n_PR(theta, a0_TPR, e = model_options$e0N_TPR, f = model_options$f0N_TPR, show_density = FALSE) # <-- hard coded.
}
if (is.null(model_options$a0_TPR)){
a0_TPR <- update_mu_PR(theta,hyper_pseudo_n_TPR, a=model_options$e0_TPR,b=model_options$f0_TPR,show_density=FALSE) # <-- hard coded.
}
if (is.null(model_options$hyper_pseudo_n_FPR)){ # e, f are gamma parameters:
hyper_pseudo_n_FPR <- update_pseudo_n_PR(psi, a0_FPR, e = model_options$e0N_FPR, f = model_options$f0N_FPR, show_density = FALSE) # <-- hard coded.
}
if (is.null(model_options$a0_FPR)){
a0_FPR <- update_mu_PR(psi,hyper_pseudo_n_FPR,a=model_options$e0_FPR,b=model_options$f0_FPR,show_density=FALSE) # <-- hard coded.
}
a_theta <- replicate(L,hyper_pseudo_n_TPR*c(a0_TPR,1-a0_TPR))
a_psi <- replicate(L,hyper_pseudo_n_FPR*c(a0_FPR,1-a0_FPR))
}
res_update_pr <- update_positive_rate(dat,H_star_redun[z,,drop=FALSE],Q,a_theta,a_psi,theta)
theta <- res_update_pr$theta
psi <- res_update_pr$psi
}
# update hyperparameter for {p_m}:
if (is.null(model_options$alpha) && is.null(model_options$p0)){
alpha <- update_alpha(H_star,t,m_max)
}
# update prevalence vector {p_m}:
if (is.null(model_options$p0)){
p <- update_prevalence(H_star,alpha,m_max)
}
#
# Record MCMC results:
#
if (iter==keepers[keep_index+1]){
keep_index <- keep_index +1
t_samp[keep_index] <- t
for (j in 1:t){N_samp[mylist[j],keep_index] <- N[mylist[j]]}
if (is.null(model_options$p0)){
p_samp[,keep_index] <- p
}
H_star_samp[mylist[1:t],,keep_index] <- H_star # <-- okay, insert H_star at places indicated by mylist.
for (i in 1:n){z_samp[i,keep_index] <- z[i]}
mylist_samp[,keep_index] <- mylist
if (is.null(model_options$theta)){
if (POOL_PR){
if (is.null(model_options$hyper_pseudo_n_TPR)){
hyper_pesudo_n_TPR_samp[keep_index] <- hyper_pseudo_n_TPR
}
if (is.null(model_options$a0_TPR)){
a0_TPR_samp[keep_index] <- a0_TPR
}
}
theta_samp[,keep_index] <- theta # of length L.
}
if (is.null(model_options$psi)){
if (POOL_PR){
if (is.null(model_options$hyper_pseudo_n_FPR)){
hyper_pesudo_n_FPR_samp[keep_index] <- hyper_pseudo_n_FPR
}
if (is.null(model_options$a0_FPR)){
a0_FPR_samp[keep_index] <- a0_FPR
}
}
psi_samp[,keep_index] <- psi
}
if (is.null(model_options$Q)){
Q_samp[,,keep_index] <- Q
}
if (is.null(model_options$alpha) && is.null(model_options$p0)){
alpha_samp[keep_index] <- alpha
}
if (!is.null(mcmc_options$predictive_samp) && mcmc_options$predictive_samp){ # <-- slow.
Y_pred <- (H_star_redun[z,]%*%Q>0.5)+0 # DINO.
for (i_pred in 1:n){
for (l_pred in 1:L){
curr_p_pred <- c(theta[l_pred],psi[l_pred])[2-Y_pred[i_pred,l_pred]]
Y_pred[i_pred,l_pred] <- rbinom(1,1,curr_p_pred)
}
}
Y_pred_samp[,,keep_index] <- Y_pred
}
}
}# ----------> END MCMC ITERATIONS:
res <- list(keepers=keepers,t_samp=t_samp,
N_samp=N_samp,z_samp=z_samp,
H_star_samp = H_star_samp,
mylist_samp=mylist_samp,keepers=keepers)
if (is.null(model_options$theta)){
res$theta_samp <- theta_samp # of length L.
}
if (is.null(model_options$psi)){
res$psi_samp <- psi_samp
}
if (POOL_PR){
if (is.null(model_options$hyper_pseudo_n_TPR)){
res$hyper_pesudo_n_TPR_samp <- hyper_pesudo_n_TPR_samp
}
if (is.null(model_options$a0_TPR)){
res$a0_TPR_samp <- a0_TPR_samp
}
if (is.null(model_options$hyper_pseudo_n_FPR)){
res$hyper_pesudo_n_FPR_samp <- hyper_pesudo_n_FPR_samp
}
if (is.null(model_options$a0_FPR)){
res$a0_FPR_samp <- a0_FPR_samp
}
}
if (is.null(model_options$Q)){
res$Q_samp <- Q_samp
}
if (is.null(model_options$p0)){
res$p_samp <- p_samp
}
if (is.null(model_options$alpha) && is.null(model_options$p0)){
res$alpha_samp <- alpha_samp
}
if (!is.null(mcmc_options$predictive_samp) && mcmc_options$predictive_samp){
res$Y_pred_samp <- Y_pred_samp
}
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.