Nothing
#' @title Multistate Life Table Method
#' @description A Bayesian Multistate Life Table Method for survey data, developed by Lynch and Zang (2022), allowing for large state spaces with quasi-absorbing states (i.e., structural zeros in a transition matrix).
#' @details This function came from the deprecated \href{https://github.com/jwindle/BayesLogit}{bayeslogit} package, which conducts Bayesian multinomial logistic regressions using Polya-Gamma latent variables (Polson et al. 2013). It should be jointly used with the mlifetable() function, which will generate life tables based on the estimates from regressions.
#' @param y A vector of state transitions, which can be created either manually or with \code{CreateTrans()}. See more details using \code{?lifedata}.
#' @param X A matrix of covariates. Note that \code{X} must include age as a covariate.
#' @param samp Number of posterior samples. For efficiency purposes, if you need a large sample (e.g., \eqn{\ge}5000), we recommend parallel computing in a cluster.
#' @param burn 'burn-in' period. Default is 500.
#' @param verbose Progress report. Default is 10, which means this function will report the current progress for every 10 posterior samples.
#' @param thin The thinning strategy to reduce autocorrelation. For example, if \code{thin = 5}, this function will select 1 from every 5 posterior samples and generate a new dataset named \code{outwstepwidth.txt}. Default is 5.
#' @param trace.plot If TRUE, this function will create a new directory under given \code{file_path} and output corresponding trace plots using samples after burn-in.
#' @param file_path The file path for outputs. If a path is specified, the result will also be saved in the given file path. You can find two result files in the specified file: \code{result.txt} and \code{resultwstep.txt}. The former contains all posterior samples generated after burn-in. The latter is sampled from the former one with a specified sampling interval.
#' @seealso \code{\link{mlifeTable}}, \code{\link{lifedata}}, \code{\link{CreateTrans}}
#' @import grDevices
#' @importFrom stats model.matrix pnorm runif rexp rnorm
#' @export
#' @return
#' A list that contains two arrays:
#' \itemize{
#' \item \strong{out}: An array that contains all posterior samples generated.
#' \item \strong{outwstepwidth}: An array generated by selecting one sample from every \emph{thin} samples in \strong{out}.
#' }
#' The number of columns in both arrays is determined by the number of covariates in X and the number of unique transition status in y. For example, if we have 12 covariates in X and 36 unique transitions in y, our result will contain (12+1)*(36-1)= 455 columns in total.
#' @examples
#' \donttest{
#' data <- lifedata
#' y <- data[,1]
#' X <- data[,-1]
#'
#' # This example will take about 30 mins.
#' out <- bayesmlogit(y, X ,samp=1000, burn=500,verbose=10)
#'
#'
#' }
bayesmlogit <- function(y, X,
file_path=NA,
samp=1000, burn=500, verbose=100,
thin = 5,
trace.plot=FALSE){
UseMethod("bayesmlogit")
}
#' @export
bayesmlogit.default <- function(y, X,file_path=NA,
samp=1000, burn=500, verbose=1000,
thin = 5,
trace.plot=FALSE)
{
if (any(is.na(y)) | any(is.na(X)))
stop("Missing values need to be elimnated before using this function",
call. = FALSE)
##Pre-process
y.1 <- sort(unique(y))
y <- match(y,y.1)
data_f <- as.data.frame(cbind(y,X))
names(data_f)[1] <- 'y'
data_f$y <- as.factor(data_f$y)
J.1 <- nlevels(data_f$y)
X <- stats::model.matrix(y ~ ., data=data_f)
col_names <- colnames(X)
y.all <- stats::model.matrix(~ y - 1, data=data_f)
y <- y.all[,-J.1]
##Construct sub-functions. These codes are all copied from the deprecated bayeslogit package.
TRUNC = 0.64
mass.texpon <- function(Z)
{
x = TRUNC;
fz = pi^2 / 8 + Z^2 / 2;
b = sqrt(1.0 / x) * (x * Z - 1);
a = -1.0 * sqrt(1.0 / x) * (x * Z + 1);
x0 = log(fz) + fz * TRUNC;
xb = x0 - Z + stats::pnorm(b, log.p=TRUE);
xa = x0 + Z + stats::pnorm(a, log.p=TRUE);
qdivp = 4 / pi * ( exp(xb) + exp(xa) );
1.0 / (1.0 + qdivp);
}
rtigauss <- function(Z, R=TRUNC)
{
Z = abs(Z);
mu = 1/Z;
X = R + 1;
if (mu > R) {
alpha = 0.0;
while (stats::runif(1) > alpha) {
## X = R + 1
## while (X > R) {
## X = 1.0 / rgamma(1, 0.5, rate=0.5);
## }
E = stats::rexp(2)
while ( E[1]^2 > 2 * E[2] / R) {
E = stats::rexp(2)
}
X = R / (1 + R*E[1])^2
alpha = exp(-0.5 * Z^2 * X);
}
}
else {
while (X > R) {
lambda = 1.0;
Y = stats::rnorm(1)^2;
X = mu + 0.5 * mu^2 / lambda * Y -
0.5 * mu / lambda * sqrt(4 * mu * lambda * Y + (mu * Y)^2);
if ( stats::runif(1) > mu / (mu + X) ) {
X = mu^2 / X;
}
}
}
X;
}
a.coef <- function(n,x)
{
if ( x>TRUNC )
pi * (n+0.5) * exp( -(n+0.5)^2*pi^2*x/2 )
else
(2/pi/x)^1.5 * pi * (n+0.5) * exp( -2*(n+0.5)^2/x )
}
rpg.devroye.1 <- function(Z)
{
Z = abs(Z) * 0.5;
## PG(1,z) = 1/4 J*(1,Z/2)
fz = pi^2 / 8 + Z^2 / 2;
## p = (0.5 * pi) * exp( -1.0 * fz * TRUNC) / fz;
## q = 2 * exp(-1.0 * Z) * pigauss(TRUNC, 1.0/Z, 1.0);
num.trials = 0;
total.iter = 0;
while (TRUE)
{
num.trials = num.trials + 1;
if ( stats::runif(1) < mass.texpon(Z) ) {
## Truncated Exponential
X = TRUNC + stats::rexp(1) / fz
}
else {
## Truncated Inverse Normal
X = rtigauss(Z)
}
## C = cosh(Z) * exp( -0.5 * Z^2 * X )
## Don't need to multiply everything by C, since it cancels in inequality.
S = a.coef(0,X)
Y = stats::runif(1)*S
n = 0
while (TRUE)
{
n = n + 1
total.iter = total.iter + 1;
if ( n %% 2 == 1 )
{
S = S - a.coef(n,X)
if ( Y<=S ) break
}
else
{
S = S + a.coef(n,X)
if ( Y>S ) break
}
}
if ( Y<=S ) break
}
## 0.25 * X
list("x"=0.25 * X, "n"=num.trials, "total.iter"=total.iter)
}
## N - number of trials
## J - number of categories
## P - number of covariates
## y - N x J-1 matrix. y_{ij} = fraction of outcomes in jth category on trial i.
## X - N x P design matrix
## n - N x 1 matrix of rolls
## Assume beta_J = 0 for identification.
m.0=array(0, dim=c(ncol(X), ncol(y)))
P.0=array(0, dim=c(ncol(X), ncol(X), ncol(y)))
N = nrow(X);
P = ncol(X);
J = ncol(y) + 1;
out = list(
beta = array(0, dim=c(samp, P, J-1))
)
beta = matrix(0, P, J);
w = matrix(0, N, J);
## Precompute. (Initialize). These codes are all copied from the deprecated bayeslogit package.
n=rep(1,nrow(as.matrix(y)))
kappa = (y - 0.5)*n;
b.0 = matrix(0, P, J-1);
for (j in 1:(J-1)) b.0[,j] = P.0[,,j] %*% m.0[,j];
## A = rowSums( exp(X %*% beta[,-1]) );
for (i in 1:(samp+burn)) {
for (j in 1:(J-1)) {
## For now recompute at each iteration. Try taking out later.
A = rowSums( exp(X %*% beta[,-j]) );
c.j = log(A);
eta.j = X %*% beta[,j] - c.j;
## omega.j
for (q in 1:N){
#print(eta.j[q])
w[q,j] = rpg.devroye.1(eta.j[q])$x;
}
## beta.j
PL.j = t(X) %*% (X * w[,j]);
bL.j = t(X) %*% (kappa[,j] + c.j * w[,j]);
P1.j = PL.j + P.0[,,j];
V1.j = chol2inv(chol(P1.j));
m1.j = V1.j %*% (bL.j + b.0[,j]);
sqrtV1.j = t(chol(V1.j));
beta[,j] = m1.j + sqrtV1.j %*% stats::rnorm(P);
## A += exp(X %*% beta[,j]) - exp(X %*% beta[,(j+1) %% (J-1)])
## Store
if (i > burn) {
out$beta[i-burn,,j] = beta[,j];
}
}
if (i %% verbose == 0) {
cat("Finished", i,"/",burn+samp,"\n")
}
}
##Process the result
dim(out$beta) <- c(samp,P*(J-1))
out <- as.matrix(out$beta)
indx <- seq(thin, samp, thin)
out2 <- out[indx, ]
if(!is.na(file_path)){
utils::write.csv(out, file=paste(file_path,"/result.csv",sep=''), row.names=FALSE)
utils::write.csv(out2, file=paste(file_path,"/resultwstep.csv",sep=''), row.names=FALSE)
}
if(trace.plot == TRUE & !is.na(file_path)){
output.path <- paste(file_path,"/TracePlots",sep ='')
dir.create(output.path)
for(i in 1:dim(out)[2]){
grDevices::png(file=paste(output.path,"/TracePlot",
"_transition=", y.1[ceiling(i/P)],
"_covariate=", col_names[(i-1) %% P +1],
".png",
sep=''))
p <- plot(x=1:dim(out)[1],y=out[,i],type="l",main="Trace plot")
grDevices::dev.off()
}
}
result <- list(out = data.frame(out), outwstepwidth = data.frame(out2))
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.