#' @export
fastcountuniq <- function(x)
{
data.table(x)[, .N, keyby = x]$N
}
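# Example (illustrative): fastcountuniq(c(2, 1, 2, 3, 1)) returns c(2, 2, 1),
# the counts of the unique values 1, 2, 3 in sorted (keyed) order.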
#' @export
fastnormdens <- function(x, mean = 0, sd = 1){
(1/(sd*sqrt(2*pi)))*exp(-0.5*((x-mean)/sd)^2)
}
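# fastnormdens(x, mean, sd) is a faster drop-in for stats::dnorm; e.g.
# all.equal(fastnormdens(0.3, mean = 1, sd = 2), dnorm(0.3, mean = 1, sd = 2))
# returns TRUE.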
# speed up this function if necessary by replacing dt() with a faster function
dt_ls <- function(x, df=1, mu=0, sigma=1) (1/sigma) * dt((x - mu)/sigma, df)
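# dt_ls gives the density of a location-scale t distribution: if T ~ t(df),
# then mu + sigma*T has density dt_ls(x, df, mu, sigma); with mu = 0 and
# sigma = 1 it reduces to dt(x, df).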
#' @title Nonparametric Type I Tobit Bayesian Additive Regression Trees implemented using MCMC
#'
#' @description Type I Tobit Bayesian Additive Regression Trees implemented using MCMC with a Dirichlet Process Mixture of normal distributions for the error term
#' @import dbarts
#' @import truncnorm
#' @import mvtnorm
#' @import censReg
#' @import dqrng
#' @import data.table
#' @import accelerometry
#' @import wrswoR
#' @param x.train The training covariate data for all training observations. Number of rows equal to the number of observations. Number of columns equal to the number of covariates.
#' @param x.test The test covariate data for all test observations. Number of rows equal to the number of observations. Number of columns equal to the number of covariates.
#' @param y The training data vector of outcomes. A continuous, censored outcome variable.
#' @param n.iter Number of iterations excluding burnin.
#' @param n.burnin Number of burnin iterations.
#' @param below_cens Number at or below which observations are censored.
#' @param above_cens Number at or above which observations are censored.
#' @param n.trees (dbarts control option) A positive integer giving the number of trees used in the sum-of-trees formulation.
#' @param n.chains (dbarts control option) A positive integer giving the number of independent chains for the dbarts sampler to use (more than one chain is unlikely to improve speed because only one sample is drawn per call to dbarts).
#' @param n.threads (dbarts control option) A positive integer controlling how many threads will be used for various internal calculations, as well as the number of chains. Internal calculations are highly optimized so that single-threaded performance tends to be superior unless the number of observations is very large (>10k), so that it is often not necessary to have the number of threads exceed the number of chains.
#' @param printEvery (dbarts control option) If verbose is TRUE, a progress statement is printed every printEvery potential samples (after thinning). Must be a positive integer.
#' @param printCutoffs (dbarts control option) A non-negative integer specifying how many of the decision rules for a variable are printed in verbose mode
#' @param rngKind (dbarts control option) Random number generator kind, as used in set.seed. For type "default", the built-in generator will be used if possible. Otherwise, will attempt to match the built-in generator’s type. Success depends on the number of threads.
#' @param rngNormalKind (dbarts control option) Random number generator normal kind, as used in set.seed. For type "default", the built-in generator will be used if possible. Otherwise, will attempt to match the built-in generator’s type. Success depends on the number of threads and the rngKind
#' @param rngSeed (dbarts control option) Random number generator seed, as used in set.seed. If the sampler is running single-threaded or has one chain, the behavior will be as any other sequential algorithm. If the sampler is multithreaded, the seed will be used to create an additional pRNG object, which in turn will be used to sequentially seed the thread-specific pRNGs. If equal to NA, the clock will be used to seed the pRNGs when applicable.
#' @param updateState (dbarts control option) Logical setting the default behavior for many sampler methods with regards to the immediate updating of the cached state of the object. A current, cached state is only useful when saving/loading the sampler.
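#' @param n.burn (dbarts control option) A non-negative integer giving the number of burn-in samples discarded internally by the dbarts sampler on each call.
#' @param n.samples (dbarts control option) A positive integer giving the number of samples returned by each call to the dbarts sampler.
#' @param n.thin (dbarts control option) A positive integer giving the thinning interval applied to samples drawn by the dbarts sampler.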
#' @param tree_power Tree prior parameter for outcome model.
#' @param tree_base Tree prior parameter for outcome model.
#' @param node.prior (dbarts option) An expression of the form dbarts:::normal or dbarts:::normal(k) that sets the prior used on the averages within nodes.
#' @param resid.prior (dbarts option) An expression of the form dbarts:::chisq or dbarts:::chisq(df,quant) that sets the prior used on the residual/error variance
#' @param proposal.probs (dbarts option) Named numeric vector or NULL, optionally specifying the proposal rules and their probabilities. Elements should be "birth_death", "change", and "swap" to control tree change proposals, and "birth" to give the relative frequency of birth/death in the "birth_death" step.
#' @param sigmadbarts (dbarts option) A positive numeric estimate of the residual standard deviation. If NA, a linear model is used with all of the predictors to obtain one.
#' @param print.opt Print progress every print.opt Gibbs iterations.
#' @param lambda0 Lambda parameter for the base distribution for the error term.
#' @param nu0 nu parameter for sigma prior in base distribution G_0
#' @param sigest Estimated standard deviation of the outcome or error (used for setting base distribution parameters).
#' @param sigquant Parameter for setting lambda0 (if NA). lambda0 set such that the sigquant quantile of the base distribution of sigma is the standard deviation of the outcome (as estimated by Maximum Likelihood assuming censored normal outcome).
#' @param alpha_prior The prior for the alpha parameter of the Dirichlet Process mixture of normals. If "vh" then apply the Gamma(c1,c2) prior of van Hasselt (2011) and Escobar (1994). If "george", then apply the prior of George (2019), McCulloch (2021), Conley (2008), and Antoniak (1974).
#' @param c1 If alpha_prior == "vh", then c1 is the shape parameter of the Gamma distribution.
#' @param c2 If alpha_prior == "vh", then c2 is the rate parameter of the Gamma distribution.
#' @param alpha_gridsize If alpha_prior == "george", this is the size of the grid used for the discretized draws of alpha.
#' @param mixstep If TRUE, includes a mixing step to speed up convergence of the Dirichlet Process Mixture draws. Default is TRUE.
#' @param init.many.clust If TRUE, initialize the Dirichlet Process Mixture with many clusters instead of 1 cluster. Default is FALSE.
#' @param k0_resids If TRUE (the default), the maximum absolute residual from a linear regression of the outcome on the covariates determines k0 (together with lambda0). If FALSE, the maximum absolute deviation of the outcome from its mean determines k0.
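#' @param nolinregforlambda If TRUE, do not regress the outcome on the covariates when estimating sigest for setting lambda0; an intercept-only (censored) model or the standard deviation of the outcome is used instead.
#' @param censsigprior If TRUE (the default), sigest is estimated from a censored (Tobit) regression when setting lambda0. If FALSE, an uncensored linear regression (or the standard deviation of the outcome) is used.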
#' @param sparse If equal to TRUE, use Linero Dirichlet prior on splitting probabilities
#' @param alpha_a_y Linero alpha prior parameter for outcome equation splitting probabilities
#' @param alpha_b_y Linero alpha prior parameter for outcome equation splitting probabilities
#' @param alpha_split_prior If TRUE, set a hyperprior for the Linero alpha parameter.
#' @return The following objects are returned:
#' \item{Z.mat}{Matrix of draws of latent outcomes for all training observations. Number of rows equals number of training observations. Number of columns equals n.iter.}
#' \item{Z.matcens}{Matrix of draws of latent (censored) outcomes for censored observations. Number of rows equals number of censored training observations. Number of columns equals n.iter. Rows are ordered in order of censored observations in the training data.}
#' \item{Z.matcensbelow}{Matrix of draws of latent (censored) outcomes for observations censored from below. Number of rows equals number of training observations censored from below. Number of columns equals n.iter. Rows are ordered in order of censored observations in the training data.}
#' \item{Z.matcensabove}{Matrix of draws of latent (censored) outcomes for observations censored from above. Number of rows equals number of training observations censored from above. Number of columns equals n.iter. Rows are ordered in order of censored observations in the training data.}
#' \item{mu}{Matrix of draws of the sum of terminal nodes plus the error mean component, i.e. f(x_i) + mu_i, for all training observations. Number of rows equals number of training observations. Number of columns equals n.iter.}
#' \item{mucens}{Matrix of draws of the sum of terminal nodes plus the error mean component, i.e. f(x_i) + mu_i, for all censored training observations. Number of rows equals number of censored training observations. Number of columns equals n.iter.}
#' \item{muuncens}{Matrix of draws of the sum of terminal nodes plus the error mean component, i.e. f(x_i) + mu_i, for all uncensored training observations. Number of rows equals number of uncensored training observations. Number of columns equals n.iter.}
#' \item{mucensbelow}{Matrix of draws of the sum of terminal nodes plus the error mean component, i.e. f(x_i) + mu_i, for all training observations censored from below. Number of rows equals number of training observations censored from below. Number of columns equals n.iter.}
#' \item{mucensabove}{Matrix of draws of the sum of terminal nodes plus the error mean component, i.e. f(x_i) + mu_i, for all training observations censored from above. Number of rows equals number of training observations censored from above. Number of columns equals n.iter.}
#' \item{ystar}{Matrix of training sample draws of the outcome assuming no censoring (can take values below below_cens and above above_cens). Number of rows equals number of training observations. Number of columns equals n.iter.}
#' \item{ystarcens}{Matrix of censored training sample draws of the outcome assuming no censoring (can take values below below_cens and above above_cens). Number of rows equals number of censored training observations. Number of columns equals n.iter.}
#' \item{ystaruncens}{Matrix of uncensored training sample draws of the outcome assuming no censoring (can take values below below_cens and above above_cens). Number of rows equals number of uncensored training observations. Number of columns equals n.iter.}
#' \item{ystarcensbelow}{Matrix of censored from below training sample draws of the outcome assuming no censoring (can take values below below_cens and above above_cens). Number of rows equals number of training observations censored from below. Number of columns equals n.iter.}
#' \item{ystarcensabove}{Matrix of censored from above training sample draws of the outcome assuming no censoring (can take values below below_cens and above above_cens). Number of rows equals number of training observations censored from above. Number of columns equals n.iter.}
#' \item{test.mu}{Matrix of draws of the sum of terminal nodes plus the error mean component, i.e. f(x_i) + mu_i, for all test observations. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{test.y_nocensoring}{Matrix of test sample draws of the outcome assuming no censoring. Can take values below below_cens and above above_cens. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{test.y_withcensoring}{Matrix of test sample draws of the outcome with censoring applied. Cannot take values below below_cens or above above_cens. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{test.probcensbelow}{Matrix of draws of probabilities of test sample observations being censored from below. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{test.probcensabove}{Matrix of draws of probabilities of test sample observations being censored from above. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{sigmavecs_train}{Matrix of draws of the error standard deviations (mixture component scales) for all training observations. Number of rows equals number of training observations. Number of columns equals n.iter.}
#' \item{sigmavecs_test}{Matrix of draws of the error standard deviations for all test observations. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{error_mu_train}{Matrix of draws of the error mean components (mixture component locations) for all training observations. Number of rows equals number of training observations. Number of columns equals n.iter.}
#' \item{error_mu_test}{Matrix of draws of the error mean components for all test observations. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{cond_exp_train}{Matrix of draws of the conditional expectation of the censored outcome for all training observations. Number of rows equals number of training observations. Number of columns equals n.iter.}
#' \item{cond_exp_test}{Matrix of draws of the conditional expectation of the censored outcome for all test observations. Number of rows equals number of test observations. Number of columns equals n.iter.}
#' \item{alpha}{Vector of draws of the Dirichlet Process concentration parameter alpha. Number of elements equals n.iter.}
#' \item{alpha_s_y_store}{For Dirichlet prior on splitting probabilities in outcome equation, vector of alpha hyperparameter draws for each iteration.}
#' \item{var_count_y_store}{Matrix of counts of splits on each variable in the outcome equation. The number of rows is the number of post-burn-in iterations. The number of columns is the number of potential splitting variables.}
#' \item{s_prob_y_store}{Splitting probabilities for the outcome equation. The number of rows is the number of post-burn-in iterations. The number of columns is the number of potential splitting variables.}
#' @examples
#'
#'#example taken from https://stats.idre.ucla.edu/r/dae/tobit-models/
#'
#'dat <- read.csv("https://stats.idre.ucla.edu/stat/data/tobit.csv")
#'
#'train_inds <- sample(1:200,190)
#'test_inds <- (1:200)[-train_inds]
#'
#'ytrain <- dat$apt[train_inds]
#'ytest <- dat$apt[test_inds]
#'
#'xtrain <- cbind(dat$read, dat$math)[train_inds,]
#'xtest <- cbind(dat$read, dat$math)[test_inds,]
#'
#'tobart_res <- tbart1np(xtrain,xtest,ytrain,
#' below_cens = -Inf,
#' above_cens = 800,
#' n.iter = 400,
#' n.burnin = 100)
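#'
#'# illustrative posterior summaries from the returned draws:
#'# posterior mean of the conditional expectation for each test observation
#'rowMeans(tobart_res$cond_exp_test)
#'
#'# posterior probability of censoring from above for each test observation
#'rowMeans(tobart_res$test.probcensabove)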
#'
#' @export
tbart1np <- function(x.train,
x.test,
y,
n.iter=1000,
n.burnin=100,
below_cens = 0,
above_cens = Inf,
n.trees = 50L,
n.burn = 0L,
n.samples = 1L,
n.thin = 1L,
n.chains = 1,
n.threads = 1L,#guessNumCores(),
printEvery = 100L,
printCutoffs = 0L,
rngKind = "default",
rngNormalKind = "default",
rngSeed = NA_integer_,
updateState = TRUE,
tree_power = 2,
tree_base = 0.95,
node.prior = dbarts:::normal,
resid.prior = dbarts:::chisq,
proposal.probs = c(birth_death = 0.5, swap = 0.1, change = 0.4, birth = 0.5),
sigmadbarts = NA_real_,
print.opt = 100,
lambda0 = NA,
sigest = NA,
nu0=10,
sigquant = 0.95,
alpha_prior = "vh",
c1 = 2,
c2 = 2,
alpha_gridsize = 100L,
mixstep=TRUE,
init.many.clust = FALSE,
k0_resids = TRUE,
nolinregforlambda = FALSE,
censsigprior = TRUE,
sparse = FALSE,
alpha_a_y = 0.5,
alpha_b_y = 1,
alpha_split_prior = TRUE
){
# if(is.vector(x.train) | is.factor(x.train)| is.data.frame(x.train)) x.train = as.matrix(x.train)
# if(is.vector(x.test) | is.factor(x.test)| is.data.frame(x.test)) x.test = as.matrix(x.test)
# if((!is.matrix(x.train))) stop("argument x.train must be a double matrix")
# if((!is.matrix(x.test)) ) stop("argument x.test must be a double matrix")
if(nrow(x.train) != length(y)) stop("number of rows in x.train must equal the length of y")
if((ncol(x.test)!= ncol(x.train))) stop("input x.test must have the same number of columns as x.train")
#indexes of censored observations
cens_inds <- which( (y <= below_cens) | (y >= above_cens))
if(length(cens_inds)==0) stop("No censored observations")
uncens_inds <- which( (y > below_cens) & (y < above_cens))
censbelow_inds <- which(y <= below_cens )
censabove_inds <- which(y >= above_cens )
#create z vector
z <- rep(NA, length(y))
z[uncens_inds] <- y[uncens_inds]
#this line is perhaps unnecessary
z[which(y <= below_cens )] <- below_cens
z[which(y >= above_cens )] <- above_cens
n <- length(y)
n0 <- length(cens_inds)
n1 <- length(uncens_inds)
n_censbelow <- length(which(y <= below_cens))
n_censabove <- length(which(y >= above_cens))
ntest = nrow(x.test)
p_y <- ncol(x.train)
if(sparse){
s_y <- rep(1 / p_y, p_y) # probability vector to be used during the growing process for DART feature weighting
rho_y <- p_y # For DART
if(alpha_split_prior){
alpha_s_y <- p_y
}else{
alpha_s_y <- 1
}
alpha_scale_y <- p_y
}
draw = list(
Z.mat = array(NA, dim = c(n, n.iter)),
Z.matcens = array(NA, dim = c(n0, n.iter)),
#Z.matuncens = array(NA, dim = c(n1, n.iter)),
Z.matcensbelow = array(NA, dim = c(n_censbelow, n.iter)),
Z.matcensabove = array(NA, dim = c(n_censabove, n.iter)),
mu = array(NA, dim = c(n, n.iter)),#,
mucens = array(NA, dim = c(n0, n.iter)),#,
muuncens = array(NA, dim = c(n1, n.iter)),#,
mucensbelow = array(NA, dim = c(n_censbelow, n.iter)),#,
mucensabove = array(NA, dim = c(n_censabove, n.iter)),#,
ystar = array(NA, dim = c(n, n.iter)),#,
ystarcens = array(NA, dim = c(n0, n.iter)),#,
ystaruncens = array(NA, dim = c(n1, n.iter)),#,
ystarcensbelow = array(NA, dim = c(n_censbelow, n.iter)),#,
ystarcensabove = array(NA, dim = c(n_censabove, n.iter)),#,
test.mu = array(NA, dim = c(ntest, n.iter)),#,
test.y_nocensoring = array(NA, dim = c(ntest, n.iter)),#,
test.y_withcensoring = array(NA, dim = c(ntest, n.iter)),#,
test.probcensbelow = array(NA, dim = c(ntest, n.iter)),#,
test.probcensabove = array(NA, dim = c(ntest, n.iter)),
sigmavecs_train = array(NA, dim = c(n, n.iter)),
sigmavecs_test = array(NA, dim = c(ntest, n.iter)),
error_mu_train = array(NA, dim = c(n, n.iter)),
error_mu_test = array(NA, dim = c(ntest, n.iter)),
cond_exp_train = array(NA, dim = c(n, n.iter)),
cond_exp_test = array(NA, dim = c(ntest, n.iter)),
alpha = rep(NA,n.iter)
)
if(sparse){
var_count_y <- rep(0, p_y)
draw$alpha_s_y_store <- rep(NA, n.iter)
draw$var_count_y_store <- matrix(0, ncol = p_y, nrow = n.iter)
draw$s_prob_y_store <- matrix(0, ncol = p_y, nrow = n.iter)
}
control <- dbartsControl(updateState = updateState, verbose = FALSE, keepTrainingFits = TRUE,
keepTrees = TRUE,
n.trees = n.trees,
n.burn = n.burn,
n.samples = n.samples,
n.thin = n.thin,
n.chains = n.chains,
n.threads = n.threads,
printEvery = printEvery,
printCutoffs = printCutoffs,
rngKind = rngKind,
rngNormalKind = rngNormalKind,
rngSeed = rngSeed)
# print(colnames(Xmat.train))
# print("begin dbarts")
# alpha <- rgamma(n = 1, shape = c1, rate = c2) #draw from prior
alpha <- c1/c2 # initialize at prior mean
df0 <- data.frame(y)
estResult <- censReg(y ~ 1,left = below_cens, right = above_cens, data = df0)
sum_est <- summary( estResult )
# mu0 <- sum_est$estimate["(Intercept)", "Estimate"]
mu0 <- 0#sum_est$estimate["(Intercept)", "Estimate"]
# print("Line 222")
# initialize parameters
# nu parameter for sigma prior in base distribution G_0
# nu0 <- 10 # set to 10 as in full nonparametric BART paper
# nu0 <- 3 # standard ABRT default would be to set it to 3
# lambda0 set as in the BART package, at the sigquant quantile
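# Under the base distribution G_0, sigma^2 ~ nu0*lambda0 / chisq(nu0), so
# setting lambda0 = sigest^2 * qchisq(1 - sigquant, nu0) / nu0 (as below)
# implies P(sigma < sigest) = sigquant; with the default sigquant = 0.95 the
# prior places 95% of its mass on sigma values below the estimated sd.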
if(!is.na(sigest)){
if(sigest == "naive"){
sigest <- sd(z)
censsigprior <- TRUE
}
}
# code below for setting lambda taken from BART package
if(censsigprior){
if(is.na(lambda0)) {
if(is.na(sigest)) {
if( (ncol(x.train) < n) & ! nolinregforlambda) {
# df = data.frame(t(x.train),y.train)
# lmf = lm(y.train~.,df)
# sigest = summary(lmf)$sigma
df0 <- data.frame(x.train,y)
# print("df0 = ")
# print(df0)
estResult <- censReg(y ~ .,left = below_cens, right = above_cens, data = df0)
sum_est <- summary( estResult )
# print("sum_est = ")
# print(sum_est)
if(is.null(coef(estResult))){
# estResult <- censReg(y ~ 1,left = below_cens, right = above_cens, data = df0)
# sum_est <- summary( estResult )
templm <- lm(y ~. , data = df0)
df0 <- data.frame(y = y,
df0[,names(which(!is.na(templm$coefficients[2:length(templm$coefficients)])))])
estResult <- censReg(y ~ .,left = below_cens, right = above_cens, data = df0)
sum_est <- summary( estResult )
}
sigest <- exp(sum_est$estimate["logSigma", "Estimate"])
} else {
df0 <- data.frame(y)
estResult <- censReg(y ~ 1,left = below_cens, right = above_cens, data = df0)
sum_est <- summary( estResult )
# print("sum_est = ")
# print(sum_est)
sigest <- exp(sum_est$estimate["logSigma", "Estimate"])
# sigest = sd(y.train)
}
}
qchi = qchisq(1.0-sigquant,nu0)
lambda0 = (sigest*sigest*qchi)/nu0 #lambda parameter for sigma prior
} else {
sigest=sqrt(lambda0)
}
}else{
# sigest <- sd(z)
if( (ncol(x.train) < n) & ! nolinregforlambda) {
df0 <- data.frame(x.train,y)
templm <- lm(y ~. , data = df0)
sum_est <- summary( templm )
sigest <- sum_est$sigma
}else{
sigest <- sd(z)
}
qchi = qchisq(1.0-sigquant,nu0)
lambda0 = (sigest*sigest*qchi)/nu0 #lambda parameter for sigma prior
}
k_s <- 10 # using default from fully nonparametric BART paper
#not obvious how to set this
#could set to maximum of linear model residuals
# emax <- max(abs(y -mu0 ))
emax <- max(abs(y - mean(y) ))
if(k0_resids == TRUE){
df0 <- data.frame(x.train,y)
lmf <- lm(y~.,df0)
emax <- max(abs(lmf$residuals))
}
k0 <- ( ( k_s*sqrt(lambda0))/emax )^2
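# The k0 above satisfies sqrt(lambda0 / k0) = emax / k_s: under G_0,
# mu | sigma ~ N(mu0, sigma^2 / k0), so at sigma^2 = lambda0 a range of k_s
# prior standard deviations of the component mean covers emax (the largest
# absolute centered outcome or residual).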
# print("nu0/2 = ")
# print(nu0/2)
#
# print("nu0*lambda0/2 = ")
# print(nu0*lambda0/2)
sigma_init <- sigest#sqrt(1/rgamma(n = 1, shape = nu0/2, rate = nu0*lambda0/2))
mu_init <- 0#rnorm(1, mean = mu0, sd = sigma_init/sqrt(k0))
sigma1_vec_train <- rep(sigma_init, n)
sigma1_vec_test <- rep(sigma_init, ntest)
mu1_vec_train <- rep(mu_init,n)
mu1_vec_test <- rep(mu_init,ntest)
if(init.many.clust==TRUE){
# testinitinds <- sample(1:n, ntest, replace = FALSE)
# testinitinds <- sample.int(n, ntest, replace = FALSE)
testinitinds <- dqsample.int(n, ntest, replace = FALSE)
sigma1_vec_train <- sqrt(1/rgamma(n = n, shape = nu0/2, rate = nu0*lambda0/2))
sigma1_vec_test <- sigma1_vec_train[testinitinds]
mu1_vec_train <- rnorm(n = n, mean = mu0, sd = sigma_init/sqrt(k0))
mu1_vec_test <- mu1_vec_train[testinitinds]
}
# mu2_vec_train <- rep(0,n)
# mu2_vec_test <- rep(0,ntest)
# print("Line 301")
varthetamat <- cbind(mu1_vec_train, sigma1_vec_train)
varthetamat_test <- cbind(mu1_vec_test, sigma1_vec_test)
# varthetamat <- cbind(mu1_vec_train, sigma1_vec_train)
if(n_censbelow > 0){
z[which(y <= below_cens )] <- rtruncnorm(n_censbelow, a = -Inf, b = below_cens,
mean = mu1_vec_train[censbelow_inds],
sd = sigma1_vec_train[censbelow_inds])
}
if(n_censabove > 0){
z[which(y >= above_cens )] <- rtruncnorm(n_censabove, a = above_cens, b = Inf,
mean = mu1_vec_train[censabove_inds],
sd = sigma1_vec_train[censabove_inds])
}
weightstemp_y <- 1/(sigma1_vec_train^2)
# print("Line 324")
#
# print("length(z) = ")
# print(length(z) )
# print("length(weightstemp_y) = ")
# print(length(weightstemp_y) )
#
# print("weightstemp_y = ")
# print(weightstemp_y)
#
# print("nrow(x.train) = ")
# print(nrow(x.train))
#
# print("ncol(x.train) = ")
# print(ncol(x.train))
#
# print("ncol(x.test) = ")
# print(ncol(x.test))
#
# print("z = ")
# print(z)
dftrain <- data.frame(y00 = z - 0, x00 = x.train)
# print("dftrain = ")
# print(dftrain)
#
# print("lm(y00~., data = dftrain, weights = weightstemp_y)")
# print(lm(y00 ~., data = dftrain, weights = weightstemp_y))
if(nrow(x.test ) == 0){
sampler <- dbarts(y00 ~ . ,
data = dftrain,
#test = x.test,
weights = weightstemp_y,
control = control,
tree.prior = dbarts:::cgm(power = tree_power, base = tree_base, split.probs = rep(1 / p_y, p_y)),
node.prior = node.prior,
resid.prior = resid.prior,
proposal.probs = proposal.probs,
sigma = 1
)
}else{
dftest <- data.frame(y00 = NA, x00 = x.test)
# print("dftest = ")
# print(dftest)
#
# print("length(weightstemp_y) = ")
# print(length(weightstemp_y))
# weightstemp_testy <- 1/(sigma1_vec_test^2)
sampler <- dbarts(y00 ~ . ,
data = dftrain,
test = dftest,
# weights = weightstemp_y,
control = control,
tree.prior = dbarts:::cgm(power = tree_power, base = tree_base, split.probs = rep(1 / p_y, p_y)),
node.prior = node.prior,
resid.prior = resid.prior,
proposal.probs = proposal.probs,
sigma = 1
)
}
# print("Line 350")
sampler$setResponse(y = z - mu1_vec_train)
sampler$setSigma(sigma = 1)
sampler$setWeights(weights = weightstemp_y)
if(sparse){
tempmodel <- sampler$model
tempmodel@tree.prior@splitProbabilities <- s_y
sampler$setModel(newModel = tempmodel)
}
#sampler$setPredictor(x= Xmat.train$x, column = 1, forceUpdate = TRUE)
#mu = as.vector( alpha + X.mat %*% beta )
sampler$sampleTreesFromPrior()
samplestemp <- sampler$run()
#mutemp <- samplestemp$train[,1]
#suppose there are a number of samples
# print("sigma = ")
sigma <- samplestemp$sigma
mu <- samplestemp$train[,1]
mutest <- samplestemp$test[,1]
if(sparse){
tempcounts <- fcount(sampler$getTrees()$var)
tempcounts <- tempcounts[tempcounts$x != -1, ]
var_count_y <- rep(0, p_y)
var_count_y[tempcounts$x] <- tempcounts$N
}
# mu <- sampler$predict(dftrain)
#
#
# if(nrow(x.test ) == 0){
#
# mutest <- samplestemp$test[,1]
# }else{
# mutest <- sampler$predict(dftest)
# }
ystar <- rnorm(n,mean = mu + mu1_vec_train, sd = sigma1_vec_train)
# must draw test parameter values
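# Polya-urn predictive draw: each test observation opens a new mixture
# component with probability alpha/(alpha + n) (coded as test_clusts == 0),
# or copies the component of training observation j with probability
# 1/(alpha + n) (coded as test_clusts == j).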
# test_clusts <- sample(0:n, size = ntest, replace = TRUE, prob = c(alpha/(alpha+n), rep(1/(alpha+n) ,n) ))
test_clusts <- sample.int(n+1, size = ntest, replace = TRUE, prob = c(alpha/(alpha+n), rep(1/(alpha+n) ,n) ))-1
# test_clusts <- dqsample.int(n+1, size = ntest, replace = TRUE, prob = c(alpha/(alpha+n), rep(1/(alpha+n) ,n) ))-1
if(any(test_clusts > 0)){
  # copy component parameters from the matched training observations
  sigma1_vec_test[test_clusts > 0] <- sigma1_vec_train[test_clusts[test_clusts > 0]]
  mu1_vec_test[test_clusts > 0] <- mu1_vec_train[test_clusts[test_clusts > 0]]
}
if(any(test_clusts == 0)){
  # draw new component parameters from the base distribution G_0
  numzeros <- sum(test_clusts == 0)
  sigma1_vec_test[test_clusts == 0] <- sqrt(1/rgamma(n = numzeros, shape = nu0/2, rate = nu0*lambda0/2))
  mu1_vec_test[test_clusts == 0] <- rnorm(n = numzeros, mean = mu0, sd = sigma1_vec_test[test_clusts == 0]/sqrt(k0))
}
# print("Line 398")
ystartest <- rnorm(ntest,
mean = mutest + mu1_vec_test,
sd = sigma1_vec_test)
# ystartestcens <- rtruncnorm(ntest,
# a = below_cens, b = above_cens,
# mean = mutest + mu1_vec_test,
# sd = sigma1_vec_test)
ystartestcens <- ystartest
ystartestcens[ystartest < below_cens] <- below_cens
ystartestcens[ystartest > above_cens] <- above_cens
probcensbelow_train <- pnorm(below_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train)
probcensabove_train <- 1 - pnorm(above_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train)
probcensbelow <- pnorm(below_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test)
probcensabove <- 1 - pnorm(above_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test)
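# Conditional expectation of the censored outcome: with m = f(x) plus the
# component mean and s the component sd, the two-sided Tobit mean is
# E[y] = b_l*Phi((b_l - m)/s) + m*(Phi((b_u - m)/s) - Phi((b_l - m)/s))
#      + s*(phi((b_l - m)/s) - phi((b_u - m)/s)) + b_u*(1 - Phi((b_u - m)/s)),
# where b_l = below_cens and b_u = above_cens; the one-sided cases below drop
# the corresponding terms. Note s*fastnormdens(b, m, s) = phi((b - m)/s).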
if(below_cens == - Inf){
if(above_cens == Inf){
condexptrain <- (mu + mu1_vec_train)
condexptest <- (mutest + mu1_vec_test)
}else{ # above_cens !=Inf
condexptrain <-
(mu + mu1_vec_train)*(1- probcensabove_train ) +
sigma1_vec_train*( - fastnormdens(above_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) ) +
above_cens*probcensabove_train
condexptest <-
(mutest + mu1_vec_test)*(1- probcensabove ) +
sigma1_vec_test*( -fastnormdens(above_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) ) +
above_cens*probcensabove
}
}else{ # below_cens != - Inf
if(above_cens == Inf){
condexptrain <- below_cens*probcensbelow_train +
(mu + mu1_vec_train)*(1 - probcensbelow_train) +
sigma1_vec_train*( fastnormdens(below_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) )
condexptest <- below_cens*probcensbelow +
(mutest + mu1_vec_test)*(1 - probcensbelow) +
sigma1_vec_test*( fastnormdens(below_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) )
}else{ # above_cens !=Inf
condexptrain <- below_cens*probcensbelow_train +
(mu + mu1_vec_train)*(1- probcensabove_train - probcensbelow_train) +
sigma1_vec_train*( fastnormdens(below_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) -
fastnormdens(above_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) ) +
above_cens*probcensabove_train
condexptest <- below_cens*probcensbelow +
(mutest + mu1_vec_test)*(1- probcensabove - probcensbelow) +
sigma1_vec_test*( fastnormdens(below_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) -
fastnormdens(above_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) ) +
above_cens*probcensabove
}
}
#save the first round of values
if(n.burnin == 0){
draw$Z.mat[,1] = z
draw$Z.matcens[,1] = z[cens_inds]
# draw$Z.matuncens[,1] = z[uncens_inds]
draw$Z.matcensbelow[,1] = z[censbelow_inds]
draw$Z.matcensabove[,1] = z[censabove_inds]
draw$mu[,1] = mu + mu1_vec_train
draw$mucens[,1] = mu[cens_inds] + mu1_vec_train[cens_inds]
draw$muuncens[,1] = mu[uncens_inds] + mu1_vec_train[uncens_inds]
draw$mucensbelow[,1] = mu[censbelow_inds] + mu1_vec_train[censbelow_inds]
draw$mucensabove[,1] = mu[censabove_inds] + mu1_vec_train[censabove_inds]
draw$ystar[,1] = ystar
draw$ystarcens[,1] = ystar[cens_inds]
draw$ystaruncens[,1] = ystar[uncens_inds]
draw$ystarcensbelow[,1] = ystar[censbelow_inds]
draw$ystarcensabove[,1] = ystar[censabove_inds]
draw$test.mu[,1] = mutest + mu1_vec_test
draw$test.y_nocensoring[,1] = ystartest
draw$test.y_withcensoring[,1] = ystartestcens
draw$test.probcensbelow[,1] = probcensbelow
draw$test.probcensabove[,1] = probcensabove
# draw$sigma[1] = sigma
draw$sigmavecs_train[, 1] = sigma1_vec_train
draw$sigmavecs_test[, 1] = sigma1_vec_test
draw$error_mu_train[, 1] = mu1_vec_train
draw$error_mu_test[, 1] = mu1_vec_test
draw$cond_exp_train[, 1] = condexptrain
draw$cond_exp_test[, 1] = condexptest
draw$alpha[1] = alpha
if(sparse){
draw$alpha_s_y_store[1] <- alpha_s_y
# draw$alpha_s_z_store[1] <- alpha_s_z
draw$var_count_y_store[1,] <- var_count_y
# draw$var_count_z_store[1,] <- var_count_z
draw$s_prob_y_store[1,] <- s_y
# draw$s_prob_z_store[1,] <- s_z
}
}
# print("Begin For loop")
###### Begin for loop ##################
#loop through the Gibbs sampler iterations
for(iter in 2:(n.iter+n.burnin)){
########### 2 draw the latent outcome####################
# z[cens_inds] <- rtruncnorm(n0, a= below_cens, b = above_cens, mean = mu[cens_inds], sd = sigma)
if(length(censbelow_inds) > 0){
z[censbelow_inds] <- rtruncnorm(n_censbelow, a= -Inf, b = below_cens,
mean = mu[censbelow_inds] + mu1_vec_train[censbelow_inds],
sd = sigma1_vec_train[censbelow_inds])
}
if(length(censabove_inds) > 0){
z[censabove_inds] <- rtruncnorm(n_censabove, a= above_cens, b = Inf,
mean = mu[censabove_inds] + mu1_vec_train[censabove_inds],
sd = sigma1_vec_train[censabove_inds])
}
######## 3 Draw trees ######################
# print("3 Draw trees")
#set the response.
#Check that 0 is a reasonable initial value
#perhaps makes more sense to use initial values of Z
# sampler$setResponse(y = z)
# weightstemp_y <- 1/sigma1_vec_train
weightstemp_y <- 1/(sigma1_vec_train^2)
sampler$setResponse(y = z- mu1_vec_train)
sampler$setSigma(sigma = 1)
sampler$setWeights(weights = weightstemp_y)
if(sparse){
tempmodel <- sampler$model
tempmodel@tree.prior@splitProbabilities <- s_y
sampler$setModel(newModel = tempmodel)
}
# sampler$setSigma(sigma = 1)
#sampler$setPredictor(x= Xmat.train$x, column = 1, forceUpdate = TRUE)
#mu = as.vector( alpha + X.mat %*% beta )
samplestemp <- sampler$run()
# sigma <- samplestemp$sigma
# mu <- samplestemp$train[,1]
# mutest <- samplestemp$test[,1]
mu <- samplestemp$train[,1]
mutest <- samplestemp$test[,1]
if(sparse){
tempcounts <- fcount(sampler$getTrees()$var)
tempcounts <- tempcounts[tempcounts$x != -1, ]
var_count_y <- rep(0, p_y)
var_count_y[tempcounts$x] <- tempcounts$N
}
# mu <- sampler$predict(dftrain)
#
#
# if(nrow(x.test ) == 0){
#
# mutest <- samplestemp$test[,1]
# }else{
# mutest <- sampler$predict(dftest)
# }
######### 4 draw components of mixture ####################
# print(" 4 draw components of mixture")
qi0vec <- alpha*dt_ls(z, df = nu0, mu = mu, sigma = sqrt(lambda0*(1 + (1/k0))) )
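# qi0vec[i] is alpha times the prior predictive density of z[i] - mu[i]
# under G_0: integrating the normal likelihood over the normal-inverse-gamma
# base measure gives a location-scale t density with df = nu0, location
# mu0 (= 0 here), and scale sqrt(lambda0*(1 + 1/k0)).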
#loop over individuals for updates
for(i in 1:n){
# qi0 <- alpha*dt_ls(z[i], df = nu0, mu = mu[i], sigma = lambda0*(1 + (1/k0)) )
qi0 <- qi0vec[i]
# check if should drop just i^th row or all in same cluster as i.
varthetamattemp <- varthetamat[-i,, drop = FALSE]
# vartheta_unique_mat <- unique(varthetamattemp)
tempcol <- varthetamattemp[,1, drop = FALSE]
# THERE IS PROBABLY A MUCH FASTER WAY OF DOING THIS JUST BY UPDATING IN ITERATION
# BY ACCOUNTING FOR LAST INCLUDED NUMBER AND DROPPED ROW i
tempord <- order(tempcol, method = "radix")
tempsort <- tempcol[tempord]
tempuniinds <- tempord[!duplicated(tempsort)]
# tempuniinds <- unique(match(tempsort,tempcol))
counts_ord <- rle2(tempsort)[,2]
# counts_ord <- rle(tempsort)$lengths
# tempsort <- sort(tempcol, na.last = TRUE)
# # tempuniinds <- order(tempcol)[!duplicated(tempsort)]
# tempuniinds <- unique(match(tempsort,tempcol))
# counts_ord <- rle(tempsort)$lengths
# tempuniinds <- order(tempcol)[!duplicated(sort(tempcol))]
# temprle <- rle(sort(tempcol))$lengths
# counts_ord <- fastcountuniq(tempcol)
# CHECK THESE LINES
# ux = sort(unique(tempcol)) #vartheta_unique_mat[,1, drop = FALSE] #sort(unique(tempcol))
# idx = match(tempcol, ux)
# tempuniinds <- unique(idx)
# counts_ord = tabulate(idx, nbins=length(ux))
# tempord <- order(ux[tempuniinds])
vartheta_unique_mat <- varthetamattemp[tempuniinds, , drop = FALSE]
#also try split(seq_along(vec), vec)
# tempord <- order(unique(varthetamattemp[,1 , drop = FALSE]))
# tempord <- order(vartheta_unique_mat[,1 , drop = FALSE])
# print("vartheta_unique_mat = ")
# print(vartheta_unique_mat)
# print("tempord = ")
# print(tempord)
# vartheta_unique_mat <- vartheta_unique_mat[tempord, , drop = FALSE]
# print("vartheta_unique_mat = ")
# print(vartheta_unique_mat)
# tempcol <- varthetamattemp[,1, drop = FALSE]
# counts_ord <- table(varthetamattemp[,1, drop = FALSE])
# ux = vartheta_unique_mat[,1, drop = FALSE] #sort(unique(tempcol))
# ux = sort(unique(tempcol))
# print("ux = ")
# print(ux)
# idx = match(tempcol, ux)
# print("idx = ")
# print(idx)
#
# print("length(ux)")
# print(length(ux))
# counts_ord = tabulate(idx, nbins=length(ux))
num_unique <- nrow(vartheta_unique_mat)
# print("counts_ord = ")
# print(counts_ord)
# print("num_unique = ")
# print(num_unique)
if( sum(counts_ord) != n-1 ){
print("tempcol = ")
print(tempcol)
print("fastcountuniq(1:5) = ")
print(fastcountuniq(1:5))
print("fastcountuniq(tempcol) = ")
print(fastcountuniq(tempcol))
print("counts_ord = ")
print(counts_ord)
print("num_unique = ")
print(num_unique)
print("n-1 = ")
print(n-1)
print("sum(counts_ord) = ")
print(sum(counts_ord))
stop("Bug in count of unique values")
}
if( length(counts_ord) != num_unique ){
print("counts_ord = ")
print(counts_ord)
print("num_unique = ")
print(num_unique)
stop("Bug in count of unique values")
}
# if(any(is.na(counts_ord))){
#
# print("counts_ord = ")
# print(counts_ord)
# print("tempuniinds = ")
# print(tempuniinds)
# stop("counts_ord NA")
# }
q_rs <- rep(NA, nrow(vartheta_unique_mat))
# CHECK THIS AND SPEED IT UP
q_rs <- counts_ord*fastnormdens(z[i],
mean = mu[i] + vartheta_unique_mat[,1],
sd = vartheta_unique_mat[,2])
# for(j in 1:num_unique){
# q_rs[j] <- counts_ord[j]*fastnormdens(z[i],
# mean = vartheta_unique_mat[j,1],
# sd = vartheta_unique_mat[j,2])
# }
# if(is.na(qi0)){
#
# print("qi0 = ")
# print(qi0)
# print("q_rs = ")
# print(q_rs)
# print("counts_ord = ")
# print(counts_ord)
# print("tempuniinds = ")
# print(tempuniinds)
# print("vartheta_unique_mat = ")
# print(vartheta_unique_mat)
# stop("qi0 NA before normalize")
# }
tempdenom <- qi0 + sum(q_rs)
qi0 <- qi0/tempdenom
q_rs <- q_rs/tempdenom
# if(is.na(qi0)){
#
# print("qi0 = ")
# print(qi0)
# print("q_rs = ")
# print(q_rs)
# print("counts_ord = ")
# print(counts_ord)
# print("tempuniinds = ")
# print(tempuniinds)
# print("vartheta_unique_mat = ")
# print(vartheta_unique_mat)
# stop("qi0 NA")
# }
# if(any(is.na(q_rs))){
#
# print("qi0 = ")
# print(qi0)
# print("q_rs = ")
# print(q_rs)
# stop("q_rs NA")
# }
# SPEED UP THIS STEP
# rprime <- sample(0:num_unique, size = 1, replace = TRUE, prob = c(qi0, q_rs))
# rprime <- sample.int(num_unique+1, size = 1, replace = TRUE, prob = c(qi0, q_rs))-1
rprime <- sample_int_expj(num_unique+1, size = 1, prob = c(qi0, q_rs))-1
# rprime <- dqsample.int(num_unique+1, size = 1, replace = TRUE, prob = c(qi0, q_rs))-1
if(rprime>0){
# if none equal to current varthetamat[i,] and there are more than one equal to i+1^th row
# then can keep same vartheta_unique_mat in next iteration and it is straightforward to update the counts.
# not implemented yet
varthetamat[i,] <- vartheta_unique_mat[rprime,]
mu1_vec_train[i] <- varthetamat[i,1]
sigma1_vec_train[i] <- varthetamat[i,2]
}else{
# speed up these samples
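# Draw a new component from the single-observation posterior under G_0:
# with d = z[i] - mu[i],
# sigma^2 | d ~ (nu0*lambda0 + d^2*k0/(k0+1)) / chisq(nu0 + 1) and
# mu_error | sigma, d ~ N(d/(k0+1), sigma^2/(k0+1)).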
varthetamat[i,2] <- sqrt(1/rgamma(n = 1,
shape = (nu0+1)/2,
rate = (nu0*lambda0/2) + (z[i] - mu[i] )^2/( 2*(1 + 1/k0)) ) )
# speed this up
varthetamat[i,1] <- rnorm(n=1,
mean = (z[i] - mu[i] )/(k0+1) ,
sd = varthetamat[i,2]/sqrt(k0+1) )
mu1_vec_train[i] <- varthetamat[i,1]
sigma1_vec_train[i] <- varthetamat[i,2]
} # end else statement corresponding to rprime == 0
} # end loop over i that updates components
######### 5 mixing step ##########################
# print("5 mixing step")
if(mixstep == TRUE){
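# Remix step: redraw each cluster's (mu, sigma) from its full conditional
# given all n_j observations currently assigned to the cluster (a standard
# normal-inverse-gamma update), which improves mixing over the
# one-observation-at-a-time updates above.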
vartheta_unique_mat <- unique(varthetamat)
for(j in 1:nrow(vartheta_unique_mat)){
#speed up sampling if possible
clust_inds <- which(varthetamat[,1, drop = FALSE] == vartheta_unique_mat[j,1])
n_j <- length(clust_inds)
clust_mean <- mean(z[clust_inds] - mu[clust_inds])
varthetamat[clust_inds,2] <- sqrt(1/rgamma(n = 1,#n_j,
shape = (nu0+n_j)/2,
rate = (nu0*lambda0/2) +
sum((z[clust_inds] - mu[clust_inds] - clust_mean )^2)/2 +
(k0*n_j/( k0 + n_j) )*( clust_mean^2 / 2) ) )
varthetamat[clust_inds,1] <- rnorm(n=1,#n_j,
mean = (n_j * clust_mean )/(k0+n_j) ,
sd = varthetamat[clust_inds,2]/sqrt(k0+n_j) )
mu1_vec_train[clust_inds] <- varthetamat[clust_inds,1]
sigma1_vec_train[clust_inds] <- varthetamat[clust_inds,2]
}
}
######### 6 sample alpha ######################
# print("6 sample alpha")
# vartheta_unique_mat <- unique(varthetamat)
# This step can be implemented using the prior of van Hasselt (2011)
# or the prior of George et al. (2019), McCulloch et al. (2021) and Conley et al. (2008)
# count number of unique mixture components
# there is probably a more efficient way of doing this
# create a matrix in which each row contains all of an individual's mixture component parameters
# varthetamat <- cbind(mu1_vec_train, mu2_vec_train, phi1_vec_train, gamma1_vec_train)
# varthetamat_test <- cbind(mu1_vec_test, mu2_vec_test, phi1_vec_test, gamma1_vec_test)
#obtain the unique rows (components)
vartheta_unique_mat <- unique(varthetamat)
#number of unique components
k_uniq <- nrow(vartheta_unique_mat)
if(alpha_prior == "vh"){
######### VH Step 6 a: Sample auxiliary variable kappa ######################################################
kappa_aux <- rbeta(n = 1, shape1 = alpha+1, shape2 = n)
######### VH Step 6 b: Sample alpha from a mixture of gamma distributions ######################################################
#obtain the mixing probability
p_kappa <- (c1+k_uniq-1)/(n*(c2-log(kappa_aux))+c1+k_uniq-1)
#sample a mixture component
mix_draw <- rbinom(n = 1,size = 1,prob = p_kappa)
# draw alpha from the drawn component
if(mix_draw==1){
alpha <- rgamma(1,shape = c1 + k_uniq, rate = c2 - log(kappa_aux))
}else{
alpha <- rgamma(1,shape = c1 + k_uniq - 1, rate = c2 - log(kappa_aux))
}
}else{
if(alpha_prior == "george"){
#Calculate alpha_min and alpha_max
Imin <- 1
Imax <- floor(0.1*n)
#consider IVBART prior
# Imin <- 2
# Imax <- floor(0.1*n)+1
if(floor(0.1*n) <= 1){
stop("Not enough observations for Prior of George et al. (2019)")
}
psiprior <- 0.5
# from ivbart package code
# https://github.com/rsparapa/bnptools/blob/master/ivbart/R/amode.R
egamm <- 0.5772156649
tempmin <- digamma(Imin) - log(egamm+log(n))
tempmax <- digamma(Imax) - log(egamm+log(n))
alpha_min <- exp(tempmin)
alpha_max <- exp(tempmax)
alpha_values <- seq(from = alpha_min,
to = alpha_max,
length.out = alpha_gridsize)
temp_aprior <- 1 - (alpha_values-alpha_min)/(alpha_max-alpha_min)
temp_aprior <- temp_aprior^psiprior
# temp_aprior = temp_aprior/sum(temp_aprior)
log_tempvals <- k_uniq*log(alpha_values) + lgamma(alpha_values) - lgamma(n+alpha_values)
# print("log_tempvals = ")
# print(log_tempvals)
# print("temp_aprior = ")
# print(temp_aprior)
# temp_kgivenalpha <- exp(log_tempvals)
#
# # temp_kgivenalpha <- ((alpha_values)^(k_uniq))*gamma(alpha_values)/gamma(n+alpha_values)
# temp_alpha_postprobs <- temp_kgivenalpha*temp_aprior
logtemp_alpha_postprobs <- log_tempvals + log(temp_aprior)
maxll <- max(logtemp_alpha_postprobs)
temp_alpha_postprobs <- exp(logtemp_alpha_postprobs- maxll)
# print("temp_kgivenalpha = ")
# print(temp_kgivenalpha)
# print("gamma(alpha_values) = ")
# print(gamma(alpha_values))
#
#
# print("gamma(n+alpha_values) = ")
# print(gamma(n+alpha_values))
#
#
# print("alpha_values = ")
# print(alpha_values)
#
# print("temp_kgivenalpha = ")
# print(temp_kgivenalpha)
# print("temp_aprior = ")
# print(temp_aprior)
#
# print("temp_alpha_postprobs = ")
# print(temp_alpha_postprobs)
post_probs_alphs <- temp_alpha_postprobs/sum(temp_alpha_postprobs)
# print("post_probs_alphs = ")
# print(post_probs_alphs)
#sample from 1 to alpha_gridsize
# index_alpha <- sample.int(n = alpha_gridsize, size = 1, prob = post_probs_alphs, replace = TRUE)
index_alpha <- sample_int_expj(n = alpha_gridsize, size = 1, prob = post_probs_alphs)
alpha <- alpha_values[index_alpha]
}else{
stop("Alpha prior must be vh or george")
}
} # end of George et al. alpha draw code
########### splitting probability draws #############################
if (sparse & (iter > floor(n.burnin * 0.5))) {
# s_update_z <- update_s(var_count_z, p_z, alpha_s_z)
# s_z <- s_update_z[[1]]
s_update_y <- update_s(var_count_y, p_y, alpha_s_y)
s_y <- s_update_y[[1]]
if(alpha_split_prior){
# alpha_s_z <- update_alpha(s_z, alpha_scale_z, alpha_a_z, alpha_b_z, p_z, s_update_z[[2]])
alpha_s_y <- update_alpha(s_y, alpha_scale_y, alpha_a_y, alpha_b_y, p_y, s_update_y[[2]])
}
}
########### draw y value predictions ##############################
# not sure where to make these draws (relative to other steps)
# print("draw y value predictions ")
# must draw test parameter values
# test_clusts <- sample(0:n, size = ntest, replace = TRUE, prob = c(alpha/(alpha+n), rep(1/(alpha+n) ,n) ))
test_clusts <- sample.int(n+1, size = ntest, replace = TRUE, prob = c(alpha/(alpha+n), rep(1/(alpha+n) ,n) ))-1
# test_clusts <- dqsample.int(n+1, size = ntest, replace = TRUE, prob = c(alpha/(alpha+n), rep(1/(alpha+n) ,n) ))-1
if(any(test_clusts > 0)){
  # copy component parameters from the matched training observations
  sigma1_vec_test[test_clusts > 0] <- sigma1_vec_train[test_clusts[test_clusts > 0]]
  mu1_vec_test[test_clusts > 0] <- mu1_vec_train[test_clusts[test_clusts > 0]]
}
if(any(test_clusts == 0)){
  # draw new component parameters from the base distribution G_0
  numzeros <- sum(test_clusts == 0)
  sigma1_vec_test[test_clusts == 0] <- sqrt(1/rgamma(n = numzeros, shape = nu0/2, rate = nu0*lambda0/2))
  mu1_vec_test[test_clusts == 0] <- rnorm(n = numzeros, mean = mu0, sd = sigma1_vec_test[test_clusts == 0]/sqrt(k0))
}
#draw uncensored predictions of y
ystar <- rnorm(n,mean = mu+ mu1_vec_train , sd = sigma1_vec_train)
ystartest <- rnorm(ntest,mean = mutest + mu1_vec_test, sd = sigma1_vec_test)
# ystartestcens <- rtruncnorm(ntest, a= below_cens, b= above_cens, mean = mutest, sd = sigma)
ystartestcens <- ystartest
ystartestcens[ystartest < below_cens] <- below_cens
ystartestcens[ystartest > above_cens] <- above_cens
probcensbelow_train <- pnorm(below_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train)
probcensabove_train <- 1 - pnorm(above_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train)
probcensbelow <- pnorm(below_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test)
probcensabove <- 1 - pnorm(above_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test)
if(below_cens == - Inf){
if(above_cens == Inf){
condexptrain <- (mu + mu1_vec_train)
condexptest <- (mutest + mu1_vec_test)
}else{ # above_cens !=Inf
condexptrain <-
(mu + mu1_vec_train)*(1 - probcensabove_train ) +
sigma1_vec_train*( - fastnormdens(above_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) ) +
above_cens*probcensabove_train
condexptest <-
(mutest + mu1_vec_test)*(1 - probcensabove ) +
sigma1_vec_test*( - fastnormdens(above_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) ) +
above_cens*probcensabove
}
}else{ # below_cens != - Inf
if(above_cens == Inf){
condexptrain <- below_cens*probcensbelow_train +
(mu + mu1_vec_train)*(1 - probcensbelow_train) +
sigma1_vec_train*( fastnormdens(below_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) )
condexptest <- below_cens*probcensbelow +
(mutest + mu1_vec_test)*(1 - probcensbelow) +
sigma1_vec_test*( fastnormdens(below_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) )
}else{ # above_cens !=Inf
condexptrain <- below_cens*probcensbelow_train +
(mu + mu1_vec_train)*(1 - probcensabove_train - probcensbelow_train) +
sigma1_vec_train*( fastnormdens(below_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) -
fastnormdens(above_cens, mean = mu + mu1_vec_train, sd = sigma1_vec_train) ) +
above_cens*probcensabove_train
condexptest <- below_cens*probcensbelow +
(mutest + mu1_vec_test)*(1 - probcensabove - probcensbelow) +
sigma1_vec_test*( fastnormdens(below_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) -
fastnormdens(above_cens, mean = mutest + mu1_vec_test, sd = sigma1_vec_test) ) +
above_cens*probcensabove
}
}
if(iter > n.burnin){
iter_min_burnin <- iter-n.burnin
draw$Z.mat[,iter_min_burnin] = z
draw$Z.matcens[,iter_min_burnin] = z[cens_inds]
# draw$Z.matuncens[,iter_min_burnin] = z[uncens_inds]
draw$Z.matcensbelow[,iter_min_burnin] = z[censbelow_inds]
draw$Z.matcensabove[,iter_min_burnin] = z[censabove_inds]
draw$mu[,iter_min_burnin] = mu + mu1_vec_train
draw$mucens[,iter_min_burnin] = mu[cens_inds] + mu1_vec_train[cens_inds]
draw$muuncens[,iter_min_burnin] = mu[uncens_inds] + mu1_vec_train[uncens_inds]
draw$mucensbelow[,iter_min_burnin] = mu[censbelow_inds] + mu1_vec_train[censbelow_inds]
draw$mucensabove[,iter_min_burnin] = mu[censabove_inds] + mu1_vec_train[censabove_inds]
draw$ystar[,iter_min_burnin] = ystar
draw$ystarcens[,iter_min_burnin] = ystar[cens_inds]
draw$ystaruncens[,iter_min_burnin] = ystar[uncens_inds]
draw$ystarcensbelow[,iter_min_burnin] = ystar[censbelow_inds]
draw$ystarcensabove[,iter_min_burnin] = ystar[censabove_inds]
draw$test.mu[,iter_min_burnin] = mutest + mu1_vec_test
draw$test.y_nocensoring[,iter_min_burnin] = ystartest
draw$test.y_withcensoring[,iter_min_burnin] = ystartestcens
draw$test.probcensbelow[,iter_min_burnin] = probcensbelow
draw$test.probcensabove[,iter_min_burnin] = probcensabove
draw$sigmavecs_train[, iter_min_burnin] = sigma1_vec_train
draw$sigmavecs_test[, iter_min_burnin] = sigma1_vec_test
draw$error_mu_train[, iter_min_burnin] = mu1_vec_train
draw$error_mu_test[, iter_min_burnin] = mu1_vec_test
draw$cond_exp_train[, iter_min_burnin] = condexptrain
draw$cond_exp_test[, iter_min_burnin] = condexptest
draw$alpha[iter_min_burnin] <- alpha
if(sparse){
draw$alpha_s_y_store[iter_min_burnin] <- alpha_s_y
# draw$alpha_s_z_store[iter_min_burnin] <- alpha_s_z
draw$var_count_y_store[iter_min_burnin,] <- var_count_y
# draw$var_count_z_store[iter_min_burnin,] <- var_count_z
draw$s_prob_y_store[iter_min_burnin,] <- s_y
# draw$s_prob_z_store[iter_min_burnin,] <- s_z
}
}
if(iter %% print.opt == 0){
print(paste("Gibbs Iteration", iter))
# print(c(sigma2.alpha, sigma2.beta))
}
}#end iterations of Gibbs sampler
return(draw)
}