Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = (4/6)*7
)
## ----setup--------------------------------------------------------------------
library(QAEnsemble)
## ----eval=TRUE----------------------------------------------------------------
library(svMisc)
library(coda)
## ----eval=TRUE----------------------------------------------------------------
set.seed(10)
## ----eval=TRUE----------------------------------------------------------------
a_shape = 8
sigma_scale = 1100
time_use = 12*c(1:110)
plot(time_use, exp(-1*((time_use/sigma_scale)^a_shape)), xlab="Months (t)", ylab="Survival probability (S)", type = "l", col = "red")
## ----eval=TRUE----------------------------------------------------------------
#Input for the 'create_life_table_info' function:
#interval_time_input: the start and end times for each time interval (Note: it
#is assumed in the 'create_life_table_info' function that the first start time
#is at month 0)
#num_ran_samples_input: the number of random samples from the Weibull
#distribution
#data_weibull_input: the Weibull distribution mortality time samples
#Output for the 'create_life_table_info' is a list containing two data frames.
#The first data frame contains the life table information: time intervals, the
#number of people at risk in the interval, number of deaths that occur in the
#interval, risk of death in the interval, probability of surviving in the
#interval, and the survival probabilities. The second data frame contains the
#interval-censored information in a format easy to use in the likelihood
#function: interval-censored mortality time observation number, interval left
#bound, and interval right bound.
create_life_table_info <- function(interval_time_input, num_ran_samples_input, data_weibull_input)
{
num_at_risk_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input))
num_deaths_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input))
risk_death_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input))
prob_surv_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input))
s_values_output = matrix(NA, nrow = 1, ncol = length(interval_time_input))
obs_num_output = matrix(1:num_ran_samples_input, nrow = 1, ncol = num_ran_samples_input)
left_endpoint_output = matrix(NA, nrow = 1, ncol = num_ran_samples_input)
right_endpoint_output = matrix(NA, nrow = 1, ncol = num_ran_samples_input)
for(t in 1:length(interval_time_input))
{
if(t == 1)
{
num_at_risk_during_interval_output[t] = num_ran_samples_input
num_deaths_during_interval_output[t] = length(data_weibull_input[0 <= data_weibull_input & data_weibull_input <= interval_time_input[1]])
risk_death_during_interval_output[t] = num_deaths_during_interval_output[t]/num_at_risk_during_interval_output[t]
prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t]
s_values_output[t] = 1*prob_surv_during_interval_output[t]
}
if(t != 1)
{
if(num_at_risk_during_interval_output[t-1] - num_deaths_during_interval_output[t-1] > 0)
{
num_at_risk_during_interval_output[t] = num_at_risk_during_interval_output[t-1] - num_deaths_during_interval_output[t-1]
num_deaths_during_interval_output[t] = length(data_weibull_input[interval_time_input[t-1] < data_weibull_input & data_weibull_input <= interval_time_input[t]])
risk_death_during_interval_output[t] = num_deaths_during_interval_output[t]/num_at_risk_during_interval_output[t]
prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t]
s_values_output[t] = s_values_output[t-1]*prob_surv_during_interval_output[t]
}
if(num_at_risk_during_interval_output[t-1] - num_deaths_during_interval_output[t-1] <= 0)
{
num_at_risk_during_interval_output[t] = 0
num_deaths_during_interval_output[t] = 0
risk_death_during_interval_output[t] = 1
prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t]
s_values_output[t] = s_values_output[t-1]*prob_surv_during_interval_output[t]
}
}
}
string_time_interval_names = matrix(NA, nrow = 1, ncol = length(interval_time_input))
i = 1
for(t in 1:length(interval_time_input))
{
if(t == 1)
{
string_time_interval_names[1] = paste(0, "-", 12, " mo", " (", 0, "-", 1, " yr)", sep = "")
if(num_deaths_during_interval_output[t] > 0)
{
for(j in 1:num_deaths_during_interval_output[t])
{
left_endpoint_output[i] = 0
right_endpoint_output[i] = 12
i = i + 1
}
}
}
if(t != 1)
{
string_time_interval_names[t] = paste(interval_time_input[t-1], "-", interval_time_input[t], " mo", " (", interval_time_input[t-1]/12, "-", interval_time_input[t]/12, " yr)", sep = "")
if(num_deaths_during_interval_output[t] > 0)
{
for(j in 1:num_deaths_during_interval_output[t])
{
left_endpoint_output[i] = interval_time_input[t-1]
right_endpoint_output[i] = interval_time_input[t]
i = i + 1
}
}
}
}
life_table_df = data.frame(t(string_time_interval_names), t(num_at_risk_during_interval_output), t(num_deaths_during_interval_output), t(risk_death_during_interval_output), t(prob_surv_during_interval_output), t(s_values_output))
names(life_table_df)[1] = "Time interval"
names(life_table_df)[2] = "Number of people at risk in the interval"
names(life_table_df)[3] = "Number of deaths that occur in the interval"
names(life_table_df)[4] = "Risk of death in the interval"
names(life_table_df)[5] = "Probability of surviving in the interval"
names(life_table_df)[6] = "Survival probability"
interval_censored_df = data.frame(t(obs_num_output), t(left_endpoint_output), t(right_endpoint_output))
names(interval_censored_df)[1] = "Interval-censored mortality time observation number"
names(interval_censored_df)[2] = "Interval left bound"
names(interval_censored_df)[3] = "Interval right bound"
return(list("life_table_df" = life_table_df, "interval_censored_df" = interval_censored_df))
}
## ----eval=TRUE----------------------------------------------------------------
num_ran_samples = 50
data_weibull = rweibull(num_ran_samples, shape = a_shape, scale = sigma_scale)
info_use = create_life_table_info(time_use, num_ran_samples, data_weibull)
knitr::kable(info_use$life_table_df, caption = "Life table data", format = "pipe", padding = 0)
## ----eval=TRUE----------------------------------------------------------------
knitr::kable(info_use$interval_censored_df, caption = "Interval-censored data", format = "pipe", padding = 0)
## ----eval=TRUE----------------------------------------------------------------
#Plot the survival probability from the life table data
plot(time_use, info_use$life_table_df[[6]], xlab="Months (t)", ylab="Survival probability (S)", type = "s")
## ----eval=TRUE----------------------------------------------------------------
logp <- function(param)
{
a_shape_use = param[1]
sigma_scale_use = param[2]
logp_val = dunif(a_shape_use, min = 1e-2, max = 1e2, log = TRUE) + dunif(sigma_scale_use, min = 1, max = 1e4, log = TRUE)
return(logp_val)
}
logl <- function(param)
{
a_shape_use = param[1]
sigma_scale_use = param[2]
logl_val = 0
interval_l_bound = info_use$interval_censored_df[[2]]
interval_r_bound = info_use$interval_censored_df[[3]]
for(i in 1:num_ran_samples)
{
logl_val = logl_val + log(exp(-1*((interval_l_bound[i]/sigma_scale_use)^a_shape_use)) - exp(-1*((interval_r_bound[i]/sigma_scale_use)^a_shape_use)))
}
if(is.na(logl_val) || is.infinite(logl_val))
{
logl_val = -Inf
}
return(logl_val)
}
logfuns = list(logp = logp, logl = logl)
## ----eval=TRUE----------------------------------------------------------------
num_par = 2
num_chains = 2*num_par
theta0 = matrix(0, nrow = num_par, ncol = num_chains)
temp_val = 0
j = 0
while(j < num_chains)
{
initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4))
temp_val = logl(initial) + logp(initial)
while(is.na(temp_val) || is.infinite(temp_val))
{
initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4))
temp_val = logl(initial) + logp(initial)
}
j = j + 1
message(paste('j:', j))
theta0[1,j] = initial[1]
theta0[2,j] = initial[2]
}
## ----eval=TRUE----------------------------------------------------------------
num_chain_iterations = 1e5
thin_val_par = 10
floor(num_chain_iterations/thin_val_par)*num_chains
## ----eval=TRUE----------------------------------------------------------------
Weibull_Surv_Quad_result = ensemblealg(theta0, logfuns, T_iter = num_chain_iterations, Thin_val = thin_val_par, ShowProgress = TRUE, ReturnCODA = TRUE)
## ----eval=TRUE----------------------------------------------------------------
my_samples = Weibull_Surv_Quad_result$theta_sample
my_log_prior = Weibull_Surv_Quad_result$log_prior_sample
my_log_like = Weibull_Surv_Quad_result$log_like_sample
my_mcmc_list_coda = Weibull_Surv_Quad_result$mcmc_list_coda
## ----eval=TRUE----------------------------------------------------------------
varnames(my_mcmc_list_coda) = c("a_shape", "sigma_scale")
plot(my_mcmc_list_coda)
## ----eval=TRUE----------------------------------------------------------------
my_samples_burn_in = my_samples[,,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]
my_log_prior_burn_in = my_log_prior[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]
my_log_like_burn_in = my_log_like[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))]
## ----eval=TRUE----------------------------------------------------------------
diagnostic_result = psrfdiagnostic(my_samples_burn_in, 0.05)
diagnostic_result$p_s_r_f_vec
## ----eval=TRUE----------------------------------------------------------------
log_un_post_vec = as.vector(my_log_prior_burn_in + my_log_like_burn_in)
k1 = as.vector(my_samples_burn_in[1,,])
k2 = as.vector(my_samples_burn_in[2,,])
## ----eval=TRUE----------------------------------------------------------------
#a_shape posterior median and 95% credible interval
median(k1)
hpdparameter(k1, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true
#value of a_shape, 8)
#sigma_scale posterior median and 95% credible interval
median(k2)
hpdparameter(k2, 0.05)
#(The 95% CI is narrower than the prior distribution and it contains the true
#value of sigma_scale, 1100)
#a_shape, sigma_scale, and stand_dev maximum posterior
k1[which.max(log_un_post_vec)]
k2[which.max(log_un_post_vec)]
## ----eval=TRUE----------------------------------------------------------------
plot(k1, exp(log_un_post_vec), xlab="a_shape", ylab="unnormalized posterior density")
## ----eval=TRUE----------------------------------------------------------------
plot(k2, exp(log_un_post_vec), xlab="sigma_scale", ylab="unnormalized posterior density")
## ----eval=TRUE----------------------------------------------------------------
num_samples = 10000
w_predict = matrix(NA, nrow = num_samples, ncol = num_ran_samples)
s_predict = matrix(NA, nrow = num_samples, ncol = length(time_use))
discrepancy_s = matrix(NA, nrow = num_samples, ncol = 1)
discrepancy_s_pred = matrix(NA, nrow = num_samples, ncol = 1)
ind_pred_exceeds_s = matrix(NA, nrow = num_samples, ncol = 1)
for (i in (length(log_un_post_vec) - num_samples + 1):length(log_un_post_vec))
{
l = i - (length(log_un_post_vec) - num_samples)
w_predict[l,] = rweibull(num_ran_samples, shape = k1[i], scale = k2[i])
data_weibull_temp = w_predict[l,]
info_use_predict = create_life_table_info(time_use, num_ran_samples, data_weibull_temp)
s_predict[l,] = t(info_use_predict$life_table_df[[6]])
s_data = t(info_use$life_table_df[[6]])
my_s_mean = exp(-1*((time_use/k2[i])^k1[i]))
discrepancy_s[l,1] = sum(((s_data - my_s_mean)^2))
discrepancy_s_pred[l,1] = sum(((s_predict[l,] - my_s_mean)^2))
if(discrepancy_s_pred[l,1] > discrepancy_s[l,1])
{
ind_pred_exceeds_s[l,1] = 1
}
else
{
ind_pred_exceeds_s[l,1] = 0
}
svMisc::progress(l,num_samples,progress.bar = TRUE,init = (l == 1))
}
## ----eval=TRUE----------------------------------------------------------------
Bayes_p_value = sum(ind_pred_exceeds_s)/length(ind_pred_exceeds_s)
Bayes_p_value
## ----eval=TRUE----------------------------------------------------------------
s_predict_lower = matrix(NA, nrow = 1, ncol = length(time_use))
s_predict_upper = matrix(NA, nrow = 1, ncol = length(time_use))
for(j in 1:length(time_use))
{
s_predict_lower[j] = quantile(s_predict[,j], probs = 0.025)
s_predict_upper[j] = quantile(s_predict[,j], probs = 0.975)
}
## ----eval=TRUE----------------------------------------------------------------
plot(time_use, info_use$life_table_df[[6]], main="Weibull survival curve (a = 8, sigma = 1100) Example", cex.main = 1, xlab="Months (t)", ylab="Survival probability (S)", type = "s", ylim = c(0, 1))
lines(time_use, exp(-1*((time_use/sigma_scale)^a_shape)), type = "l", lty = 1, col = "red")
lines(time_use, colMeans(s_predict), type = "l", lty = 1, col = "blue")
lines(time_use, s_predict_lower, type = "l", lty = 2, col = "blue")
lines(time_use, s_predict_upper, type = "l", lty = 2, col = "blue")
legend("bottomleft", legend = c("Data", "True model", "Posterior predictive mean", "95% prediction interval"), lty=c(1,1,1,2), col = c("black", "red", "blue", "blue"), pch=c(NA,NA,NA,NA))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.