Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(SBMTrees)
library(mitml)
library(lme4)
## ----prediction_sim-----------------------------------------------------------
# Simulate data
data <- simulation_prediction_conti(
train_prop = 0.5,
n_subject = 20,
n_obs_per_sub = 5,
nonlinear = TRUE,
residual = "normal",
randeff = "skewed_MVN",
seed = 123)
## ----prediction---------------------------------------------------------------
# Fit the predictive model
model <- BMTrees_prediction(
X_train = data$X_train,
Y_train = data$Y_train,
Z_train = data$Z_train,
subject_id_train = data$subject_id_train,
X_test = data$X_test,
Z_test = data$Z_test,
subject_id_test = data$subject_id_test,
model = "BMTrees",
binary = FALSE,
nburn = 1L, npost = 1L, skip = 1L, verbose = FALSE, seed = 1234
)
# Posterior expectation for the testing dataset
posterior_predictions <- model$post_predictive_y_test
head(colMeans(posterior_predictions))
## ----prediction_evaluation----------------------------------------------------
point_predictions = colMeans(posterior_predictions)
# Compute MAE
mae <- mean(abs(point_predictions - data$Y_test))
cat("Mean Absolute Error (MAE):", mae, "\n")
# Compute MSE
mse <- mean((point_predictions - data$Y_test)^2)
cat("Mean Squared Error (MSE):", mse, "\n")
# Compute 95% credible intervals
lower_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.025)
upper_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.975)
# Check if true values fall within the intervals
coverage <- mean(data$Y_test >= lower_bounds & data$Y_test <= upper_bounds)
cat("95% Posterior Predictive Interval Coverage:", coverage * 100, "%\n")
plot(data$Y_test, point_predictions,
xlab = "True Values",
ylab = "Predicted Values",
main = "True vs Predicted Values")
abline(0, 1, col = "red") # Add a 45-degree reference line
## ----imputation_sim-----------------------------------------------------------
# Simulate data with missing values
data <- simulation_imputation(NNY = TRUE, NNX = TRUE,
n_subject = 20, seed = 123)
## ----imputation---------------------------------------------------------------
imputed_model <- sequential_imputation(X = data$data_M[,3:14], Y = data$data_M$Y, Z = data$Z,
subject_id = data$data_M$subject_id, type = c(0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1),
outcome_model = "BMLM", binary_outcome = FALSE, model = "BMTrees", nburn = 0,
npost = 2, skip = 1, verbose = FALSE, seed = 123)
# Extract imputed data
imputed_data <- imputed_model$imputed_data
dim(imputed_data) # Dimensions: posterior samples x observations x variables
## ----imputation_evaluation----------------------------------------------------
# create structure which can be used in mitml
MI_data = list()
for (i in 1:dim(imputed_data)[1]) {
MI_data[[i]] = cbind(as.data.frame(imputed_data[i,,]), data$Z, data$data_M$subject_id)
colnames(MI_data[[i]]) = c(colnames(data$data_M[,3:14]), "Y", "Z1", "Z2", "subject_id")
}
MI_data <- as.mitml.list(MI_data) # Replace with actual datasets
# Fit the model on each imputed dataset
lmm_results <- with(MI_data, lmer(Y ~ X_1 + X_2 + X_3 + X_4 + X_5 + X_6
+ X_7 + X_8 + X_9 + X_10 + X_11 + X_12
+ (0 + Z1 + Z2 | subject_id)))
# Pool fixed effects using Rubin's Rules
pooled_results <- testEstimates(lmm_results)
# Print pooled results
print(pooled_results)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.