## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(SBMTrees)
library(mitml)
library(lme4)
## ----prediction_sim-----------------------------------------------------------
# Simulate data
data <- simulation_prediction(
  n_subject = 100,
  seed = 1234,
  nonlinear = TRUE,
  nonrandeff = TRUE,
  nonresidual = TRUE
)
# Extract training and testing datasets
X_train <- data$X_train
Y_train <- data$Y_train
Z_train <- data$Z_train
subject_id_train <- data$subject_id_train
X_test <- data$X_test
Z_test <- data$Z_test
subject_id_test <- data$subject_id_test
Y_test_true <- data$Y_test_true
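# Optional sanity check: confirm the training and testing pieces line up.
# (This assumes X_train/X_test are matrices or data frames with one row per
# observation, which matches how they are used below.)
dim(X_train)
length(Y_train)
dim(X_test)
length(subject_id_test)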
## ----prediction---------------------------------------------------------------
# Fit the predictive model (the very small nburn/npost/skip values keep this
# example fast; a real analysis will need far more MCMC iterations)
model <- BMTrees_prediction(
  X_train, Y_train, Z_train, subject_id_train,
  X_test, Z_test, subject_id_test,
  model = "BMTrees",
  binary = FALSE,
  nburn = 3L,
  npost = 4L,
  skip = 1L,
  verbose = FALSE,
  seed = 1234
)
# Posterior expectation for the testing dataset
posterior_predictions <- model$post_predictive_y_test
head(colMeans(posterior_predictions))
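# The draws object is assumed to be a matrix with one row per posterior draw
# and one column per test observation, consistent with the use of colMeans()
# above and apply(..., 2, ...) below.
dim(posterior_predictions)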
## ----prediction_evaluation----------------------------------------------------
point_predictions <- colMeans(posterior_predictions)
# Compute MAE
mae <- mean(abs(point_predictions - Y_test_true))
cat("Mean Absolute Error (MAE):", mae, "\n")
# Compute MSE
mse <- mean((point_predictions - Y_test_true)^2)
cat("Mean Squared Error (MSE):", mse, "\n")
# Compute 95% credible intervals
lower_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.025)
upper_bounds <- apply(posterior_predictions, 2, quantile, probs = 0.975)
# Check if true values fall within the intervals
coverage <- mean(Y_test_true >= lower_bounds & Y_test_true <= upper_bounds)
cat("95% Posterior Predictive Interval Coverage:", coverage * 100, "%\n")
plot(Y_test_true, point_predictions,
     xlab = "True Values",
     ylab = "Predicted Values",
     main = "True vs Predicted Values")
abline(0, 1, col = "red") # Add a 45-degree reference line
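# Optional visualization sketch: overlay the 95% posterior predictive
# intervals computed above on the true values (ordering by the true values
# is only for readability).
ord <- order(Y_test_true)
plot(Y_test_true[ord], point_predictions[ord],
     xlab = "True Values",
     ylab = "Predicted Values",
     main = "Predictions with 95% Posterior Predictive Intervals",
     pch = 16)
segments(Y_test_true[ord], lower_bounds[ord],
         Y_test_true[ord], upper_bounds[ord],
         col = "grey")
abline(0, 1, col = "red")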
## ----imputation_sim-----------------------------------------------------------
# Simulate data with missing values
data <- simulation_imputation(
  n_subject = 100,
  seed = 1234,
  nonrandeff = TRUE,
  nonresidual = TRUE,
  alligned = FALSE
)
# Extract components of the dataset
X_mis <- data$X_mis # Covariates with missing values
Y_mis <- data$Y_mis # Outcomes with missing values
Z <- data$Z # Random predictors
subject_id <- data$subject_id
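# Optional check: how much of each variable is missing? (X_mis is assumed to
# be a matrix or data frame of covariates and Y_mis a numeric outcome vector,
# matching how they are passed to sequential_imputation() below.)
colMeans(is.na(X_mis))
mean(is.na(Y_mis))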
## ----imputation---------------------------------------------------------------
# Run the sequential imputation (again with a very small burn-in to keep the
# example fast; a real analysis will need far more iterations)
imputed_model <- sequential_imputation(
  X_mis, Y_mis, Z, subject_id,
  type = rep(0, ncol(X_mis)),
  binary_outcome = FALSE,
  model = "BMTrees",
  nburn = 3L,
  npost = 40L,
  skip = 2L,
  verbose = FALSE,
  seed = 1234
)
# Extract imputed data
imputed_data <- imputed_model$imputed_data
dim(imputed_data) # Dimensions: posterior samples x observations x variables
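# Optional check: after imputation, no missing values should remain in the
# completed datasets.
sum(is.na(imputed_data))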
## ----imputation_evaluation----------------------------------------------------
# Build a list of completed datasets in the format expected by mitml
MI_data <- list()
for (i in seq_len(dim(imputed_data)[1])) {
  MI_data[[i]] <- cbind(as.data.frame(imputed_data[i, , ]), Z, subject_id)
  colnames(MI_data[[i]]) <- c(colnames(X_mis), "Y", "Z1", "Z2", "Z3", "subject_id")
}
MI_data <- as.mitml.list(MI_data) # Convert to a mitml.list for pooling
# Fit the model on each imputed dataset
lmm_results <- with(MI_data, lmer(
  Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
    (0 + Z1 + Z2 + Z3 | subject_id)
))
# Pool fixed effects using Rubin's Rules
pooled_results <- testEstimates(lmm_results)
# Print pooled results
print(pooled_results)
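# Optional: pooled confidence intervals for the fixed effects. This assumes
# your mitml version provides a confint() method for testEstimates objects;
# otherwise intervals can be formed from the pooled estimates and standard
# errors reported above.
confint(pooled_results)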