inst/doc/Introduction_to_metaforest.R

## ----setup, include = FALSE---------------------------------------------------
# Flag carried over from the vignette source; nothing in the visible script
# reads it.
run_everything <- FALSE
# Global knitr chunk options. Chunks are evaluated only when the
# "run_vignettes" environment variable is set to a non-empty string.
knitr::opts_chunk$set(
  eval = nzchar(Sys.getenv("run_vignettes")),
  collapse = TRUE,
  comment = "#>"
)

## ----eval = FALSE-------------------------------------------------------------
#  # Install the metaforest package. This needs to be done only once.
#  install.packages("metaforest")
#  # Then, load the metaforest package
#  library(metaforest)
#  # Assign the fukkink_lont data, which is included in
#  # the metaforest package, to an object called "data"
#  data <- fukkink_lont
#  # Because MetaForest uses the random number generator (for bootstrapping),
#  # we set a random seed so analyses can be replicated exactly.
#  set.seed(62)

## ----echo = FALSE, message=FALSE----------------------------------------------
# Attach the packages used by the evaluated chunks of this script.
library(metaforest)
library(caret)
# Fix the random seed so the analyses can be replicated exactly.
set.seed(62)
# Make the fukkink_lont data available under the name "data".
data <- fukkink_lont

## ----eval = FALSE-------------------------------------------------------------
#  # Run model with many trees to check convergence
#  check_conv <- MetaForest(yi~.,
#                          data = data,
#                          study = "id_exp",
#                          whichweights = "random",
#                          num.trees = 20000)
#  # Plot convergence trajectory
#  plot(check_conv)

## ----echo = FALSE-------------------------------------------------------------
# Load the stored convergence-check model instead of refitting it.
# The directory holding the stored results can be overridden through the
# "metaforest_vignette_dir" environment variable; when that variable is unset
# or empty, the original hard-coded location is used.
cache_dir <- Sys.getenv("metaforest_vignette_dir")
if (!nzchar(cache_dir)) {
  cache_dir <- "C:/Git_Repositories/S4_meta-analysis"
}
check_conv <- readRDS(file.path(cache_dir, "check_conv.RData"))
# Plot convergence trajectory
plot(check_conv)

## ----eval=FALSE---------------------------------------------------------------
#  # Model with 5000 trees for replication
#  mf_rep <- MetaForest(yi~.,
#                          data = data,
#                          study = "id_exp",
#                          whichweights = "random",
#                          num.trees = 5000)
#  # Run recursive preselection, store results in object 'preselected'
#  preselected <- preselect(mf_rep,
#                           replications = 100,
#                           algorithm = "recursive")
#  # Plot the results
#  plot(preselected)
#  # Retain only moderators with positive variable importance in more than
#  # 50% of replications
#  retain_mods <- preselect_vars(preselected, cutoff = .5)

## ----echo = FALSE-------------------------------------------------------------
# Load the stored preselection results instead of recomputing them.
# The directory holding the stored results can be overridden through the
# "metaforest_vignette_dir" environment variable; when that variable is unset
# or empty, the original hard-coded location is used.
cache_dir <- Sys.getenv("metaforest_vignette_dir")
if (!nzchar(cache_dir)) {
  cache_dir <- "C:/Git_Repositories/S4_meta-analysis"
}
preselected <- readRDS(file.path(cache_dir, "preselected.RData"))
# Retain only moderators with positive variable importance in more than
# 50% of replications
retain_mods <- preselect_vars(preselected, cutoff = .5)

## ----eval = FALSE-------------------------------------------------------------
#  # Load the caret library
#  library(caret)
#  # Set up 10-fold grouped (=clustered) CV
#  grouped_cv <- trainControl(method = "cv",
#                             index = groupKFold(data$id_exp, k = 10))
#  
#  # Set up a tuning grid for the three tuning parameters of MetaForest
#  tuning_grid <- expand.grid(whichweights = c("random", "fixed", "unif"),
#                         mtry = 2:6,
#                         min.node.size = 2:6)
#  
#  # X should contain only retained moderators, clustering variable, and vi
#  X <- data[, c("id_exp", "vi", retain_mods)]
#  
#  # Train the model
#  mf_cv <- train(y = data$yi,
#                 x = X,
#                 study = "id_exp", # Name of the clustering variable
#                 method = ModelInfo_mf(),
#                 trControl = grouped_cv,
#                 tuneGrid = tuning_grid,
#                 num.trees = 5000)
#  # Examine optimal tuning parameters
#  mf_cv$results[which.min(mf_cv$results$RMSE), ]

## ----echo = FALSE, warning=FALSE----------------------------------------------
# Load the stored cross-validation results instead of retraining the model.
# The directory holding the stored results can be overridden through the
# "metaforest_vignette_dir" environment variable; when that variable is unset
# or empty, the original hard-coded location is used.
cache_dir <- Sys.getenv("metaforest_vignette_dir")
if (!nzchar(cache_dir)) {
  cache_dir <- "C:/Git_Repositories/S4_meta-analysis"
}
mf_cv <- readRDS(file.path(cache_dir, "mf_cv.RData"))
# Locate the row of the results table with the lowest RMSE once, then reuse
# the index for both lookups below
best_row <- which.min(mf_cv$results$RMSE)
# Examine optimal tuning parameters
mf_cv$results[best_row, ]
# Extract R^2_{cv} for the optimal tuning parameters
r2_cv <- mf_cv$results$Rsquared[best_row]

## -----------------------------------------------------------------------------
# Pull the final model out of the cross-validation result for convenience
final <- mf_cv[["finalModel"]]
# Store the R^2_{oob} value held by the forest of the final model
r2_oob <- final[["forest"]][["r.squared"]]
# Show the convergence plot of the final model
plot(final)

## -----------------------------------------------------------------------------
# Show the variable importance plot of the final model
VarImpPlot(final)
# Rank the variable names from most to least important, so that the
# partial dependence plots appear in order of importance
ordered_vars <- local({
  importance <- final$forest$variable.importance
  names(importance)[order(importance, decreasing = TRUE)]
})
# Draw the partial dependence plots for the ranked variables
# (pi = .95 is presumably a 95% percentile interval; see ?PartialDependence)
PartialDependence(
  final,
  vars = ordered_vars,
  rawdata = TRUE,
  pi = .95
)

Try the metaforest package in your browser

Any scripts or data that you put into this service are public.

metaforest documentation built on Jan. 8, 2020, 9:06 a.m.