Nothing
#' Generate the Sampling Distribution of Regression Parameters
#' Using the Monte Carlo Method for Data with Missing Values
#'
#' @details Multiple imputation
#' is used to deal with missing values in a data set.
#' The vector of parameter estimates
#' and the corresponding sampling covariance matrix
#' are estimated for each of the imputed data sets.
#' Results are combined to arrive at the pooled vector of parameter estimates
#' and the corresponding sampling covariance matrix.
#' The pooled estimates are then used to generate the sampling distribution
#' of regression parameters.
#' See `MC()` for more details on the Monte Carlo method.
#'
#' @author Ivan Jacob Agaloos Pesigan
#'
#' @return Returns an object
#' of class `mc` which is a list with the following elements:
#' \describe{
#' \item{call}{Function call.}
#' \item{args}{Function arguments.}
#' \item{lm_process}{Processed `lm` object.}
#' \item{scale}{Sampling variance-covariance matrix of parameter estimates.}
#' \item{location}{Parameter estimates.}
#' \item{thetahatstar}{Sampling distribution of parameter estimates.}
#' \item{fun}{Function used ("MCMI").}
#' }
#'
#' @inheritParams MC
#' @param mi Object of class `mids` (output of [mice::mice()]),
#' object of class `amelia` (output of [Amelia::amelia()]),
#' or a list of multiply imputed data sets.
#'
#' @references
#' Dudgeon, P. (2017).
#' Some improvements in confidence intervals
#' for standardized regression coefficients.
#' *Psychometrika*, *82*(4), 928–951.
#' \doi{10.1007/s11336-017-9563-z}
#'
#' MacKinnon, D. P., Lockwood, C. M., & Williams, J. (2004).
#' Confidence limits for the indirect effect:
#' Distribution of the product and resampling methods.
#' *Multivariate Behavioral Research*, *39*(1), 99-128.
#' \doi{10.1207/s15327906mbr3901_4}
#'
#' Pesigan, I. J. A., & Cheung, S. F. (2023).
#' Monte Carlo confidence intervals for the indirect effect with missing data.
#' *Behavior Research Methods*.
#' \doi{10.3758/s13428-023-02114-4}
#'
#' Preacher, K. J., & Selig, J. P. (2012).
#' Advantages of Monte Carlo confidence intervals for indirect effects.
#' *Communication Methods and Measures*, *6*(2), 77–98.
#' \doi{10.1080/19312458.2012.679848}
#'
#' @examples
#' # Data ---------------------------------------------------------------------
#' data("nas1982", package = "betaMC")
#' nas1982_missing <- mice::ampute(nas1982)$amp # data set with missing values
#'
#' # Multiple Imputation
#' mi <- mice::mice(nas1982_missing, m = 5, seed = 42, print = FALSE)
#'
#' # Fit Model in lm ----------------------------------------------------------
#' ## Note that this does not deal with missing values.
#' ## The fitted model (`object`) is updated with each imputed data
#' ## within the `MCMI()` function.
#' object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982_missing)
#'
#' # Monte Carlo --------------------------------------------------------------
#' mc <- MCMI(
#' object,
#' mi = mi,
#' R = 100, # use a large value e.g., 20000L for actual research
#' seed = 0508
#' )
#' mc
#' # The `mc` object can be passed as the first argument
#' # to the following functions
#' # - BetaMC
#' # - DeltaRSqMC
#' # - DiffBetaMC
#' # - PCorMC
#' # - RSqMC
#' # - SCorMC
#'
#' @family Beta Monte Carlo Functions
#' @keywords betaMC mc
#' @export
MCMI <- function(object,
mi,
R = 20000L,
type = "hc3",
g1 = 1,
g2 = 1.5,
k = 0.7,
decomposition = "eigen",
pd = TRUE,
tol = 1e-06,
fixed_x = FALSE,
seed = NULL) {
lm_process <- .ProcessLM(object)
stopifnot(
type %in% c(
"adf",
"hc0",
"hc1",
"hc2",
"hc3",
"hc4",
"hc4m",
"hc5",
"mvn"
)
)
stopifnot(0 < k & k < 1)
constant <- k
call0 <- stats::getCall(object)
call1 <- call0
data0 <- eval(
call0$data,
envir = parent.frame()
)
for (i in seq_along(call0)) {
call1[[i]] <- eval(
expr = call0[[i]],
envir = parent.frame()
)
}
if (fixed_x) {
# check if the X matrix has complete data
n_data <- dim(data0)[1]
n_x <- lm_process$x[, -1]
n_x <- dim(
n_x[
stats::complete.cases(n_x), ,
drop = FALSE
]
)[1]
if (n_data - n_x > 0) {
stop(
paste0(
"\n",
"There are missing cases in the matrix of regressors.",
"\n",
"Consider using \'fixed_x = FALSE\'.",
"\n"
)
)
}
}
if (
inherits(
mi,
"mids"
)
) {
imp <- mice::complete(
mi,
action = "all"
)
} else if (
inherits(
mi,
"amelia"
)
) {
imp <- mi$imputations
} else if (
inherits(
mi,
"list"
)
) {
imp <- mi
} else {
stop("Invalid \'mi\' argument.")
}
fits <- lapply(
X = imp,
FUN = function(x) {
call1$data <- x
return(
eval(expr = call1)
)
}
)
lm_processes <- lapply(
X = fits,
FUN = .ProcessLM
)
coefs <- lapply(
X = lm_processes,
FUN = function(x) {
x$theta
}
)
theta <- colMeans(
do.call(
what = "rbind",
args = coefs
)
)
vechsigmacapx <- theta[
(lm_process$k + 1):lm_process$q
]
if (fixed_x) {
coefs <- lapply(
X = coefs,
FUN = function(x) {
x[seq_len(lm_process$k)]
}
)
}
vcovs <- lapply(
X = lm_processes,
FUN = .Cov,
type = type,
g1 = g1,
g2 = g2,
k = constant,
jcap = NULL,
fixed_x = fixed_x
)
pooled <- .MICombine(
coefs = coefs,
vcovs = vcovs,
M = length(coefs),
k = length(coefs[[1]]),
adj = TRUE
)
# use default total covariance
scale <- pooled$total
location <- pooled$est
mi_output <- list(
mi = mi,
lm = fits,
lm_process = lm_processes
)
out <- list(
call = match.call(),
args = list(
object = object,
mi = mi,
R = R,
type = type,
g1 = g1,
g2 = g2,
k = k,
decomposition = decomposition,
pd = pd,
tol = tol,
fixed_x = fixed_x,
seed = seed,
mi_output = mi_output
),
lm_process = lm_process,
scale = scale,
location = location,
theta = theta,
thetahatstar = .MC(
scale = scale,
location = location,
p = lm_process$p,
k = lm_process$k,
q = lm_process$q,
fixed_x = fixed_x,
vechsigmacapx = vechsigmacapx,
R = R,
decomposition = decomposition,
pd = pd,
tol = tol,
seed = seed
),
fun = "MCMI"
)
class(out) <- c(
"mc",
class(out)
)
return(
out
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.