Nothing
#' Univariate sampler function for continuous variables for prediction-based
#' imputation, assuming normality for prediction errors of random forest
#'
#' @description
#' Please note that functions with names starting with "mice.impute" are
#' exported to be visible for the mice sampler functions. Please do not call
#' these functions directly unless you know exactly what you are doing.
#'
#' For continuous variables only.
#'
#' This function is for \code{RfPred.Norm}
#' multiple imputation method, adapter for \code{mice} samplers.
#' In the \code{mice()} function, set \code{method = "rfpred.norm"} to call it.
#'
#' The function performs multiple imputation based on normality assumption using
#' out-of-bag mean squared error as the estimate for the variance.
#'
#' @details
#' \code{RfPred.Norm} imputation sampler.
#'
#' @param y Vector to be imputed.
#'
#' @param ry Logical vector of length \code{length(y)} indicating the
#' subset \code{y[ry]} of elements in \code{y} to which the imputation
#' model is fitted. The \code{ry} generally distinguishes the observed
#' (\code{TRUE}) and missing values (\code{FALSE}) in \code{y}.
#'
#' @param x Numeric design matrix with \code{length(y)} rows with predictors for
#' \code{y}. Matrix \code{x} may have no missing values.
#'
#' @param wy Logical vector of length \code{length(y)}. A \code{TRUE} value
#' indicates locations in \code{y} for which imputations are created.
#'
#' @param num.trees.cont Number of trees to build for continuous variables.
#' The default is \code{num.trees.cont = 10}.
#'
#' @param norm.err.cont Use normality assumption for prediction errors of random
#' forests. The default is \code{norm.err.cont = TRUE}, and normality will be
#' assumed for the distribution of the prediction errors, with the variance
#' estimate equal to the overall out-of-bag prediction error, i.e. out-of-bag
#' mean squared error (see Shah et al. 2014). If \code{FALSE}, then the
#' predictions of random forest are used.
#'
#' @param alpha.oob The "significance level" for individual out-of-bag
#' prediction errors used for the calculation of out-of-bag mean squared error,
#' useful in the presence of extreme values.
#' For example, set \code{alpha.oob = 0.05} to use 95\% confidence level.
#' The default is \code{alpha.oob = 0.0}, and all the individual out-of-bag
#' prediction errors will be kept intact.
#'
#' @param pre.boot If \code{TRUE}, bootstrapping prior to imputation will be
#' performed to perform 'proper' multiple imputation, for accommodating sampling
#' variation in estimating population regression parameters
#' (see Shah et al. 2014).
#' It should be noted that if \code{TRUE}, this option is in effect even if the
#' number of trees is set to one.
#'
#' @param num.threads Number of threads for parallel computing. The default is
#' \code{num.threads = NULL} and all the processors available can be used.
#'
#' @param ... Other arguments to pass down.
#'
#' @return Vector with imputed data, same type as \code{y}, and of length
#' \code{sum(wy)}.
#'
#' @name mice.impute.rfpred.norm
#'
#' @author Shangzhi Hong
#'
#' @references
#' Shah, Anoop D., et al. "Comparison of random forest and parametric
#' imputation models for imputing missing data using MICE: a CALIBER study."
#' American journal of epidemiology 179.6 (2014): 764-774.
#'
#' @examples
#' # Users can set method = "rfpred.norm" in call to mice to use this method
#' data("airquality")
#' impObj <- mice(airquality, method = "rfpred.norm", m = 5,
#' maxit = 5, maxcor = 1.0, eps = 0,
#' remove.collinear = FALSE, remove.constant = FALSE,
#' printFlag = FALSE)
#'
#' @export
mice.impute.rfpred.norm <- function(
    y,
    ry,
    x,
    wy = NULL,
    num.trees.cont = 10,
    norm.err.cont = TRUE,
    alpha.oob = 0.0,
    pre.boot = TRUE,
    num.threads = NULL,
    ...) {
    # By default, impute exactly the entries that are not used for fitting.
    if (is.null(wy)) {
        wy <- !ry
    }

    # Training data: the rows flagged by `ry`, optionally bootstrapped.
    yObs <- y[ry]
    xObs <- x[ry, , drop = FALSE]
    if (isTRUE(pre.boot)) {
        # Resample the observed rows with replacement before fitting.
        bootIdx <- sample(sum(ry), replace = TRUE)
        yObs <- yObs[bootIdx]
        xObs <- xObs[bootIdx, , drop = FALSE]
    }
    xMis <- x[wy, , drop = FALSE]

    # Fit the forest through the package-level wrapper; extra arguments in
    # `...` are forwarded to it. `oob.error = TRUE` is needed because the
    # out-of-bag results are read from the fitted object below.
    rfObj <- rangerCallerSafe(
        x = xObs,
        y = yObs,
        oob.error = TRUE,
        num.trees = num.trees.cont,
        num.threads = num.threads,
        ...)
    misPredVal <- predictions(predict(rfObj, xMis))

    # Without the normality assumption the point predictions are returned.
    if (!isTRUE(norm.err.cont)) {
        return(misPredVal)
    }

    # Variance estimate: overall out-of-bag error unless trimming is requested.
    oobMse <- rfObj[["prediction.error"]]
    if (isTRUE(alpha.oob > 0 && alpha.oob < 1)) {
        # Empirical out-of-bag prediction errors
        oobErr <- yObs - rfObj[["predictions"]]
        # Out-of-bag predictions can be missing when the number of trees is
        # small, so drop those entries before computing the quantile.
        oobErrAbs <- abs(oobErr[!is.na(oobErr)])
        oobErrAbsHi <- quantile(
            oobErrAbs,
            probs = 1 - alpha.oob,
            na.rm = TRUE,
            names = FALSE
        )
        # Keep only the absolute errors strictly below the upper quantile.
        oobErrAbs <- oobErrAbs[oobErrAbs < oobErrAbsHi]
        # If trimming leaves nothing, keep the overall out-of-bag error rather
        # than producing NaN from the mean of an empty vector.
        if (length(oobErrAbs) > 0) {
            oobMse <- mean(oobErrAbs * oobErrAbs)
        }
    }

    # Draw imputations around the predictions, using the variance estimate
    # selected above (the trimmed one when `alpha.oob` is in effect).
    rnorm(length(misPredVal), mean = misPredVal, sd = sqrt(oobMse))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.