#' Replace missing data with random values
#'
#' @param x a vector.
#' @param replace should sampling be with replacement?
#' \code{TRUE} by default.
#' @param bw the smoothing bandwidth to be used.
#' @param adjust the bandwidth used is actually \code{adjust*bw}.
#' @param preserve_var if \code{TRUE} random generation algorithm
#' preserves mean and variance of the original sample.
#'
#' @details
#'
#' \code{na_random} replaces missing data with non-missing values drawn
#' from the data with, or without replacement. \code{na_density} replaces
#' missing values with draws from Gaussian kernel density estimated on
#' the non-missing data. The \code{preserve_var} argument for \code{na_density}
#' if set to \code{TRUE} (default), leads to producing samples that have the same
#' variance as the original data.
#'
#' @seealso \code{\link{sample}}, \code{\link[stats]{density}}
#' @importFrom stats density bw.nrd0 rnorm na.omit var
#'
#' @export
na_random <- function(x, replace = TRUE) {
nas <- is.na(x)
x <- as_imputed(x)
x[nas] <- sample(x[!nas], sum(nas), replace = replace)
x
}
#' @rdname na_random
#' @export
na_density <- function(x, bw = bw.nrd0(na.omit(x)), adjust = 1,
preserve_var = TRUE) {
if (length(bw) > 1L)
message("bw has length > 1 and only the first element will be used")
if (length(adjust) > 1L)
message("adjust has length > 1 and only the first element will be used")
nas <- is.na(x)
x <- as_imputed(x)
n <- sum(nas)
bw <- bw[1L] * adjust[1L]
x[nas] <- sample(x[!nas], n, replace = TRUE)
if (!preserve_var) {
x[nas] <- rnorm(n, x[nas], bw)
} else {
mx <- mean(x[!nas])
sx <- var(x[!nas])
x[nas] <- mx + rnorm(n, x[nas] - mx, bw) / sqrt(1.0 + (bw^2)/sx)
}
x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.