R/random.R

Defines functions na_random na_density

Documented in na_density na_random

#' Replace missing data with random values
#'
#' @param x            a vector.
#' @param replace      should sampling be with replacement?
#'                     \code{TRUE} by default.
#' @param bw           the smoothing bandwidth to be used.
#' @param adjust       the bandwidth used is actually \code{adjust*bw}.
#' @param preserve_var if \code{TRUE} random generation algorithm
#'                     preserves mean and variance of the original sample.
#'
#' @details
#'
#' \code{na_random} replaces missing data with non-missing values drawn
#' from the data with, or without replacement. \code{na_density} replaces
#' missing values with draws from Gaussian kernel density estimated on
#' the non-missing data. The \code{preserve_var} argument for \code{na_density}
#' if set to \code{TRUE} (default), leads to producing samples that have the same
#' variance as the original data.
#'
#' @seealso \code{\link{sample}}, \code{\link[stats]{density}}
#' @importFrom stats density bw.nrd0 rnorm na.omit var
#'
#' @export

na_random <- function(x, replace = TRUE) {
  nas <- is.na(x)
  x <- as_imputed(x)
  x[nas] <- sample(x[!nas], sum(nas), replace = replace)
  x
}

#' @rdname na_random
#' @export

na_density <- function(x, bw = bw.nrd0(na.omit(x)), adjust = 1,
                       preserve_var = TRUE) {

  if (length(bw) > 1L)
    message("bw has length > 1 and only the first element will be used")
  if (length(adjust) > 1L)
    message("adjust has length > 1 and only the first element will be used")

  nas <- is.na(x)
  x <- as_imputed(x)
  n <- sum(nas)
  bw <- bw[1L] * adjust[1L]
  x[nas] <- sample(x[!nas], n, replace = TRUE)

  if (!preserve_var) {
    x[nas] <- rnorm(n, x[nas], bw)
  } else {
    mx <- mean(x[!nas])
    sx <- var(x[!nas])
    x[nas] <- mx + rnorm(n, x[nas] - mx, bw) / sqrt(1.0 + (bw^2)/sx)
  }

  x
}
twolodzko/misster documentation built on May 24, 2019, 2:54 p.m.