# R/induce_missing.R

#' Induce Missing Values (NA) at Random
#'
#' Replaces a randomly chosen subset of the cells of `x` with `NA`. Cells are
#' drawn uniformly, without replacement, over the whole table, so the number
#' of `NA` per row or column is not fixed. Cells that are already `NA` can be
#' drawn too.
#'
#' @export
#'
#' @author David Navega
#'
#' @param x a matrix or data.frame (any object with exactly two dimensions).
#' @param amount a single number between 0 and 1 (inclusive) defining the
#' fraction of entries to set to missing (NA). The number of affected cells is
#' `nrow(x) * ncol(x) * amount`, rounded down.
#'
#' @return a copy of x with induced NA
#'
#' @examples
#' iris_missing <- induce_missing(iris)
#' print(head(iris_missing))
#'
induce_missing <- function(x, amount = 0.1) {

  # validate input ----
  if (length(dim(x)) != 2L) {
    stop(
      "`x` must be a two-dimensional object (matrix or data.frame).",
      call. = FALSE
    )
  }

  valid_amount <- is.numeric(amount) && length(amount) == 1L &&
    !is.na(amount) && amount >= 0 && amount <= 1
  if (!valid_amount) {
    stop("`amount` must be a single number between 0 and 1.", call. = FALSE)
  }

  n <- nrow(x)
  p <- ncol(x)

  # double arithmetic: integer n * p would overflow to NA on very large tables
  n_cells <- as.numeric(n) * p

  # The small tolerance keeps floating point error from dropping a cell when
  # n_cells * amount is mathematically whole (e.g. 100 * 0.29 evaluates to
  # 28.999999999999996, which a bare floor() would turn into 28).
  n_missing <- min(n_cells, floor(n_cells * amount + 1e-8))

  # nothing to blank out: hand x back untouched
  if (n_missing == 0) {
    return(x)
  }

  # creating missing mask
  missing_matrix <- matrix(data = FALSE, nrow = n, ncol = p)
  missing_matrix[sample.int(n = n_cells, size = n_missing)] <- TRUE

  # induce missing
  x[missing_matrix] <- NA

  x
}
# dsnavega/imputeForest documentation built on May 8, 2019, 2:43 p.m.