R/Generate_NA.R

Defines functions NA_Generator

Documented in NA_Generator

#' Function to introduce additional NAs for prediction error calculation.
#'
#' @param SNP_df Original data of SNP. It may contain some SNP
#' @param percent The final percentage of total NA's we want in the dataset.
#'
#' @return a list of variables:
#' 1. SNP_NA_df: the dataset with additional SNPs.
#' 2. NA_percent_orig: original missing percentable in the dataset.
#' 3. NA_percent_generate: introduced NA percentage.
#' 4. NP_generate_positions: the positions of the introduced NA's.
#' @export
#'
#' @examples
#' data("SNP_orig_sub")
#' ## original NA ratio is 0.018
#' SNP_NA_df02 <- NA_Generator(SNP_orig_sub, 0.2)
#' ## Introduced another 18% of NAs.
#'
#' ## SNP_NA_df <- NA_Generator(SNP_orig_sub, 0.01)
#' ## Should report an error: alread has 1.8% NA's, higher than the
#' ## target percentage.
#'
NA_Generator <- function(SNP_df, percent) {
  m <- nrow(SNP_df)
  n <- ncol(SNP_df)
  ## Get the NA positions
  NA_positions <- which(is.na(SNP_df))

  ## Get the not NA positions and counts.
  notNA_positions <- which(!is.na(SNP_df))
  notNA_len <- length(notNA_positions)

  ## Calculate the original percentage of NA's
  NA_percent_orig <- length(NA_positions) / (m * n)

  ## Calculate the percentage and counts of NA we need to generate.
  NA_percent_generate <- percent - NA_percent_orig
  size = notNA_len * NA_percent_generate

  ## If the size of the number of NA to be generated is smaller than 1, then
  ## we cannot introduce any more NA's under current required total percentage.
  if(size < 1) stop(paste("There are already", NA_percent_orig * 100,
                  "% of NAs in the dataset. Cannot generate more."))

  ## Introduce random NAs to the original dataset.
  NP_generate_positions <- sample(notNA_positions, size = size)
  SNP_NA_df <- SNP_df
  SNP_NA_df[NP_generate_positions] <- NA

  ## Return the list of related information.
  return(list(SNP_NA_df = SNP_NA_df, NA_percent_orig = NA_percent_orig,
              NA_percent_generate = NA_percent_generate, NP_generate_positions = NP_generate_positions))
}
GaoGN517/689_SNPFastImpute_Mac documentation built on Dec. 8, 2019, 12:33 a.m.