R/check_samples.R

Defines functions check_samples

Documented in check_samples

#' Check Sample Names
#'
#' Check to see if your sample IDs are in the database \cr
#' Outputs Sample IDs not found so you may check for typos. \cr
#' Optional `used_sample` argument so one may see if the sample
#' ID has previously been used.
#'
#' @param DB RSQLite database generated by `make_database`
#' @param females Sample IDs of the females you intend to compare to the males.
#' @param males Sample IDs of the males you intend to compare to the females.
#' @param used Vector of sample IDs already used. Optional argument.
#' @return List or message
#' @export
#' @examples
#' \dontrun{
#' check_coverage(DBs, females, males, used_samples)
#' }
#' @export
#' @import dplyr
#' @import tibble
#' @import tidyr
#' @import tidyverse

check_samples <- function(DB, females, males, used=FALSE){
  df <- RSQLite::dbReadTable(DB, "meta_data")
  # counter
  cc <- vector()
  # find sample IDs not present in DB
  np_f <- females[!(females %in% df$Sample)]
  if (length(np_f) > 0 ){
    message(paste0("Not all female IDs found in database"))
    np_f_o <- np_f
    cc <- c(cc, length(np_f))
  } else if (length(np_f) == 0){
    np_f_o <- "All female IDs present in database"
  }
  np_m <- males[!(males %in% df$Sample)]
  if (length(np_m) > 0 ){
    message(paste0("Not all male IDs found in database"))
    np_m_o <- np_m
    cc <- c(cc, length(np_m))
  } else if (length(np_m) == 0){
    np_m_o <- "All female IDs present in database"
  }
  outs <- list(females = np_f_o,
               males = np_m_o)
  # final output and check repeats
  ## No repeats
  if(used == FALSE){
    message(paste0("used set to: ", used, ". Not checking for repeats"))
    if(length(cc) > 0){
      return(outs)
    } else if (length(cc) == 0){
      message(paste0("All sample IDs present in supplied database"))
    }
  } else if (used != FALSE){ # check for repeats
    message(paste0("used parameter set. Checking for repeated Sample IDs..."))
    rp <- c(females, males)
    rp_f <- rp[(rp %in% used)]
    if (length(rp_f) > 0 ){
      message(paste0("Repeated Sample IDs found"))
      rp_f_o <- rp_f
      cc <- c(cc, length(rp_f))
    } else if (length(rp_f) == 0){
      rp_f_o <- "No repeated Sample IDs"
    }
    outs$repeates <- rp_f_o
    # final output checking for repeats
    if(length(cc) > 0){
      return(outs)
    } else if (length(cc) == 0){
      message(paste0("All sample IDs present in supplied database and not found in `used` argument."))
    }
  }
}
danagibbon/MultifacetedCHOICE documentation built on Dec. 19, 2021, 8:05 p.m.