R/check_args.R

Defines functions check_character_table check_is_dna_char check_sample_table check_marker_table check_marker_panel check_is_distinct check_fasta_has_seq check_barcodes_fasta check_dir_creation

#' @importFrom rlang abort
check_character_table <- function(table, req_cols, arg_name) {

  if (!is.data.frame(table)) {
    abort(str_c(arg_name, ' must be a data.frame'))
  }

  if (!all(req_cols %in% colnames(table))) {
    abort(str_c(arg_name, ' must contain columns ', str_c(req_cols, collapse = ', ')))
  }

  if (nrow(table) == 0) {
    abort(str_c(arg_name, ' has zero rows'))
  }

  for (cn in req_cols) {
    if (!is.character(table[[cn]])) {
      abort(str_c(str_c(arg_name, '$', cn), ' must be a character vector'))
    }
  }
}

#' @importFrom stringr str_detect
check_is_dna_char <- function(char, arg_name) {

  if(any(str_detect(char, '[^ACTG]'))) {
    abort(str_c(arg_name, ' must only contain nucleic acid code characters A, C, T or G'))
  }
}

#' @importFrom dplyr count pull group_by select distinct "%>%"
#' @importFrom rlang abort
#' @importFrom stringr str_c
check_sample_table <- function(table) {

  req_cols <- c("SampleID", "BarcodeID_F", "BarcodeID_R", "SampleName")
  arg_name <- 'sample_table'
  check_character_table(table, req_cols, arg_name)

  nmax <-
    select(table, "SampleID", "BarcodeID_F", "BarcodeID_R") %>%
    distinct() %>%
    group_by(BarcodeID_F, BarcodeID_R) %>%
    count() %>% pull(n) %>% max(na.rm = T)

  if (nmax > 1) {
    abort(str_c(arg_name, ' distinct barcode pairs must be associated with a single SampleID'))
  }

}

check_marker_table <- function(table) {

  req_cols <- c("MarkerID", "Forward", "Reverse", "ReferenceSequence")
  arg_name <- 'marker_table'
  check_character_table(table, req_cols, arg_name)
  for (cn in c("Forward", "Reverse", "ReferenceSequence")){
    check_is_dna_char(table[[cn]], str_c(arg_name, '$', cn))
  }
  check_is_distinct(table$MarkerID, 'marker_table$MarkerID')
}

check_marker_panel <- function(table) {

  req_cols <- c("MarkerID", "Haplotype", "Sequence")
  arg_name <- 'marker_panel'
  check_character_table(table, req_cols, arg_name)
  check_is_dna_char(table$Sequence, 'marker_panel$Sequence')
  check_is_distinct(table$Haplotype, 'marker_panel$Haplotype')
}

check_is_distinct <- function(x, arg_name) {
  if (length(x) != length(unique(x))) {
    abort(str_c(arg_name, ' must all be unique'))
  }
}

#' @importFrom rlang abort
#' @importFrom Biostrings readDNAStringSet
check_fasta_has_seq <- function(fasta_fn, arg_name, req_seq_names) {

  if (!file.exists(fasta_fn)) {
    abort(str_c(arg_name, ' file does not exist at path "', fasta_fn, '"'))
  }

  dna_ss <-
    tryCatch(readDNAStringSet(fasta_fn),
             error = function(e) abort(str_c('failed to read fasta file ', arg_name, ' at "', fasta_fn, ' "')))

  if (!all(req_seq_names %in% names(dna_ss))) {
    abort(str_c(arg_name, ' does not contain all barcode ids present in sample_table'))
  }
}

check_barcodes_fasta <- function(barcodes_fwd, barcodes_rev, sample_table) {

  check_fasta_has_seq(barcodes_fwd, "barcodes_fwd", sample_table$BarcodeID_F)
  check_fasta_has_seq(barcodes_rev, "barcodes_rev", sample_table$BarcodeID_R)
}

#' @importFrom rlang abort is_string is_bool
check_dir_creation <- function(dir_path, arg_name=deparse(substitute(dir_path)), overwrite = FALSE) {

  stopifnot(is_string(dir_path), is_bool(overwrite))

  if (dir.exists(dir_path)) {
    if (overwrite) {
      unlink(dir_path, recursive = TRUE)
      if (dir.exists(dir_path)) {
        abort(str_c(arg_name, ' failed to remove directory at "', dir_path, '"'))
      }
    } else {
      abort(str_c(arg_name, ' already exists at "', dir_path, '"'))
    }
  }

  dir.create(dir_path, recursive = T)

  if (!dir.exists(dir_path)) {
    abort(str_c('failed to create ',  arg_name, ' at "', dir_path, '"'))
  }
}
bahlolab/HaplotypReportR documentation built on Dec. 2, 2019, 7:36 p.m.