R/option_parsers.R

Defines functions parse_seq_input get_taxmap_other_cols get_taxmap_cols get_taxmap_table get_taxmap_data verify_taxmap

Documented in get_taxmap_cols get_taxmap_data get_taxmap_other_cols get_taxmap_table parse_seq_input verify_taxmap

#' Check that an object is a taxmap
#' 
#' Check that an object is a taxmap
#' This is intended to be used to parse options in other functions.
#' 
#' @param obj A taxmap object
#' 
#' @family option parsers
#' 
#' @keywords internal
verify_taxmap <- function(obj) {
  if (! "Taxmap" %in% class(obj)) {
    stop(paste0('The object supplied is not a taxmap object. ',
                'It appears to be of type "', class(obj)[1], '"'), 
         call. = FALSE)
  }
}


#' Get a data set from a taxmap object
#' 
#' NOTE: This will be replaced by the function `get_dataset` in the `taxa`
#' package. Get a data set from a taxmap object and complain if it does not
#' exist. This is intended to be used to parse options in other functions.
#' 
#' @param obj A taxmap object
#' @param data Which data set to use. Can be any of the following:
#'   \describe{
#'     \item{Name}{The name of the data set to use.}
#'     \item{Index}{The index of the data set to use.}
#'     \item{TRUE/FALSE vector}{A TRUE/FALSE vector the same length as the
#'     number of datasets, with exactly one TRUE corresponding to the
#'     selected data set.}
#'   }
#' 
#' @family option parsers
#' 
#' @keywords internal
#' 
#' @examples
#' \dontrun{
#' # Parse data
#' x = parse_tax_data(hmp_otus, class_cols = "lineage", class_sep = ";",
#'                    class_key = c(tax_rank = "taxon_rank", tax_name = "taxon_name"),
#'                    class_regex = "^(.+)__(.+)$")
#'                    
#' # Get data set by name
#' print(metacoder:::get_taxmap_table(x, "tax_data"))
#' print(metacoder:::get_taxmap_table(x, "invalid"))
#' 
#' # Get data set by index
#' print(metacoder:::get_taxmap_table(x, 1))
#' print(metacoder:::get_taxmap_table(x, 3)) # invalid
#' 
#' # Get data set by T/F vector
#' print(metacoder:::get_taxmap_table(x, c(T, F)))
#' print(metacoder:::get_taxmap_table(x, c(T, T))) # invalid
#' print(metacoder:::get_taxmap_table(x, c(T, F, F))) # invalid
#'                    
#' }
get_taxmap_data <- function(obj, data) {
  # Check that obj is a taxmap object
  verify_taxmap(obj)
  
  # Convert logicals to numerics 
  if (is.logical(data)) {
    if (length(data) != length(obj$data)) {
      stop("When using a TRUE/FALSE vector to specify the data set, it must be the same length as the number of data sets",
           call. = FALSE)
    } else {
      data <- which(data)
    }
  }
  
  # Check for multiple/no values
  if (length(data) == 0) {
    stop('No dataset specified.', call. = FALSE)
  }
  if (length(data) > 1) {
    stop('Only one dataset can be used.', call. = FALSE)
  }
  
  # Check that data exists
  error_msg <- paste0('The dataset "', data,
                      '" is not in the object supplied. Datasets found include:\n  ',
                      limited_print(paste0("[", seq_along(obj$data), "] ", names(obj$data)),
                                    type = "silent"))
  if (is.character(data)) {
    if (! data %in% names(obj$data)) {
      stop(error_msg, call. = FALSE)
    }
  } else if (is.numeric(data)) {
    if (! data %in% seq_along(obj$data)) {
      stop(error_msg, call. = FALSE)
    }
  }
  
  # Return without printing
  return(invisible(obj$data[[data]]))
}


#' Get a table from a taxmap object
#' 
#' Get a table from a taxmap object and complain if it does not exist.
#' This is intended to be used to parse options in other functions.
#' 
#' @inheritParams get_taxmap_data
#' 
#' @return A table
#' 
#' @family option parsers
#' 
#' @keywords internal
get_taxmap_table <- function(obj, data) {
  # Get the data set and do checks
  table <- get_taxmap_data(obj, data)
  
  # Check that the data is a table
  if (! is.data.frame(table)) {
    stop(paste0('The dataset "', data,  '" is not a table.'), call. = FALSE)
  }
  
  # Return without printing
  return(invisible(table))
}


#' Get a column subset 
#' 
#' Convert logical, names, or indexes to column names and check that they exist.
#' 
#' @param obj A taxmap object
#' @param data The name of a table in \code{obj} that contains counts.
#' @param cols The columns in the data set to use. Takes one of
#'   the following inputs:
#'   \describe{
#'     \item{TRUE/FALSE:}{All non-target columns will be preserved or not.}
#'     \item{Vector of TRUE/FALSE of length equal to the number of columns:}{Preserve the columns
#'   corresponding to \code{TRUE} values.}
#'     \item{Character vector:}{The names of columns to preserve}
#'     \item{Numeric vector:}{The indexes of columns to preserve}
#'   }
#' 
#' @keywords internal
#' 
#' @family option parsers
#' 
#' @examples
#' \dontrun{
#' # Parse data
#' x = parse_tax_data(hmp_otus, class_cols = "lineage", class_sep = ";",
#'                    class_key = c(tax_rank = "taxon_rank", tax_name = "taxon_name"),
#'                    class_regex = "^(.+)__(.+)$")
#'                    
#' # Get all col names
#' metacoder:::parse_taxmap_cols(x, "tax_data")
#' 
#' # Get col names by index
#' metacoder:::parse_taxmap_cols(x, "tax_data", 2:4)
#' 
#' # Get col names by name (i.e. verify)
#' metacoder:::parse_taxmap_cols(x, "tax_data", c("taxon_id", "lineage"))
#' metacoder:::parse_taxmap_cols(x, "tax_data", c("taxon_id", "not_valid"))
#' 
#' # Get colnames by TRUE/FALSE vector
#' metacoder:::parse_taxmap_cols(x, "tax_data", startsWith(colnames(x$data$tax_data), "7"))
#'                    
#' }
get_taxmap_cols <- function(obj, data, cols = NULL) {
  # Get table used. This checks the obj as well
  my_table <- get_taxmap_table(obj, data)
  
  # If NULL, return all cols
  if (is.null(cols)) {
    cols <- TRUE
  }
  
  # Convert factors to characters
  if (is.factor(cols)) {
    cols <- as.character(cols)
  }
  
  # Convert logical/numeric to column names
  if (is.logical(cols)) {
    if (length(cols) == ncol(my_table)) { # Is a TRUE/FALSE vector
      result <- colnames(my_table)[cols]
    } else if (length(cols) == 1) { # Is a single TRUE/FALSE
      if (cols) {
        result <- colnames(my_table)
      } else {
        result <- character(0)
      }
    } else { # Incorrect length of TRUE/FALSE vector
      stop(paste0("When specifying columns with a TRUE/FALSE vector, it must be", 
                  "either a single value or have a length equal to the number of", 
                  "columns in '", data, "'."),
           call. = FALSE)
    }
  } else if (is.character(cols)) { # Is already column names
    invalid_cols <- cols[! cols %in% colnames(my_table)]
    if (length(invalid_cols) == 0) {
      result <- cols
    } else {
      stop(paste0('The following ', length(invalid_cols), ' column(s) are not in "', data, '":\n  ',
                  limited_print(invalid_cols, type = "silent")),
           call. = FALSE)
    }
  } else if (is.numeric(cols)) { # If column indexes
    invalid_cols <- cols[cols > ncol(my_table) | cols < 1]
    if (length(invalid_cols) == 0) {
      result <- colnames(my_table)[cols]
    } else {
      stop(paste0('The following ', length(invalid_cols), ' column indexes are not valid for "', data, '":\n  ',
                  limited_print(invalid_cols, type = "silent")),
           call. = FALSE)
    }
  } else {
    stop(paste0("`cols` is of the invalid type: ", class(cols), ".\n", 
                'The "cols" option must either be TRUE/FALSE or a vector of valid column names/indexes.'),
         call. = FALSE)
  }
  
  # Retrun result
  return(result)
}


#' Parse the other_cols option
#'
#' Parse the other_cols option used in many calculation functions.
#'
#' @param obj A taxmap object
#' @param data The name of a table in \code{obj} that contains counts.
#' @param cols The names/indexes of columns in \code{data} to use. Takes one
#'   of the following inputs:
#'   \describe{
#'     \item{TRUE/FALSE:}{All columns will used.}
#'     \item{Vector of TRUE/FALSE of length equal to the number of columns:}{Use the columns
#'   corresponding to \code{TRUE} values.}
#'     \item{Character vector:}{The names of columns to use}
#'     \item{Numeric vector:}{The indexes of columns to use}
#'   }
#' @param other_cols Preserve in the output non-target columns present in the
#'   input data. The "taxon_id" column will always be preserved. Takes one of
#'   the following inputs:
#'   \describe{
#'     \item{TRUE/FALSE:}{All non-target columns will be preserved or not.}
#'     \item{Vector of TRUE/FALSE of length equal to the number of columns:}{Preserve the columns
#'   corresponding to \code{TRUE} values.}
#'     \item{Character vector:}{The names of columns to preserve}
#'     \item{Numeric vector:}{The indexes of columns to preserve}
#'   }
#'   
#' @keywords internal
#' 
#' @family option parsers
#' 
#' @examples
#' \dontrun{
#' # Parse data for examples
#' x = parse_tax_data(hmp_otus, class_cols = "lineage", class_sep = ";",
#'                    class_key = c(tax_rank = "taxon_rank", tax_name = "taxon_name"),
#'                    class_regex = "^(.+)__(.+)$")
#' 
#' # If all cols are used, there are no other cols, only "taxon_id"
#' metacoder:::get_taxmap_other_cols(x, data = "tax_data", cols = TRUE)
#' 
#' # If a subset of target columns is specified, the rest are returned 
#' metacoder:::get_taxmap_other_cols(x, data = "tax_data", cols = 2:3)
#' 
#' # Additionally, a subset of other columns can be specified
#' metacoder:::get_taxmap_other_cols(x, data = "tax_data", cols = 2:3,
#'                                   other_cols = 4:5)
#'                    
#' }
get_taxmap_other_cols <- function(obj, data, cols, other_cols = NULL) {
  # Get table used 
  my_table <- get_taxmap_table(obj, data)
  
  # Get target cols
  cols <- get_taxmap_cols(obj, data, cols)
  
  # Get other cols
  other_cols <- get_taxmap_cols(obj, data, other_cols)
  
  # Remove target cols if present
  in_both <- other_cols %in% cols
  if (sum(in_both) > 0) {
    warning(paste0("The following columns will be replaced in the output:\n  ",
                   limited_print(other_cols[in_both], type = "silent")),
            call. = FALSE)
  }
  result <- other_cols[! in_both]
  
  # Add taxon id column regardless
  if (! "taxon_id" %in% result) {
    result <- c("taxon_id", result)
  }
  
  return(result)
}


#' Read sequences in an unknown format
#'
#' Read sequences in an unknown format. This is meant to parse the sequence
#' input arguments of functions like \code{\link{primersearch}}.
#' 
#' @param input (\code{character}) One of the following: 
#' \describe{
#'   \item{A character vector of sequences}{See the example below for what this
#'   looks like. The parser \code{\link{read_fasta}} produces output like this.}
#'   \item{A list of character vectors}{Each vector should have one base per element.}
#'   \item{A "DNAbin" object}{This is the result of parsers like
#'   \code{\link[ape]{read.FASTA}}.}
#'   \item{A list of "SeqFastadna" objects}{This is the result of parsers like
#'   \code{\link[seqinr]{read.fasta}}.}
#'   Either "input" or "file" must be supplied but not both.
#' }
#' @param file The path to a FASTA file containing sequences to use. Either
#'   "input" or "file" must be supplied but not both.
#' @param output_format The format of the sequences returned. Either "character" or "DNAbin".
#' @param u_to_t If `TRUE`, then "U" in the sequence will be converted to "T".
#' 
#' @return A named character vector of sequences
#' 
#' @keywords internal
parse_seq_input <- function(input = NULL, file = NULL, output_format = "character", u_to_t = FALSE) {
  # Check parameters
  if (sum(! c(is.null(file), is.null(input))) != 1) {
    stop(call. = FALSE,
         "Either `file` or `input` must be supplied, but not both.")
  }
  
  if (! is.null(file) && (! is.character(file) || length(file) != 1)) {
    stop(call. = FALSE,
         "`file` must be a character vector of length 1 that is a valid path to a file.")
  }
  
  # Convert to common format
  if (output_format == "character") {
    if (! is.null(file)) {
      result <- read_fasta(file)
    } else if (length(input) == 0 || inherits(input, "character")) {
      result <- input
    } else if (inherits(input,"DNAbin")) {
      result <- toupper(vapply(as.character(input), paste, character(1), collapse = ""))
    } else if (inherits(input[[1]], "SeqFastadna") || inherits(input, "list")) {
      result <- vapply(input, paste, character(1), collapse = "")
    } else {
      stop(paste0('Could not parse sequence information of class "', class(input), '".'),
           call. = FALSE)
    }
    
    if (u_to_t) {
      result <- vapply(result, FUN = gsub, FUN.VALUE = character(1),
                       pattern = "U", replacement = "T", fixed = TRUE)
      result <- vapply(result, FUN = gsub, FUN.VALUE = character(1),
                       pattern = "u", replacement = "t", fixed = TRUE)
    }
    
  } else if (output_format == "DNAbin") {
    if (! is.null(file)) {
      if (u_to_t) {
        file <- make_fasta_with_u_replaced(file)
      }
      result <- ape::read.FASTA(file)
    } else if (length(input) == 0 || inherits(input, "character")) {
      if (u_to_t) {
        input <- vapply(input, FUN = gsub, FUN.VALUE = character(1),
                        pattern = "U", replacement = "T", fixed = TRUE)
        input <- vapply(input, FUN = gsub, FUN.VALUE = character(1),
                        pattern = "u", replacement = "t", fixed = TRUE)
      }
      result <- ape::as.DNAbin(strsplit(input, split = ""))
    } else if (inherits(input,  "DNAbin")) {
      result <- input
    } else if (inherits(input[[1]], "SeqFastadna") || inherits(input, "list")) {
      input <- lapply(input, function(x) {
        attributes(x) <- NULL
        return(x)
      })
      if (u_to_t) {
        input <- lapply(input, gsub, pattern = "U", replacement = "T", fixed = TRUE)
        input <- lapply(input, gsub, pattern = "u", replacement = "t", fixed = TRUE)
      }
      result <- ape::as.DNAbin(input)
    } else {
      stop(paste0('Could not parse sequence information of class "', class(input), '".'),
           call. = FALSE)
    }
  } else {
    stop(paste0('Invalid output format "', output_format, '".'))
  }
  
  return(result)
}

Try the metacoder package in your browser

Any scripts or data that you put into this service are public.

metacoder documentation built on April 4, 2023, 9:08 a.m.