equinizeR: Manage Equine Marker Gene Data

#' Read in ASV table output from DADA2 pipeline
#' @param path The path of ASV table
#' @export
read_asv_table <- function(path)
  read.table(path, sep=",", header=T, row.names = 1)

#' Presence of ASVs over a certain percentage
#' This function checks for ASVs that are above a proportional abundance (in percentage)
#' at a user specified confidence threshold (also as a percentage)
#' @param asv_table The custom ASV table generated by the DADA2 pipeline
#' @param percentage The minimum proportional abundance for an ASV as a percentage
#' @param confidence The confidence threshold for checking ASV abundances
#' @sample_indices The indices of all samples and their ASV counts in the asv_table
#' @export
presence_of_asvs_over_percentage <- function(asv_table, percentage, confidence, sample_indices) {
  asv_table$proportion <- rowSums(asv_table[sample_indices]/sum(rowSums(asv_table[sample_indices])))
  asv_table$percent <- asv_table$proportion * 100
  match_criteria <- asv_table %>% filter(percent > percentage & species.conf < confidence)
  asv_table$proportion <- NULL ; asv_table$percent <- NULL
  return(match_criteria)
}

#' A function to filter the custom ASV table geenerated by the DADA2 pipeline
#' filters based on sample identifier and confidence threshold
#' @param asv_table The custom ASV table geneated by the DADA2 pipeline
#' @param samples String identifier to select desired samples (ie. by population, default is all)
#' @param confidence The desired confidence threshold
#' @export
filter_asv_tab <- function(asv_table, samples = "all", confidence) {
  if (samples != "all") {
    filtered_table <- asv_table %>% filter(species.conf >= confidence) %>% select(c("species", contains(samples)))}
  else {
    filtered_table <- asv_table %>% filter(species.conf >= confidence)
  }
  return(filtered_table)
}

#' A function to compute the number of species per sample in a filtered ASV table
#' NOTE: only works when ASV table is filtered by a specific population, not with default output using samples = "all"
#' @param asv_table filtered ASV table output from filter_asv_tab()
#' @export
number_of_species_per_sample <- function(asv_table){
  zero_rows <- which(rowSums(asv_table[-1])==0)
  asv_table_filtered <- asv_table[-c(zero_rows),]
  aggregated <- aggregate(.~species, data=asv_table[-c(zero_rows),], FUN=sum)
  species_per_sample <- colSums(aggregated[-1] != 0)
  species_per_sample
  return(species_per_sample)
}

#' A function that returns a percentage of the number of reads with assigned taxonomy
#' past a specified confidence threshold (per sample)
#' @param asv_table ASV table output
#' @param samples Character string specifying population of interest
#' @param confidence Confidence threshold to test for
#' @export
percent_reads_assigned_taxa <- function(asv_table, samples="all", confidence){
  tab_0 <- filter_asv_tab(asv_table = asv_table, samples=samples, confidence=0)
  tab_filt <- filter_asv_table(asv_table = asv_table, samples=samples, confidence=0)

  return(colSums(tab_filt[2:ncol(tab_filt)]) / colSums(tab_0[2:ncol(tab_0)]))
}