tidy.micro: A Pipeline For Microbiome Analysis And Visualization

Documented in otu_filter

#' @title A function to aggregate low prevalence, abundance, or unwanted taxa together
#' @name otu_filter
#' @description Will take a tidi_micro set and aggregate the raw counts of taxa with a low prevalence and/or abundance into a new "Other" taxa. Can also find specific taxa you'd like to include in the "Other" taxa counts. Once the counts are aggregated taxa relative abundance, centered log ratio (CLR) transformations, and presence will be recalculated. This recalculation will only change the "Other" category
#' @param micro_set A tidy_micro data set
#' @param prev_cutoff Minimum percent of subjects with OTU counts above 0
#' @param ra_cutoff At leat one subject must have RA above this subject
#' @param exclude_taxa A character vector of OTU names that you would like filter into your "Other" category
#' @details \eqn{\frac{1}{Total}}{1/Total} will be added to each taxa count for CLR tranformations in order to avoid issues with log(0)
#' @return Returns a tidy_micro set
#' @author Charlie Carpenter and Dan Frank
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#'
#' otu_tabs = list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#' set <- tidy_micro(otu_tabs = otu_tabs, meta = met) %>%
#' filter(day == 7) ## Only including the first week
#'
#' filter_set <- set %>%
#' otu_filter(prev_cutoff = 5, ## 5% of subjects must have this bug, or it is filtered
#'   ra_cutoff = 1, ## At least 1 subject must have RA of 1, or it is filtered
#'   excluted_taxa = c("Unclassified", "Bacteria") ## Unclassified taxa we don't want
#' )
#' @export
otu_filter <- function(micro_set, prev_cutoff = 0, ra_cutoff = 0, exclude_taxa = NULL){

  ## Table, Lib, Taxa name, meta data left over
  met <- micro_set %>% dplyr::select(-dplyr::matches("Table"), -dplyr::matches("Taxa"),
                                    -dplyr::matches("Total"), -dplyr::matches("bin"),
                                    -dplyr::matches("cts"), -dplyr::matches("clr"),
                                    -dplyr::matches("ra")
  ) %>%
    dplyr::distinct(Lib, .keep_all = T)

  ## Applies function to each rank
  micro_set %>%
    plyr::ddply(plyr::.(Table), function(set, meta){

      ## Pulls out Lib, Taxa, and counts and spreads to original otu structure
      otu <- set %>%
        dplyr::select(Lib, Taxa, cts) %>%
        dplyr::filter(!is.na(Taxa)) %>%
        tidyr::spread(Taxa, cts)

      Lib <- otu$Lib; total <- rowSums(otu[,-1]); rr <- unique(as.character(set$Table))

      ## applies all filters specified
      otu.f <- mul_filter(otu[,-1], prev_cutoff, ra_cutoff, total = total,
                          ex = exclude_taxa, rr = rr)

      ## recalculates ra, clr, and bin (just to recalculate the "Other")
      ra <- my_ra(otu.f, total) %>% dplyr::mutate(Lib = Lib)
      clr <- my_clr(otu.f, total) %>% dplyr::mutate(Lib = Lib)
      bin <- my_bin(otu.f, total) %>% dplyr::mutate(Lib = Lib)

      ## Recalculating totals (for "Other" category) and remaking cts
      Total <- rowSums(otu.f)
      cts <- otu.f %>% dplyr::mutate(Lib = Lib, Total = Total)

      ## Melted data
      m.ra <- ra %>% tidyr::gather(Taxa, ra, -Lib)
      m.clr <- clr %>% tidyr::gather(Taxa, clr, -Lib)
      m.bin <- bin %>% tidyr::gather(Taxa, bin, -Lib)
      m.cts <- cts %>% tidyr::gather(Taxa, cts, -Lib, -Total)

      ## Joining all counts and transformations
      long_OTU <- dplyr::left_join(m.bin, m.cts, by = c("Lib", "Taxa")) %>%
        dplyr::left_join(m.clr, by = c("Lib", "Taxa")) %>%
        dplyr::left_join(m.ra, by = c("Lib", "Taxa")) %>%
        dplyr::mutate(Table = rr) %>%
        dplyr::left_join(meta, by = "Lib")

      return(long_OTU)
    }, meta = met) %>%

    ## Reordering to original order
    dplyr::select(Table, Lib, Taxa, Total, bin, cts, clr, ra, everything()) %>%
    dplyr::filter(!is.na(Total)) %>%
    return()
}

## Filter functions to be applied within tidy_mibi
mul_filter <- function(otu, prev_cutoff, ra_cutoff, total, ex, rr){

  ## Prints out curent filtering Table if any filtering is done
  if( any( c(!is.null(ex), prev_cutoff > 0, ra_cutoff > 0) ) ){
    cat("Filter for", rr, "counts\n\n")
  }
  ## Different steps
  if(!is.null(ex)) otu %<>% ex_filter(ex)
  if(prev_cutoff > 0) otu %<>% prev_filter(prev_cutoff)
  if(ra_cutoff > 0) otu %<>% ra_filter(ra_cutoff, total)
  return(otu)
}

prev_filter <- function(otu, prev_cutoff){
  if ((prev_cutoff > 100) | (prev_cutoff < 0)) stop("Prevalence Cutoff must be between 0 and 100")

  starting_cts <- sum(otu)
  prevs <- apply(otu, 2, function(x) 100*sum(x > 0)/length(x))
    ## calculating prevalence and converting to a percent

  cat("Prevalence cutoff: ", prev_cutoff, "% (i.e., at least ", prev_cutoff,
      "% of libaries must be represented to keep OTU)\n",
      sep = "")
  cat("Found", ncol(otu), "OTUs.\n")

  if(sum(prevs < prev_cutoff) == ncol(otu)) stop("All OTUs would be excluded with this cutoff. Halting operation")
  if(sum(prevs < prev_cutoff) > 0) {
    include <- otu[,prevs >= prev_cutoff]
    exclude <- otu[,prevs < prev_cutoff]

    if(class(exclude) %in% c("matrix", "data.frame")) other <- rowSums(exclude)
    else if(is.numeric(exclude)) other <- exclude

    if ("Other" %in% names(include)) {
      include[,"Other"] <- include[,"Other"] + other
      cat("Found 'Other' category in input data.\n")
    } else {
      include$Other <- other
      cat("Created new 'Other' Taxa in OTU table for", ncol(otu)+1, "counts.\n")
    }

    if("Other" %in% names(exclude)){
      cat("Collapsed",sum(prevs < prev_cutoff)-1,"OTUs into 'Other' in OTU table.\n")
    }

    if("Other" %nin% names(exclude)){
      cat("Collapsed",sum(prevs < prev_cutoff),"OTUs into 'Other' in OTU table.\n")
    }

    cat("Converted",sum(exclude),"counts to 'Other' in otu category.\n")
    cat("Remaining OTUs:",ncol(include)," (Including 'Other').\n\n")

    ending_cts <- sum(include)
    if (ending_cts != starting_cts) stop("Ending counts not equal to starting counts!\n")

    return(include)
  }
  else {
    cat("No OTUs with prevalence < ", prev_cutoff, "\n\n", sep = "")
    return(otu)
  }
}

ra_filter <- function(otu, ra_cutoff, total) {
  if ((ra_cutoff > 100) | (ra_cutoff < 0)){
    stop("Relative abundance cutoff must be between 0 and 100 (%)")
  }

  starting_cts <- sum(otu) ## Starting counts

  cat("Relative abundance cutoff:",ra_cutoff,
      "\b% (i.e., at least one library must have RA >",ra_cutoff,
      "\b% to keep OTU).\n")
  cat("Found", ncol(otu), "OTUs.\n")

  ra <- my_ra(otu,total) ## Relative abundance
  ras <- apply(ra, 2, max) ## Max RA
  below_cutoff <- sum(ras < ra_cutoff) #store TRUE/FALSE for whether an OTU has a max abundance < cutoff
  if(below_cutoff == ncol(ra)) stop("All OTUs would be excluded with this RA cutoff. Halting operation!")

  if(below_cutoff > 0) {
    include <- otu[,ras >= ra_cutoff]  ## OTUs above ra_cutoff
    exclude <- otu[,ras < ra_cutoff]   ## OTUs below ra_cutoff

    if(class(exclude) %in% c("data.frame", "matrix")) other <- rowSums(exclude)
    else if(is.numeric(exclude)) other <- exclude

    if("Other" %in% names(otu)){
      cat("Found 'Other' category in input data.\n")
    } else cat("Created new 'Other' Taxa in OTU table for", ncol(otu)+1, "counts.\n")

    if ("Other" %in% names(include)) {
      include[,"Other"] <- include[,"Other"] + other
    } else include$Other <- other

    ending_cts <- sum(include)
    if (ending_cts != starting_cts) stop("Ending counts not equal to starting counts!\n")

    if("Other" %in% names(exclude)){
      cat("Collapsed",below_cutoff - 1,"OTUs into 'Other' in OTU table.\n")
    }

    if("Other" %nin% names(exclude)){
      cat("Collapsed",below_cutoff,"OTUs into 'Other' in OTU table.\n")
    }

    cat("Converted",sum(exclude),"counts to 'Other' otu category.\n")
    cat("Remaining OTUs:",ncol(include)," (Including 'Other').\n\n")

    return(include)
  }
  else {
    cat("No OTUs with max relative abundance <", ra_cutoff, "\b%.\n\n")
    return(otu)
  }
}

ex_filter <- function(otu, ex){
  starting_cts <- sum(otu)  ## Starting counts
  starting_nms <- names(otu) ## Starting names

  if("Other" %in% ex){
    warning("Can not remove 'Other' category from OTU tables. 'Other' will be the aggregated counts of filtered OTUs")
    ex <- ex[ex != "Other"] ## Removing "Other" from ex
    }

  if(any(ex %in% colnames(otu))){

    ## Printing the unwanted taxa that were found
    unc_sum <- 0
    for(i in 1:length(ex)) {

      if(ex[i] %in% names(otu)){
        cat("Found '", ex[i], "' category in input data.\n", sep = "")

        uncounts <- otu[,ex[i]] ## Pulling out the unwanted taxa
        unc_sum <- unc_sum + uncounts ## Running counter for counts collapsed

        if("Other" %in% colnames(otu)) {  ## Combining unclassified with other if Other exists
          if(class(uncounts) %in% c("data.frame", "matrix")){
            otu[ ,"Other"] <- otu[ ,"Other"] + rowSums(uncounts) ## rowSums if many unclassified
          } else otu[ ,"Other"] <- otu[ ,"Other"] + uncounts ## for only a vector of unclassified
         # cat("Found 'Other' category in input data.\n")
        } else {
          if(class(uncounts) %in% c("data.frame", "matrix")){
            otu$Other <- rowSums(uncounts) ## rowSums if many unclassified
          } else otu$Other <- uncounts  ## Making Other column if one doesn't exist
          cat("Created new 'Other' category.\n")
        }
      } else{
        cat("'", ex[i], "' not found in input data.","'", ex[i], "' might have been misspelled.
            \n", sep = "")
      }
    }

    otu <- otu[,names(otu) %nin% ex] ## Taking out ex so that it isn't there twice

    cat("Found", length(starting_nms), "OTUs.\n")
    cat("Collapsed",sum(ex %in% starting_nms),"OTUs into 'Other' in OTU table.\n")
    cat("Converted",sum(unc_sum),"counts to 'Other' otu category.\n")
    cat("Remaining OTUs:", ncol(otu)," (Including 'Other').\n\n")

  } else {
    cat("No taxa found in 'exlude_taxa' found: OTU counts unchanged.\n\n")
  }

  ending_cts <- sum(otu)

  if (ending_cts != starting_cts) stop("Ending counts not equal to starting counts!\n")
  otu
}

sum_fun <- function(Exp, obj, r){

  if(!is.numeric(Exp[,obj])) stop("obj from tidy_mibi must be numeric")
  if(!is.character(r)) stop("Table must be a character")

  Exp <- Exp %>% filter(Table==r)
  n <- tapply(Exp[,obj], Exp$Taxa, median) %>% names()
  m <- tapply(Exp[,obj],Exp$Taxa,median) %>% as.data.frame()
  i <- tapply(Exp[,obj],Exp$Taxa,IQR) %>% as.data.frame()
  Q2 <- tapply(Exp[,obj],Exp$Taxa,quantile,0.025) %>% as.data.frame()
  Q9 <- tapply(Exp[,obj],Exp$Taxa,quantile,0.975) %>% as.data.frame()

  if(r != "phylum") cat("Note: Phylum level names are unclassified taxa.")
  data.frame(
    n = n[!is.na(m)],
    Median = m[!is.na(m)],
    IQR = i[!is.na(m)],
    Quantile_2.5 = Q2[!is.na(Q2)],
    Quantile_97.5 = Q9[!is.na(Q9)]
  ) %>% return()
}

CharlieCarpenter/tidy.micro documentation built on Jan. 19, 2020, 6:28 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CharlieCarpenter/tidy.micro
A Pipeline For Microbiome Analysis And Visualization

R/Filter_Funs.R
In CharlieCarpenter/tidy.micro: A Pipeline For Microbiome Analysis And Visualization

Defines functions otu_filter

Documented in otu_filter

R Package Documentation

Browse R Packages

We want your feedback!

CharlieCarpenter/tidy.micro A Pipeline For Microbiome Analysis And Visualization

R/Filter_Funs.R In CharlieCarpenter/tidy.micro: A Pipeline For Microbiome Analysis And Visualization

Defines functions otu_filter

Documented in otu_filter

R Package Documentation

Browse R Packages

We want your feedback!

CharlieCarpenter/tidy.micro
A Pipeline For Microbiome Analysis And Visualization

R/Filter_Funs.R
In CharlieCarpenter/tidy.micro: A Pipeline For Microbiome Analysis And Visualization