R/filtR_function.R

#' Filters OTUs Generated by PCR or Sequencing Error from Count Tables
#'
#' Using propr and ALDEx2 functions, filtR removes OTUs generated by PCR and sequencing error from count tables. The input for this package is a count table with the columns as samples and the rows as OTUs/ASVs.
#' @param count_file Input your saved count table file.
#' @param rho_CO Input your rho value cutoff. Range of 0 to 1. Higher value results in a tighter filter with fewer OTUs being discarded. Defaults to 0.7.
#' @param clr_CO Input your centered log ratio difference cutoff. Higher value results in a tighter filter with fewer OTUs being discarded. Defaults to 5.
#' @param plot Set to TRUE to create scatter plots of the CLRs of the filtered OTU plotted against the CLRs of the OTU from which they are derived. The black, solid line is the ideal slope of the line for association (slope of 1) and the red dashed line is the slope of the linear model of the data. Defaults to FALSE.
#' @return Count table with error-derived OTUs removed.
#' @export
#' @examples
#' function(count_file='SRP058027_taxonomy_abundances_SSU_v4.1.tsv', rho_CO=0.7, clr_CO=5, plot=TRUE)
filtR <- function(count_file, rho_CO=0.7, clr_CO=5, plot=FALSE) {
  m <- read.table(count_file)
  taxcol <- c()
  for(i in 1:ncol(m)) {
    if(class(m[,i]) == "factor") {
    taxcol <- c(taxcol, i)
    }
    else {
      next
    }
  }
  if(length(taxcol) == 0) {
    m.n0 <- m
  }
  else {
    m.n0 <- m[,-taxcol]
  }
  rownames(m.n0) <- 1:nrow(m.n0)
  m.clr <- aldex.clr(m.n0, conds = rep("x", ncol(m.n0)))
  m.propr <- aldex2propr(m.clr)
  m.propr <- m.propr[">", rho_CO]
  if(length(m.propr@pairs) == 0) {
    stop('Rho cutoff resulted in no pairs of associated features being identified. Consider relaxing the rho cutoff.', call. = FALSE)
    }
  else {
    m.propr.simp <- simplify(m.propr)
    }
  rhopairs <- data.frame()
  coords <- for(i in m.propr.simp@pairs) {
       if(i %% ncol(m.propr.simp@logratio) == 0) {
           rhopairs <- rbind(rhopairs, c(ncol(m.propr.simp@logratio), i %/% ncol(m.propr.simp@logratio)))
       }
       else {
           rhopairs <- rbind(rhopairs, c(i %% ncol(m.propr.simp@logratio) , i %/% ncol(m.propr.simp@logratio) +1))
       }
  }
  clrmeans <- colMeans(m.propr.simp@logratio)
  clrmeans <- as.vector(clrmeans)
  clrdiffer <- function(x) {
    clrmeans[rhopairs[,1]] - clrmeans[rhopairs[,2]]
  }
  clrdiff <- sapply(1, clrdiffer)
  taxpairs <- as.numeric(colnames(m.propr.simp@logratio))
  taxa <- m[taxpairs,1]
  taxa <- as.character(taxa)
  final1 <- cbind(colnames(m.propr.simp@logratio[,rhopairs[,1]]), colnames(m.propr.simp@logratio[,rhopairs[,2]]), format(round(m.propr.simp@matrix[m.propr.simp@pairs], 4), nsamll=4), format(round(clrdiff, 4), nsmall=4), taxa[rhopairs[,1]], taxa[rhopairs[,2]])
  colnames(final1) <- c("OTU/ASV_1", "OTU/ASV_2", "Rho_Value", "CLR_Difference", "Taxonomy_of_OTU/ASV_1", "Taxonomy_of_OTU/ASV 2")
  final5 <- final1[which(abs(as.numeric(final1[,4])) > clr_CO),]
  if(class(final5) == "character"){
    final5 <- as.matrix(final5)
    final5 <- t(final5)
  }
  m.neg <- as.numeric(final5[which(as.numeric(final5[,4]) < 0), 1])
  m.pos <- as.numeric(final5[which(as.numeric(final5[,4]) > 0), 2])
  m.remove <- unique(sort(c(m.pos, m.neg)))
  if(length(m.remove) == 0) {
    stop('CLR cutoff resulted in no OTUs/ASVs being discarded. Consider relaxing the CLR cutoff.', call. = FALSE)
  }
  print(paste('Number of OTUs/ASVs filtered from count table: ', length(m.remove)))
  filtR_table <- m[-m.remove,]
  if(plot == TRUE) {
    for(r in 1:nrow(final5)){
      plot.default(m.propr@logratio[,as.numeric(final5[r,1])], m.propr@logratio[,as.numeric(final5[r,2])], xlab = paste('CLR of OTU', final5[r,1]), ylab = paste('CLR of OTU', final5[r,2]), main = paste('Rho:', round(as.numeric(final5[r,3]), digits=3), ',', 'CLR Difference:', round(as.numeric(final5[r,4]), digits=3), ',', 'Slope:', round(lm(m.propr@logratio[,as.numeric(final5[r,2])]~m.propr@logratio[,as.numeric(final5[r,1])])$coefficients[2], digits=3)))
      abline(lm(m.propr@logratio[,as.numeric(final5[r,2])]~m.propr@logratio[,as.numeric(final5[r,1])]), lty=2, col="red")
      abline(lm(m.propr@logratio[,as.numeric(final5[r,2])]~m.propr@logratio[,as.numeric(final5[r,1])])$coefficients[1], 1)
    }
  }

  return(filtR_table)
}
bjoris33/filtR documentation built on May 31, 2019, 7:40 a.m.