R/taxonomy_functions.R
In RAM: R for Amplicon-Sequencing-Based Microbial-Ecology

valid.taxonomy <- function(data) {
  prefix <- c("k__", "p__", "c__", "o__", "f__", "g__", "s__")

  if ( class(data) != "list" ) {
    stop("please provide input data as a list, see ?RAM.input.formatting")
  }

  labels <- names(data)
  alert.list <- list()
  for ( i in 1:length(data) ) {
    otu <- data[[i]]
    if ( is.null(otu) ) { break }
    label <- names(data)[i]
    # check whether missing prefix for each rank
    valid.OTU(otu)
    # need to convert all taxonomy to character, NOT factor, 
    # otherwise, gsub wouldn't work
    otu$taxonomy <- as.character(otu$taxonomy)
    alert <- vector()
    for ( j in 1:nrow(otu) ) {
       # remove the ; at the end of the lineage
       if ( grepl(";[ ]+$", otu[j,"taxonomy"], perl=TRUE) ) {         
           otu[j,"taxonomy"] <- gsub(";[ ]+$", "", otu[j,"taxonomy"], 
                                   perl=TRUE)
       } else if ( grepl("[;]+$", otu[j,"taxonomy"], perl=TRUE) ) { 
          otu[j,"taxonomy"] <- gsub("[;]+$", "", otu[i,"taxonomy"], 
                                       perl=TRUE)
       } else {
           otu[j,"taxonomy"] <- otu[j,"taxonomy"]
       }
        # now check # of '; ' and ';'
      count1 <- sapply(regmatches("; ", gregexpr("; ", 
                          otu[j,"taxonomy"])), length) 
      count2 <- sapply(regmatches(";", gregexpr(";", 
                          otu[j,"taxonomy"])), length)
      if ( count1 == 0 && count2 ==0 ) {
         # only kingdom information avaiable 
         # otuID  k__fungi
         next
      } 
      if (count1 == 0 && count2 > 0 ) {
         # otu was separated by ';', not '; '
         alert <- c(alert, "taxonomy lineages are not properly formatted, please check ?RAM.rank.formatting; taxonomic ranks should be separated by '; ', i.e. ';' and a white space")
      } 
      if ( count2 >=7 ) {
         alert <- c(alert, "RAM accept 7 taxonomic ranks, see ?RAM.rank.formatting; the otu table has more than 7 taxonomic ranks") 
      }
      miss_pre <- vector()
      miss_rk <- vector()
      for ( l in prefix ) {
         if ( !grepl(l, otu[j,"taxonomy"]) ) {
              miss_pre <- c(miss_pre, l)
              miss_rk <- c(miss_rk, .get.rank.name(l))
           }
       }
       if ( length(miss_pre) != 0 ) {
         alert<- c(alert, paste("missing prefix or missing taxonomy ranks in the lineages; 1) if missing prefix, consider reformatting the taxonomy; 2) if missing ranks only, it's probably ok" ))
       } else {
         next;
       }
     }
     len <- unique(alert)
     alert <- paste(unique(alert), collapse="\n")

     if ( len == 0 || length(len) == 0 ) {
        warning("Format of the taxonomy column is good")
     } else {
        y <- "Consider reformatting the OTU table's taxonomy, check ?RAM::reformat.taxonomy"
        warning(paste0(paste("For ", label, ": ", sep=""), alert, "\n", y, sep="; "))
     }  
   }
}


.capitalize.vec <- function(vec) {
  output <- vector()
  for ( i in vec ) {
    output <- c(output, .capitalize(i))
  }
  return(output)
}


reformat.taxonomy <- function(data, input.ranks=NULL, sep="; ") {
  
  if ( class(data) != "list" ) { 
    stop("provide all otu tables as list. See ?RAM.input.formatting")
  }
  new.otu <- list()
  labels <- names(data)
  # process each otu
  for (i in 1:length(data) ) {
    label <- names(data)[i]
    otu <- data[[i]]
    if (is.null(otu)) {
      break
    }
    valid.OTU(otu)
    ls <- list()
    ls[[label]] <- otu
    valid.taxonomy(data=ls)
    
    # default outputs for RAM taxonomy
    prefix <- c("k__", "p__", "c__", "o__", "f__", "g__", "s__")
    names <- c("kingdom", "phylum", "class", "order", "family",
               "genus", "species")
    otu$taxonomy <- as.character(otu$taxonomy)
    
    # reformat 
    # check whether the 'input.ranks include names
    input.ranks <- tolower(input.ranks)
    if ( is.null(input.ranks) || !any(input.ranks %in% names) ) {
      stop("please provide ALL taxonomic ranks in the input OTU tables, see ?reformat.taxonomy")
    } 
    
    # split the current taxonomy column
    # remove the last ";" at the end of line
    otu$taxonomy <- gsub("[; ]+$", "", otu$taxonomy, perl=TRUE)
    suppressWarnings(otu.split <- col.splitup(otu, col="taxonomy", 
                                              sep=sep, drop=TRUE,
                                              names=input.ranks))
    
    # select columns only belong to 
    n <- which(colnames(otu.split) %in% names)
    n.names <- intersect(colnames(otu.split), names)
    n.otu <- ncol(otu)
    otu.split <- otu.split[, c(1:(n.otu-1), n)]
    
    # replace the 'unclassified' taxonomy annotation to proper format
    for (i in n.names) {
      rank_pat <- .get.rank.pat(i)
      rank_pat2 <- paste0(rank_pat, rank_pat)
      otu.split[[i]] <- paste0(substring(i,1,1), "__", 
                               otu.split[[i]])
      otu.split[[i]] <- gsub(rank_pat2, rank_pat, otu.split[[i]])
    }
    # combine to 'taxonomy' column
    n.split <- ncol(otu.split)
    for (i in 1:nrow(otu.split) ) {
      row<-vector();
      n <- n.otu:n.split
      for (j in n) {
        row<-c(row, otu.split[i,j])
      }
      otu.split[i, "taxonomy"] <- paste(row, collapse="; ");
      otu.split[i, "taxonomy"] <- gsub(";[ ]+$", "", 
                                       otu.split[i,"taxonomy"], 
                                       perl=TRUE)
    }  
    otu.split <- otu.split[, c(1:(n.otu-1), ncol(otu.split))]
    new.otu[[label]] <- otu.split
  }      
  return(new.otu)
}
  

get.rank <- function(otu1, otu2=NULL, rank=NULL) {
  single.otu <- is.null(otu2)
  single.rank <- !is.null(rank)
  valid.OTU(otu1, otu2)
  
  tax.classes <- c("kingdom", "phylum", "class", "order", "family", 
                   "genus", "species")
  
  # if given both otu and otu2, call get.rank for both
  if (!single.otu) {
    
    output <- list()
    output$otu1 <- get.rank(otu1=otu1, rank=rank)
    # this looks ugly, but we are just calling get.rank with a single
    # OTU argument (which is named otu2)
    output$otu2 <- get.rank(otu1=otu2, rank=rank)
    
   # if (single.rank) {
   #   names(output)$otu1 <- .get.rank(.get.rank.ind(rank))
   #   names(output$otu2) <- .get.rank(.get.rank.ind(rank))
   # } else {
   #   names(output$otu1) <- tax.classes
   #   names(output$otu2) <- tax.classes
   # }
    
    return(output)
  }
  
  if (single.rank) {
    .valid.rank(rank)
    
    # select only the rows with the given taxonomic prefix in their taxonomy
    pat <- .get.rank.pat(rank)
    output <- otu1[grep(pat, otu1$taxonomy), ]
    
    # now only select the ones that are NOT on the blacklist
    remove.pat <- paste0(.blacklist(rank), "|no_taxonomy", paste0("|", pat,"$"))
    
    output <- output[!grepl(remove.pat, output$taxonomy, ignore.case = TRUE, perl=TRUE), ]
    
    if (dim(output)[1] == 0) {
      warning(paste("no OTUs classified at the", .get.rank(.get.rank.ind(rank)),
        "level."))
    }
    return(output)
    
  } else { # call get.rank for each taxonomic classification
    output <- vector(mode="list", length=length(tax.classes))
    
    for (i in tax.classes) {
      output[.get.rank.ind(i)] <- list(get.rank(otu1=otu1, rank=i))
    }
    
    names(output) <- tax.classes
    return(output)
  }
}

tax.split <- function(otu1, otu2=NULL, rank=NULL) {
  valid.OTU(otu1, otu2)
  single.otu <- is.null(otu2)
  single.rank <- !is.null(rank)
  tax.classes <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")
  
  # if given otu and otu2, call tax.split for both
  if (!single.otu) {
    
    output <- list()
    
    output$otu1 <- tax.split(otu1=otu1, rank=rank)
    output$otu2 <- tax.split(otu1=otu2, rank=rank)
    
    return(output)
  }
  
  if (single.rank) {
    # get the index for rank (also does data validation for rank)
    .valid.rank(rank)
    tax.ind <- .get.rank.ind(rank)
  } 
  
  # split OTU table taxonomy information into individual columns
  suppressWarnings(otu1.split <- col.splitup(otu1, col="taxonomy", sep="; ", 
                   max=length(tax.classes), names=tax.classes, drop=TRUE))
  
  # columns from 1 to max contain the numeric data, the other taxonomic
  max <- dim(otu1)[2] - 1
  
  # we need seven taxonomy columns; if one is missing (because nothing classified
  # at that level), add empty column
  
  # while we have less columns than we should...
  #while (dim(otu1.split)[2] < max + length(tax.classes)) {
    # ...add a column filled with empty strings
  #  otu1.split <- cbind(otu1.split, rep("", times=dim(otu1.split)[1]))
  #}
  
  # strip the 'formatting' bits of taxonomic info
  otu1.split[ ,-(1:max)] <- gsub("k__|p__|c__|o__|f__|g__|s__|;", "", 
        as.matrix(otu1.split[ ,-(1:max)]))
  
  if (single.rank) {
    # add the single taxonomic column to the numeric data, return that data frame
    return(otu1.split[ , names(otu1.split) %in% c(names(otu1)[1:max], tax.classes[tax.ind])])
    
  } else {
    # set up list for output
    otu1.taxa <- vector("list", length(tax.classes))
    names(otu1.taxa) <- tax.classes 
    
    for (i in 1:length(tax.classes)) {
      # creating a list of data frames is surprisingly difficult in R;
      # if you do not use the list(df1, df2, ...) syntax, you get a list composed
      # of the first data frame in its entirety, followed be the individual columns
      # of the other data frames. Instead we wrap each data frame in a list itself 
      # before we add it, then we can access them with [[]]
      otu1.taxa[i] <- list(otu1.split[ , names(otu1.split) %in% c(names(otu1)[1:max],
                   tax.classes[i])])
    }
    
    return(otu1.taxa)
  }
}

tax.abund <- function(otu1, otu2=NULL, rank=NULL,            
                      drop.unclassified=FALSE, 
                      top=NULL, count=TRUE, mode="number") {
  
  single.otu <- is.null(otu2)
  valid.OTU(otu1, otu2)
  single.rank <- !is.null(rank)
  # data validation for top is done later in the function (when the dimensions of 
  # the taxonomy matrix are known)
  filter <- !is.null(top)
  
  if (!is.logical(drop.unclassified) || length(drop.unclassified) != 1L) {
    stop("argument 'drop.unclassified' must be a logical vector of length one.")
  }
  
  if (!is.logical(count) || length(count) != 1L) {
    stop("argument 'count' must be a logical vector of length one.")
  }
  
  if (!is.character(mode) || !any(mode %in% c("number", "percent"))) {
    stop("argument 'mode' must be one of 'number' or 'percent'.")
  }
  
  # if given otu and otu2, call tax.abund for both
  if (!single.otu) {
    
    drop <- drop.unclassified
    output <- list()
    
    output$otu1 <- tax.abund(otu1, rank=rank, drop.unclassified=drop, top=top,
         count=count, mode=mode)
    output$otu2 <- tax.abund(otu2, rank=rank, drop.unclassified=drop, top=top,
         count=count, mode=mode)
    
    return(output)
  }
  
  # get the OTU table in proper format
  if (single.rank) {
    .valid.rank(rank)
    tax.list <- list(tax.split(otu1, rank=rank))
  } else {
    tax.list <- tax.split(otu1)
  }
  
  for (i in seq(along=tax.list)) { 
    
    # update taxonomy label to "taxonomy"
    names(tax.list[[i]])[dim(tax.list[[i]])[2]] <- "taxonomy" 
    # aggregate the otu table by taxonomy rank names 
    tax.list[[i]] = stats::aggregate(tax.list[[i]][ , -dim(tax.list[[i]])[2]],  
          by = list(tax.list[[i]]$taxonomy), FUN = .sumcol) 
    # change row names to header provided by aggregate
    rownames(tax.list[[i]]) <- tax.list[[i]][ , 1] 
    # remove first column (that information is now in the row names)
    tax.list[[i]] <- tax.list[[i]][ , -1]
    # transpose table (the as.data.frame generates the V1 heading) 
    tax.list[[i]] <- as.data.frame(t(tax.list[[i]])) 
    
    # if count is false, return relative abundance
    if (!count) {
      tax.list[[i]] <- vegan::decostand(tax.list[[i]], method="total")
    }
    
    # order the table by column sums
    tax.list[[i]] <- tax.list[[i]][ , order(colSums(tax.list[[i]]), 
            decreasing = TRUE), drop=FALSE] 
    
    # remove all zero entries
    tax.list[[i]] <- tax.list[[i]][ , colSums(tax.list[[i]]) > 0, drop=FALSE]
    
    # rename the "V1" column
    if (!single.rank) {
      names(tax.list[[i]])[names(tax.list[[i]]) == "V1"] <- 
    paste("unclassified_below_", .get.rank(i - 1), sep="")
    } else {
      # if we are only processing one element, we cannot use the i index 
      # to get the correct rank
      names(tax.list[[i]])[names(tax.list[[i]]) == "V1"] <- 
    paste("unclassified_below_", .get.rank(.get.rank.ind(rank) - 1), sep="")
    }
    
    # remove unclassified columns
    if (drop.unclassified) {
      # this selects all columns NOT containing in the blacklist
      remove.pat <- gsub(.get.rank.pat(.get.rank(i)), "", paste0(.blacklist(.get.rank(i)), "|no_taxonomy"))
      
      tax.list[[i]] <- tax.list[[i]][ , !grepl(remove.pat, names(tax.list[[i]]),
               ignore.case=TRUE), 
             drop=FALSE]
    }
    
    # keep only the 'top' most abundant groups, where top is user-given 
    if (filter) {
      tax.list[[i]] <- .select.top(tax.list[[i]], top, count, mode)
    }
  }

  tax.out <- list()
  index <- 1
  for ( i in 1:length(tax.list) ) {
    tax <- tax.list[[i]]
    if ( is.null(tax) ) { break }
    lab <- names(tax.list)[i]
    if (is.null(lab)) {
      lab1 <-index
    } else {
       lab1 <- lab
    }
    names(tax) <- gsub(" ", "_", names(tax))
    tax.out[[lab1]] <- tax
    index <- index + 1
  }
  
  if (single.rank) {
    return(tax.out[[1]])
    
  } else {
    return(tax.out)
  }
}

.select.top <- function(abund, top, count, mode) {
  if (!is.numeric(top) || length(top) != 1) {
    stop("argument 'top' must be a numeric vector of length one.")
  }
  
  if (!is.character(mode) || !any(mode %in% c("number", "percent"))) {
    stop("argument 'mode' must be one of 'number' or 'percent'.")
  }
  
  if (top <= 0) {
    stop("argument 'top' must be greater than zero.")
  }
  
  # take top X samples when mode == "number"
  if (mode == "number") {
    num.groups <- dim(abund)[2]
    
    if (top > num.groups) {
      warning("argument 'top' is greater than the total number of groups, returning all groups.")
      top <- num.groups
    }
    
    abund <- abund[ ,1:top, drop=FALSE]
    
    
  # find all samples with RA > X% when mode == "percent"
  } else if (mode == "percent") {
    
    if (top > 100) {
      stop("argument 'top' must be in the range (0, 100].")
    }
    
    percent <- top / 100
    
    # we need the 'count' parameter to determine whether we need to standardize
    # the data ourselves or not
    if (count) {
      abund.stand <- vegan::decostand(abund, "total")
    } else {
      abund.stand <- abund
    }
    
    abund.max <- apply(abund.stand, MARGIN=2, FUN=max)
    # find samples w/ max relative read abundance < 'top'% and remove
    exclude <- names(which(abund.max < percent))
    
    # if all groups have been excluded
    if (length(exclude) == dim(abund)[2]) {
      stop("no taxon groups with relative abundance above 'top' percent.")
    }
    
    # select the samples above 'top'%
    abund <- abund[ ,-(which(names(abund.max) %in% exclude)), drop=FALSE]
  }
  
  abund
}

tax.fill <- function(data, downstream=TRUE) {
  
  valid.OTU(data)
  
  if (length(downstream) != 1L || !is.logical(downstream) || is.na(downstream)) {
    stop("'downstream' must be a character vector of length one.")
  }
  
  if (downstream) {
    range <- 1:7
    #belong <- "belongs_to_"
    belong <- "_sp."
  } else {
    range <- 7:1
    #belong <- "classified_to_"
    belong <- "classified_to_"
  }
  
  taxonomy <- strsplit(data$taxonomy, "; ")
  taxonomy.length <- length(taxonomy)
  
  # initialize the vector of classified names
  classified.names <- rep("no_taxonomy", times=taxonomy.length)
  # initialize list for fixed taxonomy
  new.taxonomy <- vector(mode="list", length=taxonomy.length)
  
  for (i in range) {
    # this selects the i-th element of every list and returns a character vector
    current.groups <- unlist(lapply(taxonomy, FUN="[", i))

    # any entries that are TRUE (i.e. match blacklist) need to be replaced
    # we add that pattern to match any group prefix with no name 
    # (since we split on semicolons the rank argument in .blacklist won't work)
    blacklist <- paste0(paste(.blacklist(), collapse="|"), "|[kpcofgs]__$", sep="")
    
    replace <- is.na(current.groups) | grepl(blacklist, current.groups,
             ignore.case = TRUE)
    
    num.groups <- length(current.groups)

    for (j in 1:num.groups) {
      
      # if we need to replace the classification, use last good name
      if (replace[j]) {
          if (downstream) {
              new.name <- paste0(.get.rank.pat(.get.rank(i)), 
                        gsub("[kpcofgs]__", "", classified.names[j], perl=TRUE), belong)
          } else {
             new.name <- paste0(.get.rank.pat(.get.rank(i)), belong,
             gsub("__", "_", classified.names[j]))
          }
    
          new.taxonomy[[j]][i] <- new.name
      } else { # if the classification is good, store it
          classified.names[j] <- current.groups[j]
          new.taxonomy[[j]][i] <- classified.names[j]
      }
    }

  }
  
  new.taxonomy <- lapply(new.taxonomy, FUN=paste, collapse="; ")
  
  # return a new data frame with the updated taxonomy
  # (do not mutate the old data frame)
  data.frame(data[ ,-dim(data)[2], drop=FALSE], taxonomy=unlist(new.taxonomy),
     stringsAsFactors = FALSE)
}


LCA.OTU <- function(otu, strip.format=FALSE, drop=TRUE) {

  valid.OTU(otu)
  tax.classes <- c("kingdom", "phylum", "class", "order", "family", 
                   "genus", "species")

  # split taxonomy column by ;
  suppressWarnings(otu.split <- col.splitup(otu, col="taxonomy", sep="; ", 
                 max=length(tax.classes), names=tax.classes, drop=TRUE))
  # otu table column#  
  max <- ncol(otu) - 1
  
 # strip the 'formatting' bits of taxonomic info
 blacklist<-vector()
 for ( i in c("k__", "p__", "c__", "o__", "f__", "g__", "s__") ) {
     blacklist<-c(blacklist,paste(i, .blacklist(), sep=""))
 }
 blacklist <- paste(blacklist, collapse="|")
 blacklist <- paste(blacklist, "|k__|p__|c__|o__|f__|g__|s__|;", sep="")

# remove entries matched blacklist (unclassified taxa)
  otu.split[ ,-(1:max)] <- gsub(blacklist, "", 
        as.matrix(otu.split[ ,-(1:max)]), ignore.case = TRUE)

  otu.split[ ,-(1:max)] <- gsub("^[ ]+", "", 
       as.matrix(otu.split[ ,-(1:max)]), perl=TRUE)

  # obtain LCA of each OTU
  # replace "" to NA
  otu.split[ otu.split == "" ] <- NA
  

  # whether or not strip formatting: e.g. g__
  otu.split$LCA <- apply(otu.split, 1, function(x) tail(unlist(x [ 
                        which(!is.na(x)) ]), n=1))

  if ( strip.format ) {
      otu.split$LCA <- otu.split$LCA
  } else {
      n.r <- nrow(otu.split)
      n.c <- ncol(otu.split)
      for ( i in 1:n.r ) {
           num <- which(otu.split[i, 1:(n.c-1)]==otu.split$LCA[i])
           num <- num[length(num)]
           pat <- .get.rank.pat(names(otu.split)[num])
           otu.split$LCA[i] <- paste(pat, otu.split$LCA[i], 
                                  sep="")
      }
  }
  
  # whether or not remove taxonomy split ranks
  output <- otu.split[, setdiff(colnames(otu.split),"taxonomy")]
  if ( drop ) { 
      # keep only LCA for taxonomy info
      output <- cbind(otu[, 1:max], output$LCA)
      names(output)[ncol(output)] <- "LCA"
  } else { 
      output <- cbind(output, otu$taxonomy)
      names(output)[ncol(output)] <- "taxonomy"
  }
  return(output)
}


col.splitup <- function(df, col="", sep="", max=NULL, names=NULL, drop=TRUE) { 
    # validate all inputs
    if ( sep == "" ) {
        stop(paste("\n", "    Error: separator ?  check ?col.splitup  ", "\n", sep=""))
    }
    if ( col == "" )  {
       stop(paste("\n", "    Error: column to split? check ?col.splitup  ", "\n", sep=""))
    }
    if ( !(col %in% names(df)) ) {
        stop(paste("\n", "    Error: column to be split is not in the dataframe", "\n", sep=""))
    }  
    
    # col position in df
    if ( col %in% names(df) ) {
        num.col <- which( names(df) %in% col )
    }
    
    # split the column to list;
    list <- strsplit(df[[col]], sep, fixed=FALSE); 
    vec <- vector();

    # determine max number of split columns to be remained in output
    for (i in 1:length(list) ) {
      vec<-c( vec, length(list[[i]]) );
    }

    def.max <- max(c(max, length(names)))
    maximum <- c( max, max(vec), length(names) )
    max <- max(maximum)
    
    if ( max(vec) > def.max ) {
#        warning(paste("\n", "    ", col, " can be split to: ", max(vec), " coloumns; ", 
#                 "\n", "    required to be split to ", def.max, " columns; ", "\n", 
#                  "    Will KEEP all ", max(vec), " and IGNORE user defined ",  def.max, 
#                 " columns", sep=""))
     } else if ( max(vec) < def.max )  {
            #warning(paste("\n", "    ", col, " can be split to: ", max(vec), 
#                " columns; ", "\n", "    required to be split to: ", max, 
 #               " columns; ", "\n", "    column names provided: ", length(names), "\n", 
 #               "    Will fill empty strings in the additional ", def.max-max(vec), 
 #              " column(s)", sep=""))
      } else {
        #warning(paste("\n", "    ", col, " can be split to: ", max(vec), " coloumns; ", 
#                 "\n", "    required to be split to ", def.max, " columns; ", "\n", 
#                  "    ", col, " will be split to: ", max(vec), " columns", sep=""))
     }
   

    for ( i in 1:length(list) ) {
      # since max is equal to or larger than length(list[[i]]) as defined by section above
      if ( length(list[[i]]) < max ) {
        x=rep( "", max-length(list[[i]]) );
        list[[i]] <- c(list[[i]], x)
      } else {
        list[[i]] <- list[[i]]
      }
    } 
    
    # rbind to form a data frame of split columns    
    new<-as.data.frame( do.call("rbind", list) )
    
    # names of new columns
    if ( is.null(names) ) {
       new.name <- colnames(new)
    } else {
      if ( length(names) == max ) {
        new.name <- names
      } else if ( length(names) < max ) {
      #warning(paste("\n", "    ", col, " being split to: ", max, 
      #          " columns;", "\n", "    column names provided: ", length(names), "; ", "\n",
      #          "    will only change the first ", max, " of split columns", sep=""))
        new.name <- c(names, colnames(new)[(length(names)+1):max])
      # } 
      # since max is the maximum of length(names), pre-defined max and max(vec), so 
      # following condition is not possible
      #  else if ( length(names) > max ) {
      #     warning(paste("\n", "    remained number of split columns from ", col, " is: ", max, 
      #          ";", "\n", " number of names provided: ", length(names), "; ", "\n",
      #         "     will ignore the last ", length(names) -max, " of names", sep=""))
      #  new.name <- names[1:max]
      } else {
        new.name <- colnames(new)
      }
    }
    
    colnames(new) <- new.name    
    # for data.table dt[, setdiff(colnames(dt),col), with=FALSE] 
    if ( ! drop ) {
        #warning(paste("    Keep ", col, " column in output!", sep=""))
        df.new <- cbind(df, new)
    } else {
        #warning(paste("    Drop ", col, " column in output!", sep=""))
        df.new <- cbind(df[, setdiff(colnames(df),col)], new)
    }
    return(df.new)
}
    
transpose.LCA <- function(data) {
  .valid.data(data)
  labels <- names(data)

  lca.t.list <- list()  
  for ( i in 1:length(data) ) {
    label <- names(data)[i]
    otu <- data[[i]]
    if ( is.null(otu) ) { break }
    
    lca <- LCA.OTU(otu, strip.format=FALSE, drop=TRUE)
    lca <- lca[order(rowSums(lca[,-ncol(lca)]), decreasing=TRUE),]
    rownames(lca) <- paste(lca$LCA, rownames(lca), sep="_")
    lca <- lca[, -ncol(lca)]
    lca.t <- as.data.frame(t(lca))
    lca.t.list[[label]] <- lca.t
  }
  return(lca.t.list)
}
Any scripts or data that you put into this service are public.
RAM documentation built on May 2, 2019, 3:04 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
RAM
R for Amplicon-Sequencing-Based Microbial-Ecology

R/taxonomy_functions.R
In RAM: R for Amplicon-Sequencing-Based Microbial-Ecology

Try the RAM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

RAM R for Amplicon-Sequencing-Based Microbial-Ecology

R/taxonomy_functions.R In RAM: R for Amplicon-Sequencing-Based Microbial-Ecology

Try the RAM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

RAM
R for Amplicon-Sequencing-Based Microbial-Ecology

R/taxonomy_functions.R
In RAM: R for Amplicon-Sequencing-Based Microbial-Ecology