This function reads the taxonomy output file produced by mothur and reformats it into a data frame with one column per taxonomic rank.

#' Read a mothur taxonomy file into a per-rank data frame
#'
#' The file is read as tab-delimited text with its first column used as row
#' names. The first line is treated as a header and discarded. The third
#' column (the `;`-separated taxonomy string) is split into up to seven
#' ranks, and the first parenthesised numeric suffix, e.g. `(100)` or
#' `(97.5)`, is removed from each rank label.
#'
#' @param tax.file Path to the taxonomy file, passed to `read.delim()`.
#' @return A data frame whose row names are the values of the file's first
#'   column and whose columns are `Count`, `Kingdom`, `Phylum`, `Class`,
#'   `Order`, `Family`, `Genus` and `Species`. Ranks absent from a taxonomy
#'   string are `NA`. `Count` is the file's second column exactly as read;
#'   because the header line is parsed as data it is not converted to a
#'   numeric type. A file containing only the header line yields a
#'   zero-row data frame with the same columns.
read.mothur.taxonomy <- function(tax.file) {
  raw <- read.delim(tax.file, header = FALSE, row.names = 1)

  # Drop the header line. seq_len(n)[-1L] is empty when only the header is
  # present, whereas 2:n would count down (2, 1) and select bogus rows.
  tbl <- raw[seq_len(nrow(raw))[-1L], , drop = FALSE]

  rank_parts <- strsplit(as.character(tbl$V3), ";", fixed = TRUE)
  rank_names <- c(
    "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"
  )

  # One character vector per rank: take the i-th label of every taxonomy
  # string (NA when the string is shorter) and strip its "(score)" suffix.
  rank_cols <- lapply(seq_along(rank_names), function(i) {
    labels <- vapply(rank_parts, function(parts) parts[i], character(1))
    sub("\\([0-9.]+\\)", "", labels)
  })
  names(rank_cols) <- rank_names

  tax_df <- data.frame(Count = tbl$V2, rank_cols)
  rownames(tax_df) <- rownames(tbl)
  tax_df
}

Example

# Install and load the package that provides read.mothur.taxonomy(),
# together with the other packages attached by this example.
library(devtools)
install_github("ravinpoudel/myFunctions")
library(myFunctions)
library(igraph)
library(magrittr)
library(tidyverse)
library(data.table)

# Resolve the taxonomy file once. Both readers below need exactly one path,
# so stop early if the working directory holds no match or several.
tax.path <- Sys.glob("*.taxonomy")
stopifnot(length(tax.path) == 1L)

# Read the taxonomy file without using read.mothur.taxonomy()
tax.reg <- read.delim(tax.path, sep = "\t", header = FALSE)
head(tax.reg)

# Read the taxonomy file using read.mothur.taxonomy()
tax.fn <- read.mothur.taxonomy(tax.path)
head(tax.fn)


ravinpoudel/myFunctions documentation built on May 9, 2020, 7:39 a.m.