#'
#' @title Classify Taxa to Taxonomic Ranks
#'
#' @description Classify the combined taxa profile (and grouped taxa profiles generated by \code{\link{ngsLCA_group}} if available) into user-defined taxonomic ranks. Results will be in "path/run/taxonomic_profiles/taxa_ranks/".
#'
#' @param path working directory, same to \code{\link{ngsLCA_profile}}.
#' @param run name of the run, default is "run01".
#' @param rank.name a comma separated vector listing the taxonomic ranks that will be used for classifying taxa profiles; default is "species,genus,family"
#'
#' @return Taxa profiles clustered into taxa ranks.
#' @importFrom utils read.csv write.table
#' @export
#'
#' @examples
#' ngsLCA_rank(path=system.file("extdata","lca_files",package="ngsLCA"),
#' run="run01",
#' rank.name="species,genus")
#'
#'
#' ## This will classify the combined taxa profile (and
#' ## grouped taxa profiles if available) of "run01" into
#' ## species and genus, by merging all taxa below a species
#' ## into that species, and all taxa below a genus into
#' ## that genus. Generated files will be in
#' ## "path/run01/taxonomic_profiles/taxa_ranks/".
#'
#'
ngsLCA_rank=function(path,
run="run01",
rank.name="species,genus,family"){
#modify input path
if (!grepl('/$', path)) {
path = paste(path,"/",sep="")
}
#local variable
taxa = NULL
#whether ngsLCA_profile performed
if (length(dir(paste(path, run, "/intermediate/", sep=""),pattern = "taxa_profile_v1.txt")) == 0) {
cat(paste("\n\n\t-> Error: required input file not found under '", run, "' folder, please run 'ngsLCA_profile' first.\n\n",
sep = ""))
stop()
}else{
cat("\n\n\t-> Cluster taxa into taxonomic ranks.\n\n")
}
#generate taxa branch
if (length(dir(paste(path, run, "/intermediate/", sep=""),pattern = "taxa_branch.txt")) > 0) {
tax.branch = read.csv(paste(path, run, "/intermediate/", "taxa_branch.txt", sep=""),
sep="\t", quote="", stringsAsFactors=F)
}else{
FileList = dir(path, pattern = ".lca$")
if(length(FileList)==0){
cat("\n\n\t-> Error: no lca files found in the appointed working directory.\n\n")
stop()
}
PATH = read.csv(paste(path, FileList[1], sep=""), header=F, sep="\t", stringsAsFactors=F,
fill=T, col.names = paste0("V",seq_len(60)), skip = 1)
PATH = PATH[,-1]
if (length(FileList)>1) {
for (i in 2:length(FileList)) {
PATH.1 = read.csv(paste(path, FileList[i], sep=""), header=F, sep="\t", stringsAsFactors=F,
fill=T, col.names = paste0("V",seq_len(60)), skip = 1)
PATH.1 = PATH.1[,-1]
PATH = rbind(PATH,PATH.1)
PATH = PATH[!duplicated(PATH[,1]),]
}
}else{
PATH = PATH[!duplicated(PATH[,1]),]
}
tax.branch = PATH
write.table(PATH, file = paste(path, run, "/intermediate/", "taxa_branch.txt", sep=""), quote=F,
row.names=F, col.names=T, sep="\t")
}
#create folders
if (length(dir(paste(path, run, "/taxonomic_profiles/", sep = ""), pattern = "taxa_ranks")) > 0) {
cat(paste("\n\n\t-> '", path, run, "/taxonomic_profiles/taxa_ranks' already exists; files inside will be overwritten.\n\n",sep = ""))
}else{
dir.create(paste(path, run, "/taxonomic_profiles/taxa_ranks", sep=""))
}
#Function for clustering taxa into user-defined ranks
Taxa.cluster = function(DF, OutName){
DF1 = data.frame(matrix(vector(), 0, dim(DF)[2]+1, dimnames=list(c(), c("taxa",colnames(DF)))),stringsAsFactors=F)
colnames(DF1)[2:dim(DF1)[2]] = colnames(DF)
j=1
for (i in 1:dim(DF)[1]) {
V1 = grep(Taxa.L, tax.branch[which(tax.branch[,1] %in% rownames(DF)[i]),])
if (length(V1)>1) {
V1=V1[1]
}
if (length(V1) !=0) {
DF1[j,] = c(tax.branch[which(tax.branch[,1] %in% rownames(DF)[i]),V1], as.numeric(DF[i,1:dim(DF)[2]]))
j = j+1
}
}
for (i in 2:dim(DF1)[2]) {
DF1[,i] = as.numeric(DF1[,i])
}
if (dim(DF1)[1] != 0) {
DF2 = aggregate(. ~ taxa, data = DF1, sum)
taxa_split = strsplit(DF2$taxa, ":")
DF2$taxa_ID = sapply(taxa_split, function(x) x[1])
DF2$taxa_name = sapply(taxa_split, function(x) paste(x[2:(length(x)-1)], collapse = ":"))
DF2$taxonomic_rank = sapply(taxa_split, function(x) x[length(x)])
DF2 = DF2[,c((dim(DF2)[2] - 2):dim(DF2)[2],2:(dim(DF2)[2] - 3))]
write.table(DF2, quote=F, row.names=F, col.names=T, sep="\t",
file = paste(path, run, "/taxonomic_profiles/taxa_ranks/", OutName, "_",taxa.level, ".txt", sep=""))
}else{
cat(paste("\n\n\t-> For ", OutName, ", no taxa clustered into ", taxa.level, ".\n\n",sep = ""))
}
}
#cluster complete taxa profile
DF = read.csv(paste(path, run, "/intermediate/", "taxa_profile_final.txt", sep=""),sep="\t",
quote="", check.names=F, stringsAsFactors=F)
rownames(DF) = DF$taxa
DF1 = as.data.frame(DF[,-1])
colnames(DF1) = colnames(DF)[-1]
rownames(DF1) = rownames(DF)
rank.name = strsplit(rank.name,",")[[1]]
for (taxa.level in rank.name) {
Taxa.L = paste(":",taxa.level,sep="")
X2 = Taxa.cluster(DF = DF1, OutName = "all_taxa")
}
#cluster on groups
if (length(dir(paste(path, run, "/intermediate/taxa_groups/", sep=""), pattern = ".txt"))>0) {
cat("\n\n\t-> Taxa groups found and will also be clustered into ranks.\n\n")
file.list = dir(paste(path, run, "/intermediate/taxa_groups/", sep=""), pattern = ".txt") #list files
for (i in 1:length(file.list)) {
DF = read.csv(paste(path, run, "/intermediate/taxa_groups/", file.list[i], sep=""),sep="\t",
quote="", check.names=F, stringsAsFactors=F)
rownames(DF) = DF$taxa
DF1 = as.data.frame(DF[,-1])
colnames(DF1) = colnames(DF)[-1]
rownames(DF1) = rownames(DF)
OutName = sub(".txt", "", file.list[i])
for (taxa.level in rank.name) {
Taxa.L = paste(":",taxa.level,sep="")
X2 = Taxa.cluster(DF = DF1, OutName = OutName)
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.