renameTaxa | R Documentation |
Function for renaming taxa in a taxonomic table, which can be
given as matrix or phyloseq object.
It comes with functionality for making unknown
and unclassified taxa unique and substituting them by the next higher known
taxonomic level, e.g., an unknown genus "g__" can automatically be renamed
to "1_Streptococcaceae(F)".
User-defined patterns determine the format of known and substituted names.
Unknown names (e.g., NAs) and unclassified taxa can be
handled separately. Duplicated names within one or more chosen ranks can
also be made unique by numbering them consecutively.
renameTaxa(
taxtab,
pat = "<r>_<name>",
substPat = "<r>_<name>_<subst_r>_<subst_name>",
unknown = c(NA, "", " ", "__"),
numUnknown = TRUE,
unclass = c("unclassified", "Unclassified"),
numUnclass = TRUE,
numUnclassPat = "<name><num>",
numDupli = NULL,
numDupliPat = "<name><num>",
ranks = NULL,
ranksAbb = NULL,
ignoreCols = NULL
)
taxtab |
taxonomic table (matrix containing the taxonomic names; columns must be taxonomic ranks) or phyloseq object. |
pat |
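character specifying the pattern of new taxonomic names if the current name is KNOWN. See the examples and default value for a demo. Placeholders used on this page include "<name>" (the taxonomic name), "<r>" and "<R>" (abbreviated rank in lower and upper case, respectively), and "<Rank>" (the rank name, e.g., "Genus").
|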
substPat |
character specifying the pattern of new taxonomic names if the
current name is UNKNOWN. The current name is substituted by the next higher
existing name.
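Possible placeholders (in addition to those of pat) include "<subst_name>" (the next higher known name used as substitute) and "<subst_r>" / "<subst_R>" (the abbreviated rank of the substitute in lower / upper case).
|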
unknown |
character vector giving the labels of unknown taxa, without
leading rank label (e.g., "g_" or "g__" for genus level). If numUnknown = TRUE, unknown names are replaced by a number.
|
numUnknown |
logical. If TRUE, unknown taxonomic names (as defined by unknown) are numbered to make them unique. |
unclass |
character vector giving the label of unclassified taxa,
without leading rank label (e.g., "g_" or "g__" for genus level). If numUnclass = TRUE, a number is added to the names of unclassified taxa according to numUnclassPat.
|
numUnclass |
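logical. If TRUE, a number is added to the names of unclassified taxa (as defined by unclass); the format is set via numUnclassPat. |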
numUnclassPat |
character defining the pattern used for numbering unclassified taxa. Must include a space holder for the name ("<name>") and one for the number ("<num>"). Default is "<name><num>" resulting e.g., in "unclassified1". |
numDupli |
character vector giving the ranks that should be made unique
by adding a number. Elements must match column names. The pattern is
defined via numDupliPat. |
numDupliPat |
character defining the pattern used for numbering
duplicated names (if numDupli is given). Default is "<name><num>". |
ranks |
character vector giving rank names used for renaming the
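taxa. If NULL (default), the ranks are determined automatically from the column names or the taxonomic names; an error is raised if this is not possible. |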
ranksAbb |
character vector giving abbreviated rank names, which are
directly used for the place holders <r>, <subst_r>, <R>, and <subst_R>
(the former two in lower case and the latter two in upper case).
If NULL (default), the abbreviations are derived from the rank names (their first letter). |
ignoreCols |
numeric vector with columns to be ignored. Names remain
unchanged for these columns. Columns containing |
Renamed taxonomic table (matrix or phyloseq object, depending on the input).
#--- Load and edit data -----------------------------------------------------
# Build a small taxonomic table (one column per rank) that contains
# unclassified and blank entries, so that the renaming rules demonstrated in
# the examples below have something to act on.
library(phyloseq)
data("GlobalPatterns")

# Keep bacterial taxa only and take the first ten rows of the taxonomy table
global <- subset_taxa(GlobalPatterns, Kingdom == "Bacteria")
taxtab <- global@tax_table@.Data[1:10, ]

# Add some unclassified taxa
taxtab[c(2, 3, 5), "Species"] <- "unclassified"
taxtab[c(2, 3), "Genus"] <- "unclassified"
taxtab[2, "Family"] <- "unclassified"

# Add some blanks
taxtab[7, "Genus"] <- " "
taxtab[7:9, "Species"] <- " "

# Add taxon that is unclassified up to Kingdom
# (the first column gets the capitalized label "Unclassified")
taxtab[9, ] <- "unclassified"
taxtab[9, 1] <- "Unclassified"

# Add row names (seq_len() stays correct even for a table with zero rows)
rownames(taxtab) <- paste0("OTU", seq_len(nrow(taxtab)))

print(taxtab)
#--- Example 1 (default setting) --------------------------------------------
# All arguments are left at their defaults:
# - Known names are replaced by "<r>_<name>"
# - Unknown names are replaced by "<r>_<name>_<subst_r>_<subst_name>"
# - Unclassified taxa have separate numbering
# - Ranks are taken from column names
# - e.g., unknown genus -> "g_1_f_Streptococcaceae"
renamed1 <- renameTaxa(taxtab)
renamed1
#--- Example 2 --------------------------------------------------------------
# - Use a phyloseq object as input (subset of the class Clostridia to
#   decrease runtime)
# - The renamed taxonomic table is inspected via tax_table()
global_sub <- subset_taxa(global, Class == "Clostridia")
renamed2 <- renameTaxa(global_sub)
tax_table(renamed2)[1:5, ]
#--- Example 3 --------------------------------------------------------------
# - Known names are kept as they are
# - A substituted name is followed by its rank in brackets
# - Unclassified taxa are numbered using the pattern "<name>_<num>"
# - e.g., unknown genus -> "Streptococcaceae (F)"
# - Note: the numbers assigned to unknown names are not visible because
#   "substPat" contains no "<name>" placeholder
renamed3 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<subst_name> (<subst_R>)",
  numUnclassPat = "<name>_<num>"
)
renamed3
#--- Example 4 --------------------------------------------------------------
# - As in Example 3, but "substPat" now starts with "<name>", so the number
#   assigned to an unknown name is shown
# - e.g., unknown genus -> "1 Streptococcaceae (F)"
renamed4 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<name> <subst_name> (<subst_R>)",
  numUnclassPat = "<name>_<num>"
)
renamed4
#--- Example 5 --------------------------------------------------------------
# - Unknown names and unclassified taxa share one numbering: "unclassified"
#   is listed in "unknown" and "unclass" is switched off
# - e.g., unknown genus -> "1_Streptococcaceae(F)"
# - Note: a warning is issued here because "Unclassified" (with capital U)
#   occurs in the data but is not included in "unknown"
renamed5 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<name>_<subst_name>(<subst_R>)",
  unknown = c(NA, " ", "unclassified"),
  unclass = NULL
)
renamed5
#--- Example 6 --------------------------------------------------------------
# - As in Example 5, but "Unclassified" (capital U) is also treated as
#   unknown, so OTU9 is now renamed correctly
renamed6 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<name>_<subst_name>(<subst_R>)",
  unknown = c(NA, " ", "unclassified", "Unclassified"),
  unclass = NULL
)
renamed6
#--- Example 7 --------------------------------------------------------------
# - Append "(<Rank>: unknown)" to names that had to be substituted
# - e.g., unknown genus -> "1 Streptococcaceae (Genus: unknown)"
renamed7 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<name> <subst_name> (<Rank>: unknown)",
  unknown = c(NA, " ", "unclassified", "Unclassified"),
  unclass = NULL
)
renamed7
#--- Example 8 --------------------------------------------------------------
# - No substitution by higher ranks: both patterns consist of "<name>" only,
#   so unknown and unclassified taxa keep just their (numbered) name
# - e.g., unknown genus -> "1"
renamed8 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<name>"
)
renamed8
#--- Example 9 --------------------------------------------------------------
# - Error if ranks cannot be automatically determined
#   from column names or taxonomic names
taxtab_noranks <- taxtab
colnames(taxtab_noranks) <- paste0("Rank", seq_len(ncol(taxtab)))
head(taxtab_noranks)

# This call is expected to fail because the column names carry no rank
# information. The error is caught and reported as a message so that the
# remaining examples still run when the script is executed as a whole.
renamed9 <- tryCatch(
  renameTaxa(
    taxtab_noranks,
    pat = "<name>",
    substPat = "<name>_<subst_name>(<subst_R>)"
  ),
  error = function(e) {
    message("Expected error: ", conditionMessage(e))
    NULL
  }
)

# Ranks can either be given via "ranks" ...
(ranks <- colnames(taxtab))

renamed9 <- renameTaxa(
  taxtab_noranks,
  pat = "<name>",
  substPat = "<name>_<subst_name>(<subst_R>)",
  ranks = ranks
)
renamed9

# ... or "ranksAbb" (we now use the lower case within "substPat")
(ranks_abb <- substr(colnames(taxtab), 1, 1))

renamed9 <- renameTaxa(
  taxtab_noranks,
  pat = "<name>",
  substPat = "<name>_<subst_name>(<subst_r>)",
  ranksAbb = ranks_abb
)
renamed9
#--- Example 10 -------------------------------------------------------------
# - Number duplicated names within the ranks "Family" and "Order" so that
#   every name in these two columns is unique
renamed10 <- renameTaxa(
  taxtab,
  pat = "<name>",
  substPat = "<name>_<subst_name>(<subst_R>)",
  numDupli = c("Family", "Order")
)
renamed10

# Check whether any duplicates remain in the two columns
anyDuplicated(renamed10[, "Family"]) > 0L
anyDuplicated(renamed10[, "Order"]) > 0L
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.