# Global knitr chunk options for the slides: do not echo code, use no prefix
# on printed output, hide warnings, and cache chunks with automatic
# dependency tracking.
knitr::opts_chunk$set(
  echo = FALSE,
  comment = "",
  warning = FALSE,
  cache = TRUE,
  autodep = TRUE
)
# options(crayon.enabled = TRUE)
options(width = 80)
library(taxa)
library(metacoder)
library(cowplot)
\begin{columns}
\begin{column}{0.3\textwidth} \includegraphics[height=8cm]{images/taxonomy.png} \end{column}
\begin{column}{0.8\textwidth} %%<--- here
\begin{itemize}
\setlength\itemsep{1em}
\item Taxonomic data is hierarchical
\item Associated with tabular data
\item Can be names, classifications, or IDs
\item Many different taxonomic systems
\item Many different data formats
\item Hierarchical visualization is difficult
\end{itemize}
\end{column}
\end{columns}
DNA sequence databases
\begin{center}
\resizebox{.99\textwidth}{!}{%
\includegraphics[height=1cm]{images/ncbi_icon.png}%
\quad
\includegraphics[height=1cm]{images/unite_icon.png}%
\quad
\includegraphics[height=1cm]{images/greengenes_icon.png}%
}
\end{center}
Species occurrence databases
\begin{center}
\resizebox{.99\textwidth}{!}{%
\includegraphics[height=1cm]{images/gbif_icon.png}%
\quad
\includegraphics[height=1cm]{images/inaturalist_icon.png}%
\quad
\includegraphics[height=1cm]{images/atlas_icon.jpg}%
}
\end{center}
Museum records
\begin{center}
\resizebox{.99\textwidth}{!}{%
\includegraphics[height=1cm]{images/smith_icon.jpg}%
\quad
\includegraphics[height=1cm]{images/bmnh_icon.jpg}%
\quad
\includegraphics[height=1cm]{images/nhm_icon.jpg}%
}
\end{center}
\scriptsize
\textbf{NCBI GenBank}
\begin{verbatim}
AC073210.8 Homo sapiens BAC clone RP11-460N20 from 7, complete sequence AACGAACGCTGGCGGCATGCCTAACACATGCAAGTCGAACGAGACCTTCGGGTC... \end{verbatim}
\textbf{UNITE}
\begin{verbatim}
SH099456.05FU_FJ357315_refs k__Fungi;p__Ascomycota;c__Dothideomycetes ;o__Pleosporales;f__Pleosporaceae;g__Embellisia;s__Embellisia_planifunda
GCTGGCGGCGTGCCTAACACATGTAAGTCGAACGGGACTGGGGGCAACTCCAGT... \end{verbatim}
\textbf{RDP}
\begin{verbatim}
S000448483 Sparassis crispa; MBUH-PIRJO&ILKKA94-1587/ss5 Lineage=Root;rootrank;Fungi;domain;Basidiomycota;phylum;Agaricomycetes; class;Polyporales;order;Sparassidaceae;family;Sparassis;genus AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGAATGCTTAACACATGAAAC... \end{verbatim}
\textbf{SILVA}
\begin{verbatim}
GCVF01000431.1.2369 Bacteria;Proteobacteria;Gammaproteobacteria; Oceanospirillales;Alcanivoraceae;Alcanivorax;Thalassiosira rotula AGAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGTATGCTTAACACATGCAAG... \end{verbatim}
\scriptsize
\textbf{NCBI GenBank}
\begin{Verbatim}[commandchars=\\\{\}]
\underline{AC073210.8} \underline{Homo sapiens} BAC clone RP11-460N20 from 7, complete sequence AACGAACGCTGGCGGCATGCCTAACACATGCAAGTCGAACGAGACCTTCGGGTC... \end{Verbatim}
\textbf{UNITE}
\begin{Verbatim}[commandchars=\\\{\}]
SH099456.05FU_FJ357315_refs \underline{k__Fungi;p__Ascomycota;c__Dothideomycetes} \underline{;o__Pleosporales;f__Pleosporaceae;g__Embellisia;s__Embellisia_planifunda}
GCTGGCGGCGTGCCTAACACATGTAAGTCGAACGGGACTGGGGGCAACTCCAGT... \end{Verbatim}
\textbf{RDP}
\begin{Verbatim}[commandchars=\\\{\}]
S000448483 \underline{Sparassis crispa}; MBUH-PIRJO&ILKKA94-1587/ss5 Lineage=\underline{Root;rootrank;Fungi;domain;Basidiomycota;phylum;Agaricomycetes;} \underline{class;Polyporales;order;Sparassidaceae;family;Sparassis;genus} AGAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGAATGCTTAACACATGAAAC... \end{Verbatim}
\textbf{SILVA}
\begin{Verbatim}[commandchars=\\\{\}]
GCVF01000431.1.2369 \underline{Bacteria;Proteobacteria;Gammaproteobacteria;} \underline{Oceanospirillales;Alcanivoraceae;Alcanivorax;Thalassiosira rotula} AGAGTTTGATCCTGGCTCAGGATGAACGCTGGCGGTATGCTTAACACATGCAAG... \end{Verbatim}
Global Biodiversity Information Facility : Archaea database
\scriptsize
# Preview columns 4 through 8 of the tab-separated GBIF Archaea table.
readr::read_tsv(
  file = "datasets/gbif_archea.csv"
)[4:8]
Smithsonian Museum of Natural History: Mammal database
\scriptsize
# Preview column 9 of the Smithsonian mammal table.
readr::read_csv(
  file = "datasets/SNMNH.csv"
)[9]
taxa
package

\vfill
The taxa
package is designed to be a solid foundation for using taxonomic data in R.
\vfill
\begin{itemize} \setlength\itemsep{1em} \item R6 classes to hold taxa, taxonomies, and associated data \item Flexible parsers to convert raw data to these classes \item Dplyr-inspired functions to manipulate these classes \item Functions to get data associated with each taxon in a taxonomy \end{itemize}
metacoder
package: visualization of taxon data

set.seed(6)
\scriptsize
# Read the GBIF Archaea table, parse columns 4-8 as the classification,
# drop taxa whose name is empty, and draw a heat tree in which node colour
# and node size both encode n_obs.
readr::read_tsv("datasets/gbif_archea.csv") %>%
  parse_tax_data(class_cols = 4:8) %>%
  filter_taxa(taxon_names != "") %>%
  heat_tree(
    node_label = taxon_names,
    node_color = n_obs,
    node_size = n_obs,
    layout = "da"
  )
\begin{center} \Huge Classes defined by taxa \end{center}
taxa
: Relationships

\begin{center}
\resizebox{.8\textwidth}{!}{%
\includegraphics[height=1cm]{images/class_diagram.png}%
}
\end{center}
taxa
: Relationships

\begin{center}
\resizebox{.8\textwidth}{!}{%
\includegraphics[height=1cm]{images/class_diagram_selected.png}%
}
\end{center}
taxa
: The taxmap
class

\begin{center}
\hspace*{-0.6cm}
\resizebox{1.1\textwidth}{!}{%
\includegraphics{images/taxmap_printed.png}%
}
\end{center}
\begin{center} \Huge Parsing \end{center}
\begin{center}
\vspace{-1cm}\hspace{-1cm}
\resizebox{1.18\textwidth}{!}{%
\includegraphics{images/parsing_guide.png}%
}
\end{center}
\scriptsize
# Parse two classification strings whose ranks are separated by ";".
x <- c(
  "Mammalia;Theria;Metatheria;Diprotodontia;Macropodiformes",
  "Mammalia;Theria;Eutheria;Primates;Haplorrhini;Simiiformes"
)
parse_tax_data(x, class_sep = ";")
\scriptsize
# Look up classifications from taxon names using the NCBI database.
x <- c("Homo sapiens", "Macropus", "Chordata")
lookup_tax_data(x, type = "taxon_name", database = "ncbi")
\footnotesize
Taxon IDs
# Look up classifications from taxon IDs using the NCBI database.
x <- c("9606", "207598", "7711") # NCBI taxon IDs
lookup_tax_data(x, type = "taxon_id", database = "ncbi")
Sequence IDs
# Look up classifications from sequence IDs using the NCBI database.
x <- c("AC073210", "MG014608", "AE006468") # NCBI sequence IDs
lookup_tax_data(x, type = "seq_id", database = "ncbi")
Global Biodiversity Information Facility : Archaea database
\scriptsize
# Preview columns 4 through 8 of the tab-separated GBIF Archaea table.
readr::read_tsv(
  file = "datasets/gbif_archea.csv"
)[4:8]
\scriptsize
# Read the GBIF Archaea table and parse it, taking the classification from
# columns 4 through 8.
x <- readr::read_tsv("datasets/gbif_archea.csv")
parse_tax_data(x, class_cols = 4:8)
\scriptsize
# Extract taxonomic data from sequence-header strings. The regex captures two
# groups, and `key` names them and assigns each a type ("seq_id" and "info").
x <- c(
  "AC073210.8 Homo sapiens BAC clone RP11-460N20 from 7, complete sequence",
  "AE006468.2 Salmonella enterica subsp. enterica serovar Typhimurium",
  "MG014608.1 Macropus fuliginosus Csf1r gene, enhancer"
)
extract_tax_data(
  x,
  database = "ncbi",
  regex = "([A-Z0-9.]+) (.+)",
  key = c(my_ncbi_id = "seq_id", my_desc = "info")
)
\begin{center} \Huge Taxon attributes \end{center}
# Build the example tree and the individual panels combined by later chunks.

# Rename the "r__Root" prefix of every lineage to "r__Bacteria" and prepend a
# new overall "r__Life" root.
hmp_otus$lineage <- sub(
  hmp_otus$lineage,
  pattern = "^r__Root;",
  replacement = "r__Bacteria;"
)
hmp_otus$lineage <- paste0("r__Life;", hmp_otus$lineage)

# Parse the lineage column; each ";"-separated element is split by the regex
# into a rank part and a name part.
x <- parse_tax_data(
  hmp_otus,
  class_cols = "lineage",
  class_sep = ";",
  class_key = c(tax_rank = "info", tax_name = "taxon_name"),
  class_regex = "^(.+)__(.+)$"
)

# Keep Proteobacteria together with its subtaxa and supertaxa.
x <- filter_taxa(
  x,
  taxon_names == "Proteobacteria",
  subtaxa = TRUE,
  supertaxa = TRUE,
  reassign_obs = FALSE
)

# Draw `x` as a heat tree with the styling shared by all panels.
# `...` is forwarded to heat_tree() (the calls below pass `title` and
# `node_color`). The seed is reset on every call, presumably so that every
# panel gets the same layout.
plot_one <- function(...) {
  set.seed(2)
  x %>%
    heat_tree(
      node_label = ifelse(is_root, "Root", ""),
      node_size = n_obs,
      node_label_size_range = c(0.05, 0.05),
      edge_color = "grey",
      layout = "da",
      initial_layout = "re",
      make_node_legend = FALSE,
      title_size = 0.09,
      ...
    )
}

# Colours for taxa that are selected, not selected, and the query taxon.
selected <- "red"
unselected <- "grey"
target <- "black"

# Subtaxa of Betaproteobacteria (default `recursive` setting).
my_color <- ifelse(
  x$taxon_indexes() %in% subtaxa(
    x,
    subset = taxon_names == "Betaproteobacteria",
    simplify = TRUE
  ),
  selected,
  unselected
)
my_color[x$taxon_names() == "Betaproteobacteria"] <- target
subtaxa_recursive <- plot_one(
  title = "Subtaxa (recursive = T)",
  node_color = my_color
)

# Subtaxa of Betaproteobacteria with recursive = FALSE.
my_color <- ifelse(
  x$taxon_indexes() %in% subtaxa(
    x,
    subset = taxon_names == "Betaproteobacteria",
    simplify = TRUE,
    recursive = FALSE
  ),
  selected,
  unselected
)
my_color[x$taxon_names() == "Betaproteobacteria"] <- target
subtaxa_immediate <- plot_one(
  title = "Subtaxa (recursive = F)",
  node_color = my_color
)

# Supertaxa of Betaproteobacteria (default `recursive` setting).
my_color <- ifelse(
  x$taxon_indexes() %in% supertaxa(
    x,
    subset = taxon_names == "Betaproteobacteria",
    simplify = TRUE
  ),
  selected,
  unselected
)
my_color[x$taxon_names() == "Betaproteobacteria"] <- target
supertaxa_recursive <- plot_one(
  title = "Supertaxa (recursive = T)",
  node_color = my_color
)

# Supertaxa of Betaproteobacteria with recursive = FALSE.
my_color <- ifelse(
  x$taxon_indexes() %in% supertaxa(
    x,
    subset = taxon_names == "Betaproteobacteria",
    simplify = TRUE,
    recursive = FALSE
  ),
  selected,
  unselected
)
my_color[x$taxon_names() == "Betaproteobacteria"] <- target
supertaxa_immediate <- plot_one(
  title = "Supertaxa (recursive = F)",
  node_color = my_color
)

# One panel per tree-part helper, highlighting the taxa it flags.
leaf_plot <- plot_one(
  title = "Leaves",
  node_color = ifelse(is_leaf, selected, unselected)
)
root_plot <- plot_one(
  title = "Roots",
  node_color = ifelse(is_root, selected, unselected)
)
stem_plot <- plot_one(
  title = "Stems",
  node_color = ifelse(is_stem, selected, unselected)
)
internode_plot <- plot_one(
  title = "Internodes",
  node_color = ifelse(is_internode, selected, unselected)
)
branch_plot <- plot_one(
  title = "Branches",
  node_color = ifelse(is_branch, selected, unselected)
)
\centering
# Draw the example tree with every taxon labelled, all in grey, using the
# same seed and layout settings as the panel plots.
set.seed(2)
x %>%
  heat_tree(
    node_label = taxon_names,
    node_size = n_obs,
    edge_color = "grey",
    node_color = "grey",
    layout = "da",
    initial_layout = "re",
    make_node_legend = FALSE
  )
\centering
# Arrange the four subtaxa/supertaxa panels in a single grid.
cowplot::plot_grid(
  plotlist = list(
    subtaxa_recursive,
    subtaxa_immediate,
    supertaxa_recursive,
    supertaxa_immediate
  )
)
\centering
# Arrange the leaf, root, stem, and internode panels in a single grid.
cowplot::plot_grid(
  plotlist = list(
    leaf_plot,
    root_plot,
    stem_plot,
    internode_plot
  )
)
Ranks, names, and IDs
taxon_names
, taxon_ranks
, taxon_ids
Parts of the tree
branches
, internodes
, leaves
, roots
, stems
, supertaxa
, subtaxa
Filtering helpers
is_branch
, is_internode
, is_leaf
, is_root
, is_stem
Numbers of supertaxa/subtaxa/data
n_supertaxa
, n_subtaxa
, n_obs
, n_supertaxa_1
, n_subtaxa_1
, n_obs_1
These are derived from the list of taxon
objects.
\scriptsize
# Show the first few taxon names, ranks, and IDs of the example taxmap.
taxon_names(ex_taxmap) %>% head()
taxon_ranks(ex_taxmap) %>% head()
taxon_ids(ex_taxmap) %>% head()
subtaxa
These return a list of vectors named by taxon IDs.
\scriptsize
# First three elements of the subtaxa list, reported as taxon names.
subtaxa(
  ex_taxmap,
  value = "taxon_names"
)[1:3]
subtaxa
These return a list of vectors named by taxon IDs.
\scriptsize
# First three elements of the subtaxa list with recursive = FALSE, reported
# as taxon names.
subtaxa(
  ex_taxmap,
  value = "taxon_names",
  recursive = FALSE
)[1:3]
These return counts of things per taxon.
\scriptsize
# Per-taxon counts: subtaxa, supertaxa, and observations in the "info" and
# "abund" datasets.
n_subtaxa(ex_taxmap)
n_supertaxa(ex_taxmap)
n_obs(ex_taxmap, "info")
n_obs(ex_taxmap, "abund")
\begin{center} \Huge Manipulating \end{center}
Here is the example object that will be used:
# Work on a deep copy of the example taxmap so the original is left
# untouched, then drop its "abund" dataset.
obj <- ex_taxmap$clone(deep = TRUE)
obj$data$abund <- NULL
\scriptsize
# Print the example object.
print(obj)
Here is the example object that will be used:
# Seed the RNG before the heat tree below is drawn (presumably to keep its
# layout reproducible).
set.seed(1)
\scriptsize
# Draw the example object as a heat tree labelled with taxon names.
obj %>%
  heat_tree(
    node_label = taxon_names,
    layout = "da"
  )
Subset taxonomy and user-defined data to one taxon:
\scriptsize
# Keep the taxon named "Plantae" together with its subtaxa.
obj %>%
  filter_taxa(
    taxon_names == "Plantae",
    subtaxa = TRUE
  )
Subset taxonomy and user-defined data to one taxon:
# Seed the RNG before the heat tree below is drawn (presumably to keep its
# layout reproducible).
set.seed(1)
\scriptsize
# Keep "Plantae" and its subtaxa, then draw the result as a heat tree.
obj %>%
  filter_taxa(taxon_names == "Plantae", subtaxa = TRUE) %>%
  heat_tree(
    node_label = taxon_names,
    layout = "da"
  )
Subset taxonomy to one rank:
\scriptsize
# Keep taxa whose rank is "family" together with their supertaxa.
obj %>%
  filter_taxa(
    taxon_ranks == "family",
    supertaxa = TRUE
  )
Subset taxonomy to one rank:
# Seed the RNG before the heat tree below is drawn (presumably to keep its
# layout reproducible).
set.seed(8)
\scriptsize
# Keep family-rank taxa and their supertaxa, then draw the result as a heat
# tree.
obj %>%
  filter_taxa(taxon_ranks == "family", supertaxa = TRUE) %>%
  heat_tree(
    node_label = taxon_names,
    layout = "da"
  )
Subset user-defined data and remove any taxa not in subset:
\scriptsize
# Keep rows of the "info" dataset where n_legs is 4, dropping taxa that are
# no longer in the subset.
filter_obs(
  obj,
  "info",
  n_legs == 4,
  drop_taxa = TRUE
)
Subset data and remove any taxa not in subset:
# Seed the RNG before the heat tree below is drawn (presumably to keep its
# layout reproducible).
set.seed(7)
\scriptsize
# Filter "info" to rows with four legs (dropping unused taxa), then draw the
# remaining taxonomy as a heat tree.
filter_obs(
  obj,
  "info",
  n_legs == 4,
  drop_taxa = TRUE
) %>%
  heat_tree(
    node_label = taxon_names,
    layout = "da"
  )
Add a column to a dataset:
\scriptsize
# Add a logical "bipedal" column to the "info" dataset: TRUE where n_legs
# equals 2.
mutate_obs(
  obj,
  "info",
  bipedal = n_legs == 2
)
\begin{center}
\resizebox{.99\textwidth}{!}{%
\includegraphics[height=0.5cm]{images/ropensci_icon.png}%
\quad
\includegraphics[height=0.5cm]{images/r_icon.jpg}%
\quad
\includegraphics[height=0.5cm]{images/user_icon.png}%
}
\end{center}
\begin{center}
\resizebox{.99\textwidth}{!}{%
\includegraphics[height=0.5cm]{images/osu_icon.png}%
\quad
\includegraphics[height=0.5cm]{images/ars_icon.png}%
}
\end{center}
\begin{center}
\includegraphics[height=7cm]{images/grunwaldlab.jpg}%
\end{center}
The following can be used in manipulation functions as if they were independent variables using Non-Standard Evaluation (NSE):
\begin{itemize} \setlength\itemsep{1em} \item Functions that return per-taxon information \item User-defined table columns \item User-defined vectors and lists \item User-defined functions \end{itemize}
\scriptsize
# List every name returned by all_names() for the example object, with the
# vector's own names stripped.
all_names(obj) %>%
  unname()
\begin{center}
\vspace{-0.3cm}\hspace{-1cm}
\resizebox{0.9\textwidth}{!}{%
\includegraphics{images/heat_tree_matrix.png}%
}
\end{center}
\begin{center}
\vspace{-0.7cm}\hspace{-0.6cm}
\resizebox{1.15\textwidth}{!}{%
\includegraphics{images/metacoder_multiroot.png}%
}
\end{center}
\begin{center} \Huge Questions? \end{center}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.