# inst/doc/assign.ensembleTax_tutorial.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## -----------------------------------------------------------------------------
library("ensembleTax")
packageVersion("ensembleTax")

# Create two fake taxonomy tables of ASVs and taxonomic assignments.
# Both tables share the same ASV identifiers and the same rank columns
# (kingdom, supergroup, division, class, genus); NA marks an unassigned rank.
#
# stringsAsFactors = FALSE is set explicitly so the columns are character
# vectors on every R version (before R 4.0.0 data.frame() converted strings
# to factors by default).
taxtab1 <- data.frame(
  ASV = c("sv1", "sv2", "sv3", "sv4"),
  kingdom = rep("Eukaryota", 4),
  supergroup = c("Stramenopile", "Stramenopile", "Alveolata", "Rhizaria"),
  division = c("Ochrophyta", NA, "Dinoflagellata", NA),
  class = c("Bacillariophyta", NA, NA, NA),
  genus = c("Pseudo-nitzschia", NA, NA, NA),
  stringsAsFactors = FALSE
)
# The second table disagrees with the first for sv2 and sv4 (supergroup),
# and is resolved further for sv2 (division) and sv3 (class).
taxtab2 <- data.frame(
  ASV = c("sv1", "sv2", "sv3", "sv4"),
  kingdom = rep("Eukaryota", 4),
  supergroup = c("Stramenopile", "Alveolata", "Alveolata", "Stramenopile"),
  division = c("Ochrophyta", "Dinoflagellata", "Dinoflagellata", NA),
  class = c("Bacillariophyta", NA, "Syndiniales", NA),
  genus = c("Pseudo-nitzschia", NA, NA, NA),
  stringsAsFactors = FALSE
)
# look at your artificial data:
taxtab1
taxtab2

## -----------------------------------------------------------------------------
# Bundle the two taxonomy tables into a named list; the list names are also
# what gets passed as the table names below.
xx <- list(tab1 = taxtab1, tab2 = taxtab2)

# Two-table ensemble: equal weights, no tie-breaker, NA's counted,
# assign.threshold of 0.
eTax.def <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx)),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0
)

# Print both inputs and then the ensemble so they can be compared:
taxtab1
taxtab2
eTax.def

## -----------------------------------------------------------------------------
# Add a 3rd fake taxonomy table; it is simply a copy of taxtab1.
taxtab3 <- taxtab1
xx.with3 <- list(tab1 = taxtab1, tab2 = taxtab2, tab3 = taxtab3)

# Three-table ensemble: equal weights, no tie-breaker, NA's counted,
# assign.threshold of 0.
eTax.def <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx.with3)),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0
)

# Print the inputs and then the ensemble so they can be compared:
taxtab1 # (taxtab3 is identical to this, so count it 2x)
taxtab2
eTax.def

## -----------------------------------------------------------------------------
# Same two-table ensemble as before, but with count.na = FALSE.
eTax.nona2 <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx)),
  tiebreakz = NULL,
  count.na = FALSE,
  assign.threshold = 0
)

# Print both inputs and then the two-table ensemble:
taxtab1 # (taxtab3, used in the 3-table run below, is identical to this)
taxtab2
eTax.nona2

# Repeat with all three tables (taxtab1 effectively counts 2x).
eTax.nona3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],
  weights = rep(1, length(xx.with3)),
  tiebreakz = NULL,
  count.na = FALSE,
  assign.threshold = 0
)

# ensemble with 3 tables:
eTax.nona3

## -----------------------------------------------------------------------------
# Two-table ensemble with tab2 named as the tie-breaker; NA's are counted.
eTax.tb2 <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx)),
  tiebreakz = "tab2",
  count.na = TRUE,
  assign.threshold = 0
)

# Print both inputs and then the two-table ensemble:
taxtab1 # (taxtab3, used in the 3-table run below, is identical to this)
taxtab2
eTax.tb2

# Same settings with all three tables (taxtab1 effectively counts 2x).
eTax.tb3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],
  weights = rep(1, length(xx.with3)),
  tiebreakz = "tab2",
  count.na = TRUE,
  assign.threshold = 0
)

# ensemble with 3 tables:
eTax.tb3

## -----------------------------------------------------------------------------
# Two-table ensemble with tab1 named as the tie-breaker and count.na = FALSE.
# Note this overwrites the eTax.tb2 object created earlier in the script.
eTax.tb2 <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx)),
  tiebreakz = "tab1",
  count.na = FALSE,
  assign.threshold = 0
)

# Print both inputs and then the ensemble so they can be compared:
taxtab1
taxtab2
eTax.tb2

## -----------------------------------------------------------------------------
# Weighted two-table ensemble (table 1 weighted 2x), counting NA's:
eTax.wt2 <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = c(2, 1),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0
)

# Print both inputs and then the ensemble so they can be compared:
taxtab1
taxtab2
eTax.wt2

# Same weights, but NOT counting NA's (overwrites eTax.wt2):
eTax.wt2 <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],
  weights = c(2, 1),
  tiebreakz = NULL,
  count.na = FALSE,
  assign.threshold = 0
)

# Print the ensemble:
eTax.wt2

## -----------------------------------------------------------------------------
# Three-table ensemble with the second table (tab2) weighted 2x; no
# tie-breaker, NA's counted.
eTax.wt3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = c(1, 2, 1),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0
)

# Print the inputs and then the ensemble:
taxtab1 # (taxtab3 is identical to this, so count it 2x)
taxtab2
eTax.wt3

## -----------------------------------------------------------------------------
# Three-table ensemble with tab2 weighted 2x and tab1 named as the
# tie-breaker; NA's counted.
eTax.wttb3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = c(1, 2, 1),
  tiebreakz = "tab1",
  count.na = TRUE,
  assign.threshold = 0
)

# Print the inputs and then the ensemble:
taxtab1 # (taxtab3 is identical to this, so count it 2x)
taxtab2
eTax.wttb3

## -----------------------------------------------------------------------------
# Same weighted three-table ensemble, now with tab2 named as the tie-breaker
# (overwrites the previous eTax.wttb3).
eTax.wttb3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = c(1, 2, 1),
  tiebreakz = "tab2",
  count.na = TRUE,
  assign.threshold = 0
)

# Print the inputs and then the ensemble:
taxtab1 # (taxtab3 is identical to this, so count it 2x)
taxtab2
eTax.wttb3

## -----------------------------------------------------------------------------
# Two-table ensemble that tie-breaks in favour of table 1, but with
# assign.threshold raised to 60%.
eTax.at <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx)),
  tiebreakz = "tab1",
  count.na = TRUE,
  assign.threshold = 0.6
)

# Print both inputs and then the ensemble so they can be compared:
taxtab1
taxtab2
eTax.at

# Drop the tie-breaker and instead weight table 1 2x (overwrites eTax.at):
eTax.at <- assign.ensembleTax(
  xx,
  tablenames = names(xx),
  ranknames = colnames(taxtab1)[-1],
  weights = c(2, 1),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0.6
)
eTax.at

## -----------------------------------------------------------------------------
# Three equally weighted tables, NA's counted, with a low threshold (0.5):
eTax.at3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx.with3)),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0.5
)
eTax.at3

# Same call with a high threshold (0.9); all 3 tables need to agree here for
# an ensemble assignment. Overwrites eTax.at3.
eTax.at3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],
  weights = rep(1, length(xx.with3)),
  tiebreakz = NULL,
  count.na = TRUE,
  assign.threshold = 0.9
)
eTax.at3

## -----------------------------------------------------------------------------
# Three equally weighted tables with count.na = FALSE and a low
# threshold (0.5):
eTax.at3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],  # every column except ASV
  weights = rep(1, length(xx.with3)),
  tiebreakz = NULL,
  count.na = FALSE,
  assign.threshold = 0.5
)
eTax.at3

# Same call with a high threshold (0.9) and count.na = FALSE (need all 3 to
# agree here for ensemble assignment). Overwrites eTax.at3.
eTax.at3 <- assign.ensembleTax(
  xx.with3,
  tablenames = names(xx.with3),
  ranknames = colnames(taxtab1)[-1],
  weights = rep(1, length(xx.with3)),
  tiebreakz = NULL,
  count.na = FALSE,
  assign.threshold = 0.9
)
eTax.at3

# Try the ensembleTax package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ensembleTax documentation built on May 21, 2021, 5:08 p.m.