In SCAR/biotaxa: Visualising the growth and prediction of taxonomic diversity and identifying taxonomic incompleteness and imprecision

knitr::opts_chunk$set(echo = TRUE)

This page evaluates the relationship between predicted and actual species richness of well-studiex taxa of Animalia.

First, explore the most frequently recorded phyla in Animalia

library(shiny)
library(biotaxa)
library(dplyr)
topphyla <- topn("Animalia", "Phylum", 12)
topphyla

Take looks of their discovery curves and how well logistic regressions fit the curves.

library(biotaxa)

taxamodel("Arthropoda", "Species", "logistic")
taxamodel("Mollusca", "Species", "logistic")
taxamodel("Chordata", "Species", "logistic")
taxamodel("Cnidaria", "Species", "logistic")
taxamodel("Echinodermata", "Species", "logistic")
taxamodel("Annelida", "Species", "logistic")
taxamodel("Bryozoa", "Species", "logistic")
taxamodel("Porifera", "Species", "logistic")
taxamodel("Nematoda", "Species", "logistic")
taxamodel("Brachiopoda", "Species", "logistic")
taxamodel("Nemertea", "Species", "logistic")
taxamodel("Platyhelminthes", "Species", "logistic")

We can also explore the growths of classes.

topclasses <- topn("Animalia", "Class", 12)
topclasses

taxamodel("Malacostraca", "Species", "logistic")
taxamodel("Gastropoda", "Species", "logistic")
taxamodel("Actinopterygii", "Species", "logistic")
taxamodel("Polychaeta", "Species", "logistic")
taxamodel("Gymnolaemata", "Species", "logistic")
taxamodel("Bivalvia", "Species", "logistic")
taxamodel("Demospongiae", "Species", "logistic")
taxamodel("Hexanauplia", "Species", "logistic")
taxamodel("Anthozoa", "Species", "logistic")
taxamodel("Hydrozoa", "Species", "logistic")

Collect the predictied and actual values of species richness in one data frame.

library(stringr)
library(drc)
library(data.table)

#Perform the analysis
#First collect the needed information
taxa <- c("Arthropoda","Mollusca", "Chordata", "Cnidaria", "Echinodermata","Annelida", "Bryozoa", "Porifera", "Nematoda", "Brachiopoda", "Nemertea", "Platyhelminthes")
ranks <- rep("Species", length(taxa))
strLines <- mapply(taxa_rich, taxa, ranks)
prediction <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][1]
}
preds <- as.numeric(unlist(lapply(strLines, prediction)))
actual <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][2]
}
actuals <- as.numeric(unlist(lapply(strLines, actual)))
ndf <- data.frame(taxa, actuals, preds)

#Visualization
x11(width = 5, height = 5)
plot(actuals, preds, xlab = "Predicted number of species", ylab = "Actual number of species", pch = 19)
abline(0,1, lty = 2)

add points of classes in Animalia to the plot

taxa <- c("Malacostraca", "Gastropoda", "Actinopterygii", "Polychaeta", "Bivalvia", "Demospongiae", "Hexanauplia", "Anthozoa", "Hydrozoa")
ranks <- rep("Species", length(taxa))
strLines <- mapply(taxa_rich, taxa, ranks)

prediction_class <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][1]
}
preds_classes <- unlist(lapply(strLines, prediction_class))

actual_class <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][2]
}
actuals_classes <- unlist(lapply(strLines, actual_class)) 

ndf_classes <- data.frame(actuals_classes, preds_classes)
x11(width=5, height=5)
#windows.options(width = 5, height = 5)
plot(actuals, preds, xlab = "Predicted number of species", ylab = "Actual number of species", pch = 19)
abline(0,1, lty = 2)
points(preds_classes, actuals_classes, pch = 2, cex = 0.8)
legend(500, 5000, c("Phylum", "Class"), pch = c(19, 2))

Now collect all species richness of all subordinate taxa in Animalia.

data_m <- subset(data_m, Kingdoms == "Animalia")
dim(data_m)
richness <- function(rank) {
  length(spetaxalist(data_m, rank))
}

ranks <- c("Phylum", "Class", "Order", "Family", "Genus")

allrichness <- unlist(lapply(ranks, richness))
forx <- seq(1:length(allrichness))
model <- lm(log(log(allrichness)) ~ forx)
summary(model)

The visulization of species richness of subordinate taxa in Animalia.

plot(log(log(allrichness)) ~ forx, ylab = "log10(log10(species richness))", xlab = "", pch = 19, ylim = c(1, 2.5))
abline(model, col = "red", lwd = 3, lty = 2)
title("Species richness of subordinate taxa in Animalia")
text(1.2, 1.4, "Phylum", cex = 0.8)
text(2.2, 1.6, "Class", cex = 0.8)
text(3.2, 1.8, "Order", cex = 0.8)
text(4.2, 1.9, "Family", cex = 0.8)
text(4.9, 2.0, "Genus", cex = 0.8)

Try Mollusca, one of the most well studies phylum in the dataset.

topmollusca <- topn("Mollusca", "Class", 10)
topmollusca #it appears there are only 8 classes of mollusca in the dataset

Visualise the discovery curves of each class in Mollusca.

df <- subset(data_m, Phyla == "Mollusca")
classes_mollusca <- unique(df$Classes)
taxamodel("Gastropoda", "Species","logistic")
taxamodel("Bivalvia", "Species","logistic")
taxamodel("Cephalopoda", "Species","logistic")
taxamodel("Polyplacophora", "Species","logistic")
taxamodel("Solenogastres", "Species", "logistic")
taxamodel("Monoplacophora", "Species","logistic")

taxa <- c("Gastropoda", "Bivalvia", "Cephalopoda", "Polyplacophora", "Solenogastres")
ranks <- rep("Species", length(taxa))
strLines <- mapply(taxa_rich, taxa, ranks)
prediction <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][1]
}
preds <- as.numeric(unlist(lapply(strLines, prediction)))
actual <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][2]
}
actuals <- as.numeric(unlist(lapply(strLines, actual)))
ndf <- data.frame(taxa, actuals, preds)

x11(width=5, height=5)

plot(actuals, preds, xlab = "Predicted number of species", ylab = "Actual number of species", pch = 2)
title("Species richness of classes in Mollusca")
abline(0,1, lty = 2)

SCAR/biotaxa documentation built on May 20, 2019, 3:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SCAR/biotaxa
Visualising the growth and prediction of taxonomic diversity and identifying taxonomic incompleteness and imprecision

In SCAR/biotaxa: Visualising the growth and prediction of taxonomic diversity and identifying taxonomic incompleteness and imprecision

R Package Documentation

Browse R Packages

We want your feedback!

SCAR/biotaxa Visualising the growth and prediction of taxonomic diversity and identifying taxonomic incompleteness and imprecision

In SCAR/biotaxa: Visualising the growth and prediction of taxonomic diversity and identifying taxonomic incompleteness and imprecision

R Package Documentation

Browse R Packages

We want your feedback!

SCAR/biotaxa
Visualising the growth and prediction of taxonomic diversity and identifying taxonomic incompleteness and imprecision