knitr::opts_chunk$set(echo = TRUE)

This page evaluates the relationship between predicted and actual species richness of well-studiex taxa of Animalia.

First, explore the most frequently recorded phyla in Animalia

library(shiny)
library(biotaxa)
library(dplyr)
topphyla <- topn("Animalia", "Phylum", 12)
topphyla

Take looks of their discovery curves and how well logistic regressions fit the curves.

library(biotaxa)

taxamodel("Arthropoda", "Species", "logistic")
taxamodel("Mollusca", "Species", "logistic")
taxamodel("Chordata", "Species", "logistic")
taxamodel("Cnidaria", "Species", "logistic")
taxamodel("Echinodermata", "Species", "logistic")
taxamodel("Annelida", "Species", "logistic")
taxamodel("Bryozoa", "Species", "logistic")
taxamodel("Porifera", "Species", "logistic")
taxamodel("Nematoda", "Species", "logistic")
taxamodel("Brachiopoda", "Species", "logistic")
taxamodel("Nemertea", "Species", "logistic")
taxamodel("Platyhelminthes", "Species", "logistic")

We can also explore the growths of classes.

topclasses <- topn("Animalia", "Class", 12)
topclasses
taxamodel("Malacostraca", "Species", "logistic")
taxamodel("Gastropoda", "Species", "logistic")
taxamodel("Actinopterygii", "Species", "logistic")
taxamodel("Polychaeta", "Species", "logistic")
taxamodel("Gymnolaemata", "Species", "logistic")
taxamodel("Bivalvia", "Species", "logistic")
taxamodel("Demospongiae", "Species", "logistic")
taxamodel("Hexanauplia", "Species", "logistic")
taxamodel("Anthozoa", "Species", "logistic")
taxamodel("Hydrozoa", "Species", "logistic")

Collect the predictied and actual values of species richness in one data frame.

library(stringr)
library(drc)
library(data.table)

#Perform the analysis
#First collect the needed information
taxa <- c("Arthropoda","Mollusca", "Chordata", "Cnidaria", "Echinodermata","Annelida", "Bryozoa", "Porifera", "Nematoda", "Brachiopoda", "Nemertea", "Platyhelminthes")
ranks <- rep("Species", length(taxa))
strLines <- mapply(taxa_rich, taxa, ranks)
prediction <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][1]
}
preds <- as.numeric(unlist(lapply(strLines, prediction)))
actual <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][2]
}
actuals <- as.numeric(unlist(lapply(strLines, actual)))
ndf <- data.frame(taxa, actuals, preds)

#Visualization
x11(width = 5, height = 5)
plot(actuals, preds, xlab = "Predicted number of species", ylab = "Actual number of species", pch = 19)
abline(0,1, lty = 2)

add points of classes in Animalia to the plot

taxa <- c("Malacostraca", "Gastropoda", "Actinopterygii", "Polychaeta", "Bivalvia", "Demospongiae", "Hexanauplia", "Anthozoa", "Hydrozoa")
ranks <- rep("Species", length(taxa))
strLines <- mapply(taxa_rich, taxa, ranks)

prediction_class <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][1]
}
preds_classes <- unlist(lapply(strLines, prediction_class))

actual_class <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][2]
}
actuals_classes <- unlist(lapply(strLines, actual_class)) 

ndf_classes <- data.frame(actuals_classes, preds_classes)
x11(width=5, height=5)
#windows.options(width = 5, height = 5)
plot(actuals, preds, xlab = "Predicted number of species", ylab = "Actual number of species", pch = 19)
abline(0,1, lty = 2)
points(preds_classes, actuals_classes, pch = 2, cex = 0.8)
legend(500, 5000, c("Phylum", "Class"), pch = c(19, 2))

Now collect all species richness of all subordinate taxa in Animalia.

data_m <- subset(data_m, Kingdoms == "Animalia")
dim(data_m)
richness <- function(rank) {
  length(spetaxalist(data_m, rank))
}

ranks <- c("Phylum", "Class", "Order", "Family", "Genus")

allrichness <- unlist(lapply(ranks, richness))
forx <- seq(1:length(allrichness))
model <- lm(log(log(allrichness)) ~ forx)
summary(model)

The visulization of species richness of subordinate taxa in Animalia.

plot(log(log(allrichness)) ~ forx, ylab = "log10(log10(species richness))", xlab = "", pch = 19, ylim = c(1, 2.5))
abline(model, col = "red", lwd = 3, lty = 2)
title("Species richness of subordinate taxa in Animalia")
text(1.2, 1.4, "Phylum", cex = 0.8)
text(2.2, 1.6, "Class", cex = 0.8)
text(3.2, 1.8, "Order", cex = 0.8)
text(4.2, 1.9, "Family", cex = 0.8)
text(4.9, 2.0, "Genus", cex = 0.8)

Try Mollusca, one of the most well studies phylum in the dataset.

topmollusca <- topn("Mollusca", "Class", 10)
topmollusca #it appears there are only 8 classes of mollusca in the dataset

Visualise the discovery curves of each class in Mollusca.

df <- subset(data_m, Phyla == "Mollusca")
classes_mollusca <- unique(df$Classes)
taxamodel("Gastropoda", "Species","logistic")
taxamodel("Bivalvia", "Species","logistic")
taxamodel("Cephalopoda", "Species","logistic")
taxamodel("Polyplacophora", "Species","logistic")
taxamodel("Solenogastres", "Species", "logistic")
taxamodel("Monoplacophora", "Species","logistic")
taxa <- c("Gastropoda", "Bivalvia", "Cephalopoda", "Polyplacophora", "Solenogastres")
ranks <- rep("Species", length(taxa))
strLines <- mapply(taxa_rich, taxa, ranks)
prediction <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][1]
}
preds <- as.numeric(unlist(lapply(strLines, prediction)))
actual <- function(strLine) {
  str_extract_all(strLine, "\\(?[0-9]+\\)?")[[1]][2]
}
actuals <- as.numeric(unlist(lapply(strLines, actual)))
ndf <- data.frame(taxa, actuals, preds)

x11(width=5, height=5)

plot(actuals, preds, xlab = "Predicted number of species", ylab = "Actual number of species", pch = 2)
title("Species richness of classes in Mollusca")
abline(0,1, lty = 2)


SCAR/biotaxa documentation built on May 20, 2019, 3:05 p.m.