## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
opts_chunk$set(echo = TRUE)
library(vegan)
library(dplyr)
library(tidyr)
library(ggplot2)
library(dggridR)
set.seed(19)
## ----bt_package_loading, warning=FALSE, message=FALSE, eval=TRUE--------------
# Install (if needed) and load the latest version of BioTIMEr
# install.packages("BioTIMEr")
library(BioTIMEr)
## ----data_description, warning=FALSE, message=FALSE, eval=TRUE, include=TRUE----
# you can run the following commands to retrieve more information about the subsets.
?BTsubset_meta
?BTsubset_data
## ----package_info, warning=FALSE, message=FALSE, eval=TRUE, include=TRUE------
# you can also see a full list of BioTIMEr functions and help pages with:
??BioTIMEr
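# A quick base-R alternative: list the package's exported objects directly
# (ls() on the attached namespace; nothing here is BioTIMEr-specific).
ls("package:BioTIMEr")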
## ----gridding_ex, cache=TRUE, echo=TRUE, message=FALSE, tidy=FALSE, include=TRUE----
grid_samples <- gridding(meta = BTsubset_meta, btf = BTsubset_data, res = 12, resByData = FALSE)
# Get a look at the output
grid_samples |> head() |> kable()
## ----gridding_ex_plot, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3,tidy=FALSE, include=TRUE----
check_res_1 <- grid_samples |>
group_by(STUDY_ID, StudyMethod) |>
summarise(n_cell = n_distinct(cell), n_aID = n_distinct(assemblageID), res = "res12")
check_res_1 |> head(10) |> kable()
# How many samples were there in each study?
ggplot(data = check_res_1) +
geom_bar(mapping = aes(x = as.character(STUDY_ID), y = n_aID, fill = res),
stat = "identity") +
scale_fill_discrete(type = c("#155f49")) +
xlab("StudyID") + ylab("Number of assemblages in a study") +
theme_bw() +
theme(legend.position = "none")
## ----gridding_ex2, cache=TRUE, echo=TRUE, message=FALSE, tidy=FALSE, include=TRUE,results='hold'----
# define an alternative resolution of 14 (~10.7 km2 cells)
grid_samples_14 <- gridding(meta = BTsubset_meta, btf = BTsubset_data, res = 14, resByData = FALSE)
# allow the spatial extent of the data to define the resolution
grid_samples_auto <- gridding(meta = BTsubset_meta, btf = BTsubset_data, res = 12, resByData = TRUE)
# this option also returns a message reporting the automatically picked resolution (15 for this subset)
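# A small sanity check (a sketch relying only on the `cell` column that gridding() returns):
# compare the number of occupied grid cells at each resolution.
sapply(list(res12 = grid_samples, res14 = grid_samples_14, auto = grid_samples_auto),
       function(g) dplyr::n_distinct(g$cell))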
## ----gridding_ex3, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3,tidy=FALSE, include=TRUE----
check_res_2 <- grid_samples_14 |>
group_by(StudyMethod, STUDY_ID) |>
summarise(n_cell = n_distinct(cell), n_aID = n_distinct(assemblageID), res = "res14")
check_res_3 <- grid_samples_auto |>
group_by(StudyMethod, STUDY_ID) |>
summarise(n_cell = n_distinct(cell), n_aID = n_distinct(assemblageID), res = "res15")
checks <- rbind(check_res_1, check_res_2, check_res_3)
ggplot(data = checks) +
geom_bar(mapping = aes(x = as.character(STUDY_ID), y = n_aID, fill = res),
stat = "identity", position = "dodge") +
scale_fill_discrete(type = c("#155f49","#66c1d1","#d9d956")) +
xlab("StudyID") + ylab("Number of assemblages in a study") +
theme_bw()
## ----gridding_map, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3, tidy=FALSE, include=TRUE, results='hold', fig.show='hold'----
## The following example is built on the demonstrations in
## https://cran.r-project.org/web/packages/dggridR/vignettes/dggridR.html.
# First we build the ~96 km2 global grid
dgg_12 <- dggridR::dgconstruct(res = 12)
# To simplify, we only map the grid cell boundaries for cells which
# have observations.
# NOTE: if you are working with the full BioTIME database, this step may take some time.
grid_12 <- dggridR::dgcellstogrid(dgg_12, grid_samples$cell)
# Now let's follow the same steps and build a ~10.7 km2 global grid:
dgg_14 <- dggridR::dgconstruct(res = 14)
grid_14 <- dggridR::dgcellstogrid(dgg_14, grid_samples_14$cell)
# And we get some polygons for each country of the world, to create a background:
countries <- ggplot2::map_data("world")
# Now you could map the whole world, but let's just zoom in on the UK and have a look at
# STUDY 466 (Marine Fish):
map_uk_locations <- ggplot() +
geom_polygon(data = countries, aes(x = long, y = lat, group = group), fill = NA, color = "grey") +
geom_sf(data = grid_12, aes(), color = alpha("blue", 0.4)) +
coord_sf(xlim = c(-20, 10), ylim = c(50, 60)) +
geom_rect(aes(xmin = -11, xmax = -0.7, ymin = 57.2, ymax = 59), colour = "red", fill = NA) +
labs(x = NULL, y = NULL) +
theme_bw() + theme(text = element_text(size = 8))
zoom_in_map <- ggplot() +
geom_polygon(data = countries, aes(x = long, y = lat, group = group), fill = NA,
color = "grey") +
geom_sf(data = grid_12, aes(), color = alpha("blue", 0.4)) +
geom_sf(data = grid_14, aes(), color = alpha("red", 0.4)) +
coord_sf(xlim = c(-11, -0.7), ylim = c(57.2, 59)) +
theme_bw()
grid::grid.newpage()
main <- grid::viewport(width = 1, height = 1, x = 0.5, y = 0.5) # the main map
inset <- grid::viewport(width = 0.4, height = 0.4, x = 0.82, y = 0.45) # the inset, placed on the right
# The resulting distribution of the different-sized cells appears as follows:
print(zoom_in_map, vp = main)
print(map_uk_locations, vp = inset)
## ----resampling_ex, cache=TRUE, echo=TRUE, message=FALSE, tidy=FALSE, include=TRUE----
# First, if you are not sure you need this step,
# you can always check how many samples there are in every year of the different time series:
check_samples <- grid_samples |>
group_by(STUDY_ID, assemblageID, YEAR) |>
summarise(n_samples = n_distinct(SAMPLE_DESC), n_species = n_distinct(Species))
check_samples |> head(10) |> kable()
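# A short sketch flagging assemblages whose sampling effort varies among years --
# these are the cases where sample-based rarefaction actually matters:
effort_check <- check_samples |>
  group_by(assemblageID) |>
  summarise(effort_varies = n_distinct(n_samples) > 1, .groups = "drop")
table(effort_check$effort_varies)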
## ----resampling_ex1, cache=TRUE, echo=TRUE, message=FALSE, tidy=FALSE, include=TRUE----
# Let's apply resampling() then, using the data frame of the gridded data:
grid_rare <- resampling(x = grid_samples, measure = "ABUNDANCE",
resamps = 1, conservative = FALSE)
## ----resampling_ex12, cache=TRUE, echo=TRUE, message=FALSE, tidy=TRUE, include=TRUE----
# Keep only observations with both abundance and biomass
grid_rare_ab <- resampling(x = grid_samples, measure = c("ABUNDANCE", "BIOMASS"),
resamps = 1, conservative = FALSE)
# Keep only sampling events where all observations within had both abundance and biomass to start with
grid_rare_abT <- resampling(x = grid_samples, measure = c("ABUNDANCE", "BIOMASS"),
resamps = 1, conservative = TRUE)
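# A quick sketch comparing how much data each option retains:
c(non_conservative = nrow(grid_rare_ab), conservative = nrow(grid_rare_abT))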
## ----resampling_ex3, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3, tidy=FALSE, include=TRUE, results='hold', fig.show='hold'----
# What is the number of samples in the year with the lowest sampling effort?
ggplot(data = check_samples[check_samples$assemblageID == "18_335699",], aes(x = YEAR, y = n_samples)) +
geom_col(fill = "red", alpha = 0.5) +
geom_segment(aes(x = 1926, y = min(n_samples) + 3,
xend = 1927, yend = min(n_samples)),
arrow = arrow(length = unit(0.2, "cm"))) +
xlab("Year") + ylab("Number of samples") +
theme_bw()
# In this case, the year 1927 had the lowest sampling effort, with 3 samples (arrow).
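# The same minimum can be found programmatically (a sketch using dplyr::slice_min):
check_samples |>
  filter(assemblageID == "18_335699") |>
  ungroup() |>
  slice_min(n_samples, n = 1)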
## ----resampling_ex4, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3, tidy=FALSE, include=TRUE, results='hold', fig.show='hold'----
# Let's implement the sample-based rarefaction by resampling the dataset 10 times.
grid_rare_n10 <- resampling(x = grid_samples, measure = "ABUNDANCE", resamps = 10,
conservative = FALSE)
# Note that you may want to resample many more times (e.g. at least 30-100+ times, and up to 199
# when working with the full BioTIME database), depending on how many iterations you want a
# subsequent bootstrap analysis to have.
# This can also take considerable computation time, so if you are working with a big subset of
# BioTIME it is advisable to break it down into smaller subsets.
# Each resampling iteration will be identified as resamp = 1:n, in this case 1:10.
# Now we can check if there are differences across the first 3 of these iterations:
check_resamps <- grid_rare_n10[grid_rare_n10$resamp < 4,] |>
group_by(STUDY_ID, assemblageID, resamp) |>
summarise(n_obs = n(), n_species = n_distinct(Species), n_year = n_distinct(YEAR))
check_resamps |> head(10) |> kable()
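# A sketch of how the iterations could feed a simple variability summary,
# e.g. the spread in rarefied species counts per assemblage:
grid_rare_n10 |>
  group_by(assemblageID, resamp) |>
  summarise(n_species = n_distinct(Species), .groups = "drop") |>
  group_by(assemblageID) |>
  summarise(mean_S = mean(n_species), sd_S = sd(n_species)) |>
  head(6) |> kable()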
## ----metrics, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3, tidy=FALSE, include=TRUE, fig.show='hold'----
# Get alpha metrics estimates:
alpha_metrics <- getAlphaMetrics(x = grid_rare, measure = "ABUNDANCE")
# see also help("getAlphaMetrics") for more details on the metrics
# Have a quick look at the output
alpha_metrics |> head(6) |> kable()
# Get beta metrics estimates:
beta_metrics <- getBetaMetrics(x = grid_rare, measure = "ABUNDANCE")
#see also help("getBetaMetrics") for more details on the metrics
# Have a quick look at the output
beta_metrics |> head(6) |> kable()
# NOTE: both functions used the rarefied data with only one resampling iteration (grid_rare)
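# If you resampled several times (as in grid_rare_n10 above), one possible approach --
# assuming getAlphaMetrics() accepts each iteration's subset unchanged -- is to loop
# over iterations and stack the results for a bootstrap-style summary:
# alpha_list <- lapply(split(grid_rare_n10, grid_rare_n10$resamp),
#                      getAlphaMetrics, measure = "ABUNDANCE")
# alpha_by_resamp <- do.call(rbind, alpha_list)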
## ----tests2, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3, tidy=FALSE, include=FALSE, fig.show='hold'----
# The same metrics can be computed from the biomass-based rarefaction.
# Results are stored under new names (*_bm) so the abundance-based objects above are not overwritten.
alpha_metrics_bm <- getAlphaMetrics(x = grid_rare_ab, measure = "BIOMASS")
alpha_metrics_bm |> head(6) |> kable()
beta_metrics_bm <- getBetaMetrics(x = grid_rare_ab, measure = "BIOMASS")
beta_metrics_bm |> head(6) |> kable()
## ----trends, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=3, tidy=FALSE, include=TRUE, results='hold', fig.show='hold'----
# Let's fit linear regressions to the yearly diversity estimates:
alpha_slopes <- getLinearRegressions(x = alpha_metrics, divType = "alpha",
pThreshold = 0.05) #for alpha metrics
# Have a quick look at the output
alpha_slopes |> head(6) |> kable()
beta_slopes <- getLinearRegressions(x = beta_metrics, divType = "beta",
pThreshold = 0.05) #for beta metrics
# Have a quick look at the output
beta_slopes |> head(6) |> kable()
## ----trends2, cache=TRUE, echo=TRUE, message=FALSE, fig.width=10, fig.height=7,tidy=FALSE, include=TRUE----
# First, how many assemblages in our dataset show moderate evidence (P < 0.05) of change in alpha diversity?
check_alpha_trend <- alpha_slopes |>
group_by(metric) |>
filter(significance == 1) |> # or use filter(pvalue<0.05)
summarise(n_sig = n_distinct(assemblageID), mean_slope = mean(slope))
check_alpha_trend |> kable()
# We can see that only a few (<40) of the assemblage time series show a significant
# trend over time, regardless of the metric used. This indicates that in most of the
# time series we analysed, alpha diversity is not detectably changing through time.
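# The same result expressed as proportions (a sketch assuming alpha_slopes carries one
# significance flag per assemblage-metric pair):
alpha_slopes |>
  distinct(assemblageID, metric, significance) |>
  group_by(metric) |>
  summarise(prop_sig = mean(significance == 1)) |>
  kable()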
## ----trends3, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7.2, fig.height=5,tidy=FALSE, include=TRUE,results='hold'----
# Get a slope per assemblageID and metric
alpha_slopes_simp <- alpha_slopes |>
group_by(assemblageID, metric, pvalue) |>
filter(significance == 1) |> #select only the assemblages with significant trends
summarise(slope = unique(slope))
# Calculate the mean slope and CIs
stats_alpha <- alpha_slopes_simp |>
group_by(metric) |>
summarise(mean_slope = mean(slope), #mean
ci_slope = qt(0.95, df = length(slope) - 1) * (sd(slope, na.rm = TRUE) / sqrt(length(slope)))) #margin of error (qt(0.95, ...) gives a 90% two-sided CI; use qt(0.975, ...) for 95%)
# Let's put it all together
ggplot(data = alpha_slopes_simp) +
geom_histogram(aes(x = slope, fill = metric), bins = 25) +
#geom_density(alpha=0.5)+ #in case you want a density plot instead
geom_vline(aes(xintercept = 0), linetype = 3, colour = "grey", linewidth = 0.5) + #add slope=0 line
geom_vline(data = stats_alpha, aes(xintercept = mean_slope), linetype = 1,
linewidth = 0.5,colour = "black") + #mean
geom_vline(data = stats_alpha, aes(xintercept = mean_slope - ci_slope),
linetype = 2, linewidth = 0.5, colour = "black") + #lower confidence interval
geom_vline(data = stats_alpha, aes(xintercept = mean_slope + ci_slope),
linetype = 2, linewidth = 0.5, colour = "black") + #upper confidence interval
facet_wrap(~metric, scales = "free") +
scale_fill_biotime() + #using the customized BioTIME colour scale. See help("scale_color_biotime") for more options.
ggtitle("Alpha diversity change") +
theme_bw() +
theme(legend.position = "none", plot.title = element_text(size = 11, hjust = 0.5))
## ----trends4, cache=TRUE, echo=FALSE, message=FALSE, fig.width=7.2, fig.height=1.9,tidy=FALSE, include=TRUE,results='hold'----
# Get a slope per assemblageID and metric
beta_slopes_simp <- beta_slopes |>
group_by(assemblageID, metric, pvalue) |>
filter(significance == 1) |> #select only the assemblages with significant trends
summarise(slope = unique(slope))
# Calculate the mean slope and CIs
stats_beta <- beta_slopes_simp |>
group_by(metric) |>
summarise(mean_slope = mean(slope), #mean
ci_slope = qt(0.95, df = length(slope) - 1) * (sd(slope, na.rm = TRUE) / sqrt(length(slope)))) #margin of error (qt(0.95, ...) gives a 90% two-sided CI; use qt(0.975, ...) for 95%)
#Let's put it all together
ggplot(data = beta_slopes_simp) +
geom_histogram(aes(x = slope, fill = metric), bins = 25) +
#geom_density(alpha=0.5)+ #in case you want a density plot instead
geom_vline(aes(xintercept = 0), linetype = 3, colour = "grey", linewidth = 0.5) + #add slope=0 line
geom_vline(data = stats_beta, aes(xintercept = mean_slope), linetype = 1,
linewidth = 0.5, colour = "black") + #mean
geom_vline(data = stats_beta, aes(xintercept = mean_slope - ci_slope),
linetype = 2, linewidth = 0.5, colour = "black") + #lower confidence interval
geom_vline(data = stats_beta, aes(xintercept = mean_slope + ci_slope),
linetype = 2, linewidth = 0.5, colour = "black") + #upper confidence interval
facet_wrap(~metric, scales = "free") +
scale_fill_biotime() + #using the customized BioTIME colour scale. See help("scale_color_biotime") for more options.
ggtitle("Beta diversity change") +
theme_bw() +
theme(legend.position = "none", plot.title = element_text(size = 11, hjust = 0.5))
## ----trends5, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7.2, fig.height=1.9,tidy=FALSE, include=TRUE,results='hold'----
# Hint: If you wish to plot all metrics together, simply merge your alpha_slopes and
# beta_slopes data frames beforehand, as in the sketch below.
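# A minimal sketch (both *_simp data frames share the same columns at this point):
all_slopes_simp <- dplyr::bind_rows(
  alpha_slopes_simp |> mutate(divType = "alpha"),
  beta_slopes_simp |> mutate(divType = "beta")
)
# all_slopes_simp can then be passed to the same ggplot() calls used above.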
## ----trends6, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7.2, fig.height=2.3,tidy=FALSE, include=TRUE,results='hold'----
# First, we need to check the meta file and retrieve the information for the studies of interest
#head(BTsubset_meta)
meta <- select(BTsubset_meta, STUDY_ID, TAXA, REALM, CLIMATE)
# Get a slope per assemblageID and metric
alpha_slopes_simp <- alpha_slopes |>
group_by(assemblageID, metric, pvalue, significance) |>
summarise(slope = unique(slope))
# Get back the Study ID by separating our assemblageID column into multiple other columns.
alpha_slopes_simp <- alpha_slopes_simp |>
separate_wider_delim(
cols = assemblageID,
names = c("STUDY_ID", "cell"),
delim = "_",
cols_remove = FALSE
) |>
as.data.frame()
# Merge it all
alpha_slopes_meta <- merge(alpha_slopes_simp, meta, by = "STUDY_ID")
# Select relevant data for plotting
alpha_slopes_set1 <- subset(
alpha_slopes_meta,
alpha_slopes_meta$metric == "Shannon" &
(alpha_slopes_meta$TAXA == "Fish" | alpha_slopes_meta$TAXA == "Birds")
)
# Let's put it all together
ggplot(data = alpha_slopes_set1, aes(x = slope, fill = TAXA)) +
geom_histogram(fill = "grey", bins = 25) + #all assemblages
geom_histogram(
data = alpha_slopes_set1[alpha_slopes_set1$significance == 1, ],
bins = 25
) + #only significant change assemblages
geom_vline(
aes(xintercept = 0),
linetype = 3,
colour = "black",
linewidth = 0.5
) + #add slope=0 line
facet_wrap(~TAXA, scales = "free") +
scale_fill_biotime() +
ggtitle("Shannon-Weiner Species Diversity Index") +
theme_bw() +
theme(plot.title = element_text(size = 11, hjust = 0.5))
## ----trends7, cache=TRUE, echo=TRUE, message=FALSE, fig.width=7.2, fig.height=1.9, include=TRUE,results='hold'----
# Now we see the full distribution of slopes of change for the Shannon index for the birds
# and fish time series in our subset: all slopes are shown in grey, while in color we show
# the subset of assemblages for which evidence (P < 0.05) of change was detected.
## ----trends8, cache=TRUE, echo=TRUE, message=FALSE, tidy=FALSE, include=TRUE, fig.width=7.2, fig.height=5, eval=TRUE, results='hold', fig.show='hold'----
# Let's go back to the data frame with the yearly values and select our relevant data for plotting
alpha_metrics_set <- subset(alpha_metrics, assemblageID == "18_335699")
# Transform data
alpha_metrics_set_long <- pivot_longer(alpha_metrics_set,
cols = c("S", "N", "maxN", "Shannon", "expShannon",
"Simpson", "invSimpson", "PIE", "DomMc"),
names_to = "metric",
names_transform = as.factor)
# Get assemblage ID slope of change
alpha_slopes_set2 <- subset(alpha_slopes, assemblageID == "18_335699")
# Get assemblage ID
name_assemblage <- unique(alpha_slopes_set2$assemblageID)
# Merge info
alpha_metr_slop <- left_join(alpha_slopes_set2, alpha_metrics_set_long,
join_by(assemblageID, metric))
# Let's put it all together
ggplot(data = alpha_metr_slop, aes(x = YEAR, y = value)) +
geom_point(colour = "#155f49", size = 1) + #plot the yearly estimates
stat_smooth(method = "lm", se = FALSE, linetype = 2, colour = "grey") + #draw a regression line for every metric
stat_smooth(data = subset(alpha_metr_slop, alpha_metr_slop$significance == 1),
aes(x = YEAR, y = value), method = "lm", se = FALSE,
linetype = 2, colour = "#155f49") + #color only trends that are significant
facet_wrap(~metric, scales = "free") +
scale_fill_biotime() +
ggtitle(paste("Assemblage", name_assemblage)) + ylab("Diversity") +
theme_bw() + theme(plot.title = element_text(size = 11, hjust = 0.5))
## ----trends9, cache=TRUE, echo=TRUE, message=FALSE, fig.width=10, fig.height=7, tidy=FALSE, include=FALSE, results='hold', fig.show='hold'----
# Hint: If you want to draw the p-value on the plot, you can try ggpmisc::stat_fit_glance,
# which takes anything that can be passed to lm() in R and allows the fit statistics to be
# processed and printed with ggplot2. A hedged sketch follows.
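# A hedged sketch (assumes the ggpmisc package is installed; the label mapping follows
# the pattern shown in the ggpmisc documentation, kept commented to avoid a hard dependency):
# library(ggpmisc)
# ggplot(alpha_metr_slop, aes(x = YEAR, y = value)) +
#   geom_point() +
#   stat_smooth(method = "lm", se = FALSE) +
#   stat_fit_glance(method = "lm", method.args = list(formula = y ~ x),
#                   aes(label = paste("P =", signif(after_stat(p.value), 3))),
#                   size = 3) +
#   facet_wrap(~metric, scales = "free") +
#   theme_bw()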
## ----citation, cache=TRUE, echo=FALSE, message=FALSE, fig.width=10, fig.height=7, tidy=FALSE, include=FALSE, results='hold', fig.show='hold'----
citation("BioTIMEr")