knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE)
# For avoiding long waiting time, read already saved/cached objects
nest <- readRDS(file = "./man/cache/explain-sampling-method-nest-1.rds")
# Install bootstrapnet if not already done:
# install.packages("devtools")
# devtools::install_github("valentinitnelav/bootstrapnet")
library(bootstrapnet)
library(bipartite)
library(dplyr)
library(data.table)
library(ggplot2)
library(animation)
library(patchwork)

library(filesstrings)

data(Safariland)
matrix_plot <- function(data){
  ggplot(data = data,
         aes(x = Insects,
             y = Plants)) + 
    geom_raster(aes(fill = counts_fct)) +
    scale_fill_manual(values = "gray", na.value = "transparent") +
    geom_text(aes(label = counts),
              size = 3) +
    coord_fixed() +
    theme_bw() +
    theme(axis.text.x = element_text(size = 9, angle = 90, hjust = 1, vjust = 0.3),
          axis.text.y = element_text(size = 9),
          legend.position = "none")
}

matrix_plot_sampled <- function(data){
  data %>% 
    count(lower, higher) %>% 
    rename(Plants = lower,
           Insects = higher) %>% 
    full_join(x = saf_long, y = ., by = c("Plants", "Insects")) %>% 
    mutate(counts = n,
           counts_fct = ifelse(is.na(n), yes = NA, no = "red"),
           Plants = factor(Plants, levels = levels(saf_long$Plants)),
           Insects = factor(Insects, levels = levels(saf_long$Insects))) %>% 
    matrix_plot() +
    theme(axis.title = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank())
}

plot_nest <- function(df_nest){
  ggplot(data = df_nest,
         aes(x = n_interctions,
             y = nestedness)) +
    geom_point() +
    geom_line() +
    scale_x_continuous(breaks = seq(from = 100, to = 1200, by = 100),
                       limits = c(100, 1150)) +
    scale_y_continuous(breaks = seq(from = 15, to = 35, by = 5),
                       limits = c(19, 35)) +
    theme_bw()
}

One iteration

Explain

Consider the Safariland web from the bipartite package, which looks like this:

saf_long <- reshape2::melt(Safariland) %>%
  rename(Plants = Var1,
         Insects = Var2,
         counts = value) %>% 
  filter(counts != 0) %>% 
  mutate(counts_fct = "gray",
         Plants = reorder(Plants, desc(Plants)))

matrix_plot(saf_long)
# visweb(Safariland, labsize = 4, text = "interaction", textsize = 2)
# kable(Safariland)

There are r sum(Safariland) total interactions in the web. We can draw the first start sample of, say, 100 random interactions without replacement.

dt <- Safariland %>% web_matrix_to_df() %>% setDT()
ids_lst <- bootstrapnet:::sample_indices(data = dt, start = 100, step = 50, seed = 666)

Side note: for a start value of 100 interactions and a step of 50, the sampling procedure splits the r sum(Safariland) interactions into r length(ids_lst) sub-webs (first one contains 100 interactions, then each subsequent one contains approximatively 50 more sampled interactions on top of the previous one).

So, with the start sample of 100 interactions we can form this web below (Fig 2). All names of plants and insects are kept for easy visual comparison with the entire web from above (Fig 1).

# dt[ids_lst[[1]], table(lower, higher)] %>%
#   visweb(labsize = 1, text = "interaction", textsize = 2)
dt[ids_lst[[1]]] %>% 
  count(lower, higher) %>% 
  rename(Plants = lower,
         Insects = higher) %>% 
  full_join(x = saf_long, y = ., by = c("Plants", "Insects")) %>% 
  mutate(counts = n,
         counts_fct = ifelse(is.na(n), yes = NA, no = "red"),
         Plants = factor(Plants, levels = levels(saf_long$Plants)),
         Insects = factor(Insects, levels = levels(saf_long$Insects))) %>% 
  matrix_plot()

Adding about 50 more sampled interactions to the previous one, we get the second sub-web, which looks like this (Fig. 3):

# dt[ids_lst[[2]], table(lower, higher)] %>% 
#   visweb(labsize = 1, text = "interaction", textsize = 2)
dt[ids_lst[[2]]] %>% 
  count(lower, higher) %>% 
  rename(Plants = lower,
         Insects = higher) %>% 
  full_join(x = saf_long, y = ., by = c("Plants", "Insects")) %>% 
  mutate(counts = n,
         counts_fct = ifelse(is.na(n), yes = NA, no = "red"),
         Plants = factor(Plants, levels = levels(saf_long$Plants)),
         Insects = factor(Insects, levels = levels(saf_long$Insects))) %>% 
  matrix_plot()

Note that, in the second sub-web we managed to sample new species by sampling 50 more interactions:

And we keep on sampling without replacement until we sample the entire Safariland. For each sub-web we can compute a network index, say 'nestedness'. So, we get a vector of r length(ids_lst) values, the last one corresponding to the entire network.

metric_lst <- vector(mode = "list", length = length(ids_lst))
for (i in 1:length(ids_lst)){
  metric_lst[[i]] <- try({
    web <- dt[ids_lst[[i]], table(lower, higher)]
    set.seed(42)
    bipartite::networklevel(web = web, index = "nestedness", level = "both")
  })
}
# Prepare results
df_nest <- metric_lst %>%
  lapply(rbind) %>%
  lapply(as.data.frame) %>%
  data.table::rbindlist()
df_nest$n_interctions <- ids_lst %>% sapply(length)

ggplot(data = df_nest,
       aes(x = n_interctions,
           y = nestedness)) +
  geom_point() +
  geom_line() +
  theme_bw()

Animation

Below is an animation of the sampling method (one iteration):

safariland_gg <- matrix_plot(saf_long) +
  scale_x_discrete(position = "top") +
  theme(axis.text.x = element_text(size = 9, angle = 90, hjust = 0),
        axis.title = element_blank())

saveGIF({
  for (i in 1:length(ids_lst)) {
    safariland_sample_gg <- dt[ids_lst[[i]]] %>% matrix_plot_sampled()

    nest_gg <- plot_nest(df_nest[1:i]) +
      coord_fixed(ratio = 20)

    wrap_plots(safariland_gg, safariland_sample_gg, nest_gg,
               ncol = 1) %>% 
      plot()
  }
},
movie.name = "sample-nestedness-1-boot.gif",
ani.width = 600, ani.height = 800,
interval = 0.5)

file.move(files = "./sample-nestedness-1-boot.gif",
          destinations = "./man/cache",
          overwrite = TRUE)

Multiple iterations

If we repeat this sampling procedure n times, then we get n lines. We can then compute an average line with 95% quantile based confidence intervals as in Fig. 5 below.

nest <- list(web = Safariland) %>% 
  lapply(web_matrix_to_df) %>%
  boot_networklevel(col_lower = "lower", # column name for plants
                    col_higher = "higher", # column name for insects
                    index = "nestedness",
                    level = "both", # here, nestedness is not affected by level
                    start = 100,
                    step = 50,
                    n_boot = 10,
                    n_cpu = 2)

saveRDS(nest, file = "./man/cache/explain-sampling-method-nest-1.rds")
gg_networklevel(nest)$nestedness +
  labs(x = "n_interctions") +
  theme_bw() +
  theme(legend.position = "none")

Such accumulation/rarefaction curves allow comparison of networks/webs with different number of interactions. Ideally the indices/metrics will be compared if the curves display a trend of reaching an asymptote. That means that if we keep on investing effort to sample interactions (observe plant-pollinator in the field) we will not gain much further information, so network comparison is already possible.



valentinitnelav/bootstrapnet documentation built on June 5, 2024, 3:21 p.m.