#' @title
#' Prepare for plotting the bootstrapped network level metric results of one or
#' multiple networks.
#'
#' @description
#' Takes a list of network interactions (each interaction being repeated as
#' many times as it was observed), and bootstraps the network level metric
#' (`index`) for each network. Runs `boot_networklevel_n()` for a list of
#' network interactions and prepares the data for plotting with `ggplot`. The
#' output list can be passed to \code{\link[bootstrapnet]{gg_networklevel}}.
#' See examples below.
#'
#' @param lst
#' A list of one or multiple data frames of interactions from which to build
#' and sample web matrices. Each interaction (row in the data frame) must be
#' repeated as many times as it was observed. E.g. if the interaction species_1
#' x species_2 was observed 5 times, then repeat that row 5 times within the
#' data frame.
#'
#' @param col_lower
#' Quoted column name in `data` for lower trophic level species (plants).
#'
#' @param col_higher
#' Quoted column name in `data` for higher trophic level species (insects).
#'
#' @param index
#' The name of the network level metric. Passed to
#' \code{\link[bipartite]{networklevel}}. See `?bipartite::networklevel` for
#' details.
#'
#' @param level
#' For which level should the level-specific indices be computed: 'both'
#' (default), 'lower' or 'higher'? Passed to
#' \code{\link[bipartite]{networklevel}}. See `?bipartite::networklevel` for
#' details.
#'
#' @param start
#' Integer. The sample size (number of interactions) to start the bootstrap
#' with. If the start sample size is small (e.g. 5 or 10), then first
#' iterations might results in NaN-s and warning messages are displayed.
#' Consider to set `start` to maybe 10\\% of your total unique interactions.
#'
#' @param step
#' Integer. Sample size (number of interactions) used to increase gradually the
#' sampled network until all interactions are sampled. If `step` is too small
#' (e.g. 1) then the computation time is very long depending on your total
#' number of interactions from which samples are taken. Consider to set `step`
#' to maybe 5-10\\% of your total unique interactions.
#'
#' @param n_boot
#' Number of desired bootstraps (50 or 100 can be enough).
#'
#' @param n_cpu
#' Number of CPU-s to use for parallel processing.
#'
#' @param probs
#' A numeric vector of two probabilities in `[0, 1]`. Passed to
#' \code{\link[matrixStats]{rowQuantiles}} and used for building the lower and
#' upper bounds of the confidence intervals. Defaults to `c(0.025, 0.975)`,
#' which corresponds to a 95\\% confidence interval.
#'
#' @param ...
#' Other arguments passed to \code{\link[bipartite]{networklevel}} like
#' `logbase`, etc.
#'
#' @return
#' A list of one (when `level = 'lower'` or `level = 'higher'`) or two
#' sub-lists (when `level = 'both'`). The list can be passed to
#' \code{\link[bootstrapnet]{gg_networklevel}}. Each sub-list, contains two
#' data frames: `stats_df` and `lines_df`, which can be used by the
#' `ggplot2::geom_line()` function. See the return section of
#' \code{\link[bootstrapnet]{get_stats_single}} for more details about
#' `stats_df` and `lines_df` data frames.
#'
#' @examples
#'
#' library(bootstrapnet)
#' library(bipartite)
#' library(magrittr)
#' data(Safariland)
#'
#' set.seed(321)
#' Safariland_1 <- Safariland[, sort(sample.int(ncol(Safariland), 10))]
#' set.seed(123)
#' Safariland_2 <- Safariland[, sort(sample.int(ncol(Safariland), 10))]
#'
#' lst <- list(s1 = Safariland_1, s2 = Safariland_2) %>%
#' lapply(web_matrix_to_df) %>%
#' boot_networklevel(col_lower = "lower", # column name for plants
#' col_higher = "higher", # column name for insects
#' index = "nestedness",
#' level = "both",
#' start = 10,
#' step = 10,
#' n_boot = 10,
#' n_cpu = 2)
#' gg_networklevel(lst)
#'
#' @export
#'
#' @md
boot_networklevel <- function(lst,
col_lower,
col_higher,
index,
level,
start,
step,
n_boot,
n_cpu,
probs = c(0.025, 0.975),
...) {
webs_stats <- vector(mode = "list", length = length(lst))
names(webs_stats) <- names(lst)
for (i in 1:length(webs_stats)){
webs_stats[[i]] <- lst[[i]] %>%
boot_networklevel_n(col_lower = col_lower,
col_higher = col_higher,
index = index,
level = level,
start = start,
step = step,
n_boot = n_boot,
n_cpu = n_cpu,
...) %>%
lapply(FUN = get_stats_single, probs = probs)
}
webs_stats %>%
get_stats_multi() %>%
return()
}
#' @title
#' Bootstrap network level metric multiple times.
#'
#' @description
#' Bootstrap a single network of interactions n times in parallel and collects
#' the network level metrics. Starts with a small sample size of interactions
#' (e.g. `start = 30`), builds the corresponding web matrix/network, computes
#' its metric (e.g. `index = "nestedness"`) using
#' \code{\link[bipartite]{networklevel}}, then adds new interactions (e.g.
#' `step = 20`) until all interactions are sampled. The last sample is actually
#' the entire network. Repeats n times (as given in `n_boot`) these steps in
#' parallel on multiple CPUs.
#'
#' @param data
#' Data frame of interactions from which to build and sample web matrices. Each
#' interaction (row in the data frame) must be repeated as many times as it was
#' observed. E.g. if the interaction species_1 x species_2 was observed 5
#' times, then repeat that row 5 times within the data frame. See examples
#' below.
#'
#' @param col_lower
#' Quoted column name in `data` for lower trophic level species (plants).
#'
#' @param col_higher
#' Quoted column name in `data` for higher trophic level species (insects).
#'
#' @param index
#' Passed to \code{\link[bipartite]{networklevel}}. See
#' `?bipartite::networklevel` for details.
#'
#' @param level
#' Passed to \code{\link[bipartite]{networklevel}}. See
#' `?bipartite::networklevel` for details. For which level should the
#' level-specific indices be computed: 'both' (default), 'lower' or 'higher'?
#'
#' @param start
#' Integer. The sample size (number of interactions) to start the bootstrap
#' with. If the start sample size is small (e.g. 5 or 10), then first
#' iterations might results in NaN-s and warning messages are displayed.
#' Consider to set `start` to maybe 10\\% of your total unique interactions.
#'
#' @param step
#' Integer. Sample size (number of interactions) used to increase gradually the
#' sampled network until all interactions are sampled. If `step` is too small
#' (e.g. 1) then the computation time is very long depending on your total
#' number of interactions from which samples are taken. Consider to set `step`
#' to maybe 5-10\\% of your total unique interactions.
#'
#' @param n_boot
#' Number of desired bootstraps (50 or 100 can be enough).
#'
#' @param n_cpu
#' Number of CPU-s to use for parallel processing.
#'
#' @param ...
#' Other arguments passed to \code{\link[bipartite]{networklevel}} like
#' `logbase`, etc.
#'
#' @return
#' Returns a list of a single matrix or a list of two matrices depending on the
#' provided `index` metric and `level` value. For example if
#' `index='nestedness'` and `level='both'`, then it returns a list of a single
#' matrix. But in the case of `index='niche overlap'`, it returns a list of two
#' matrices, first matrix (`niche.overlap.HL`) corresponding to the higher level
#' (e.g. insects), and the second (`niche.overlap.LL`) for the lower level (e.g.
#' plants). The number of rows of a matrix indicates how many iterations took
#' place. This is decided internally based on the given values to `start`,
#' `step` and the total number of rows (interactions) in `data`.The row names
#' give the sample size at each iteration. The last iteration (last row name) is
#' always the entire network (total number of interactions in `data`). The
#' number of columns corresponds to `n_boot` (number of bootstraps).
#'
#' @examples
#'
#' library(bootstrapnet)
#' library(magrittr)
#' library(bipartite)
#' data(Safariland)
#'
#' Safariland %>%
#' web_matrix_to_df() %>%
#' boot_networklevel_n(col_lower = "lower", # column name for plants
#' col_higher = "higher", # column name for insects
#' index = "niche overlap",
#' level = "both",
#' start = 100,
#' step = 100,
#' n_boot = 10,
#' n_cpu = 2)
#'
#' @import data.table
#'
#' @importFrom foreach foreach %dopar% registerDoSEQ
#' @importFrom parallel splitIndices makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom iterators iter
#'
#' @export
#'
#' @md
boot_networklevel_n <- function(data,
col_lower,
col_higher,
index,
level,
start,
step,
n_boot,
n_cpu,
...){
test_data(data, col_lower, col_higher)
cls_data <- class(data)
if (! "data.table" %in% cls_data) data.table::setDT(data)
test_index_networklevel(index)
test_level_value(level)
test_data_species_names(data, col_lower, col_higher)
# Get sample sizes. Row names of the resulting bootstrap matrices will carry
# information about the sample size at each iteration/bootstrap step. This is
# run also before the parallel processing initiation because it can throw
# error messages if the start and step values are not adequate. No need to
# initiate parallel processing for something that will fail.
iter_spl_size <- sample_indices(data = data, start = start, step = step, seed = 42) %>%
sapply(length)
{ # Start parallel processing
chunks <- parallel::splitIndices(n_boot, n_cpu)
cl <- parallel::makeCluster(n_cpu)
doParallel::registerDoParallel(cl)
i <- NULL # to avoid 'Undefined global functions or variables: i' in R CMD check
boot_lst <-
foreach::foreach(i = iterators::iter(chunks),
.errorhandling = 'pass',
.packages = c("magrittr",
"data.table",
"bipartite"),
.export = c("boot_networklevel_once",
"split_in_chunks",
"sample_indices")) %dopar% {
lapply(i, FUN = function(x) # note the lapply!
boot_networklevel_once(data = data,
col_lower = col_lower,
col_higher = col_higher,
index = index,
level = level,
start = start,
step = step,
seed = x,
...)
)
}
parallel::stopCluster(cl)
remove(cl)
foreach::registerDoSEQ()
} # End of parallel processing
boot_lst <- unlist(boot_lst, recursive = FALSE)
metric_names <- names(boot_lst[[1]])
n <- length(metric_names)
if (n == 1) {
results <- get_list_of_matrices(boot_lst, metric_names, n, iter_spl_size)
# Convert data back to data frame if applicable
if (! "data.table" %in% cls_data) data.table::setDF(data)
return(results)
} else if (n == 2) {
results <- get_list_of_matrices(boot_lst, metric_names, n, iter_spl_size)
if (! "data.table" %in% cls_data) data.table::setDF(data)
return(results)
}
}
#' @title
#' Bootstrap network level metric once.
#'
#' @description
#' You will rarely use this function alone. It was designed to be executed in
#' parallel by \code{\link[bootstrapnet]{boot_networklevel_n}}. See more
#' details there.
#'
#' @param data
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param col_lower
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param col_higher
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param index
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param level
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param start
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param step
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @param seed
#' Set seed to get reproducible random results. Passed to `set.seed()`.
#'
#' @param ...
#' See \code{\link[bootstrapnet]{boot_networklevel_n}}.
#'
#' @return
#' Returns a data frame of one or two columns depending on the provided `index`
#' metric and `level` value. For example if `index='nestedness'` and
#' `level='both'`, then it returns a one column data frame. But in case of
#' `index='niche overlap'`, it returns a two columns data frame, first column
#' ('niche.overlap.HL') corresponding to the higher level (e.g. insects), and
#' the second column ('niche.overlap.LL') for the lower level (e.g. plants).
#' The number of rows in the returned data frame corresponds to the number of
#' splits given by `bootstrapnet:::sample_indices` (is the length/number of
#' elements of the list returned by this function). The last value(s) in the
#' data frame correspond to the results of
#' \code{\link[bipartite]{networklevel}} on the entire network - see in
#' examples.
#'
#' @references
#' This function is a wrapper of \code{\link[bipartite]{networklevel}}.
#'
#' @import data.table
#'
#' @importFrom magrittr %>%
#' @importFrom bipartite networklevel
#'
#' @examples
#'
#' library(bootstrapnet)
#' library(bipartite)
#' library(data.table)
#'
#' data(Safariland)
#'
#' df <- web_matrix_to_df(Safariland)
#' setDT(df) # df must be a data.table and not data.frame
#'
#' # Example with "nestedness":
#'
#' boot_networklevel_once(data = df,
#' col_lower = "lower", # column name for plants
#' col_higher = "higher", # column name for insects
#' index = "nestedness",
#' level = "both",
#' start = 100,
#' step = 100,
#' seed = 2020-11-5)
#' # Returns a one column data frame. The last value must equal the result of:
#' set.seed(42) # this is the same seed used in the boot_networklevel_once() function
#' bipartite::networklevel(table(df$lower, df$higher),
#' index = "nestedness")
#' # which is also the equivalent of:
#' Safariland_sorted <- Safariland[order(rownames(Safariland)), order(colnames(Safariland))]
#' set.seed(42)
#' bipartite::networklevel(Safariland_sorted, index = "nestedness")
#'
#'
#' # Example with "niche overlap":
#'
#' boot_networklevel_once(data = df,
#' col_lower = "lower", # column name for plants
#' col_higher = "higher", # column name for insects
#' index = "niche overlap",
#' level = "both",
#' start = 100,
#' step = 100,
#' seed = 2020-11-5)
#' # Returns a two column data frame
#'
#' @export
#'
#' @md
boot_networklevel_once <- function(data,
col_lower,
col_higher,
index,
level,
start,
step,
seed,
...){
# List of sampled row indices from data to be used for bootstrapping. Each row
# represents a plant-pollinator interaction. Each vector of sampled indices is
# used to build a web matrix/network from data.
ids_lst <- sample_indices(data = data, start = start, step = step, seed = seed)
# Allocate memory for the objects that will grow during iterations/bootstrapping.
metric_lst <- vector(mode = "list", length = length(ids_lst))
# Sample interactions with the indices build above and compute the network
# level metric until all interactions are sampled. In case of errors (most
# probably related to small sample size of a network at first iterations),
# then `try()` doesn't break the bootstrapping process.
for (i in 1:length(ids_lst)){
metric_lst[[i]] <- try({
# Some metrics, like nestedness have random processes in their
# computation, so set seed here also for reproducibility. That is, if one
# wants to run the same random processes again, should get identical
# results.
web <- data[ids_lst[[i]], table(get(col_lower), get(col_higher))]
set.seed(42)
bipartite::networklevel(web = web, index = index, level = level, ...)
})
}
# Prepare results as data frame.
df <- metric_lst %>%
lapply(rbind) %>%
lapply(as.data.frame) %>%
data.table::rbindlist()
setDF(df)
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.