# Attach knitr so its chunk-option helper can be called directly.
library(knitr)

# Document-wide chunk defaults: echo the source code and tidy it, passing a
# width cutoff of 50 to the tidying step.
opts_chunk$set(
  echo = TRUE,
  tidy.opts = list(width.cutoff = 50),
  tidy = TRUE
)
This appendix describes how to extract information on species' migratory status from BirdLife International, as well as how to calculate indices of habitat use, habitat diversity, diet diversity and rarity. These data can be accessed in the `BBS.occurrences` library using `data('Bird.traits')`. Note that the primary functions for extracting migratory status were adapted from code that was generously provided by Tad Dallas.
Some libraries that are not automatically included in the package will be needed for web-scraping. Check that they are installed and up-to-date
library(tidyverse) library(httr) library(rvest) library(purrr) library(stringr) library(parallel)
Download the Birdtree.org taxonomy file, which will be necessary for searching common names when species' binomials are not recognised by BirdLife. Note, this step requires an internet connection
# Download the BirdTree master taxonomy to a temporary file and read it in.
# Requires an internet connection.
temp <- tempfile()
download.file(
  "https://data.vertlife.org/birdtree/BLIOCPhyloMasterTax.csv",
  temp
)
BT.tax <- read.csv(temp)

# Remove the temporary download once it has been read, matching how the other
# downloads in this appendix clean up after themselves.
unlink(temp)
Load the binary species occurrence data that are included in the `BBS.occurrences` package and extract the column names.
# Attach the package and load its bundled occurrence dataset.
library(BBS.occurrences)
data("BBS.occurrences")

# The column names are used as the species names in the steps that follow.
sp_names <- colnames(BBS.occurrences)
\pagebreak
Here we perform a sanity check to ensure that parallel loading is supported on our machine. Note that the `parallel` library needs to be loaded for this to function properly. If parallel computation is not desired, just set `n_cores` to 1.
# Number of worker processes to use; set to 1 to skip parallel processing.
n_cores <- 3

# Stays FALSE unless a cluster is started and every worker can attach rvest.
parallel_compliant <- FALSE

if (n_cores > 1) {
  cl <- makePSOCKcluster(n_cores)
  setDefaultCluster(cl)

  # Attempt 1: attach rvest on each worker using the workers' own library
  # paths.
  test_load1 <- try(clusterEvalQ(cl, library(rvest)), silent = TRUE)
  parallel_compliant <- !inherits(test_load1, "try-error")

  if (!parallel_compliant) {
    # Attempt 2: point the workers at the library that holds rvest on the
    # master, located with system.file().
    pkgLibs <- unique(c(sub("/rvest$", "", system.file(package = "rvest"))))
    clusterExport(cl, c("pkgLibs"), envir = environment())
    clusterEvalQ(cl, .libPaths(pkgLibs))

    test_load2 <- try(clusterEvalQ(cl, library(rvest)), silent = TRUE)
    parallel_compliant <- !inherits(test_load2, "try-error")
  }

  if (!parallel_compliant) {
    # Attempt 3: hand the workers the master's full set of library paths.
    # clusterEvalQ() evaluates its expression on the worker, so the paths
    # must be captured on the master and exported first; calling .libPaths()
    # inside clusterEvalQ() would only return the worker's own paths.
    master_lib_paths <- as.character(.libPaths())
    clusterExport(cl, c("master_lib_paths"), envir = environment())
    clusterEvalQ(cl, .libPaths(master_lib_paths))

    test_load3 <- try(clusterEvalQ(cl, library(rvest)), silent = TRUE)
    parallel_compliant <- !inherits(test_load3, "try-error")
  }

  if (!parallel_compliant) {
    # No attempt worked: shut the workers down and unregister the default
    # cluster so later code cannot pick up a dead cluster by accident.
    stopCluster(cl)
    setDefaultCluster(NULL)
  }
}

parallel_compliant
\pagebreak
After confirming parallel capabilities, start the parallel clusters and run the function. Note, this step requires an internet connection and will take some time to process the 303 avian species in the dataset
# Use the cluster only when the parallel check succeeded and left a cluster
# object behind; otherwise run sequentially (pblapply accepts cl = NULL).
use_cluster <- exists("parallel_compliant") && isTRUE(parallel_compliant) &&
  exists("cl")

if (use_cluster) {
  # Export necessary data and variables to each worker
  clusterExport(cl, c("BT.tax", "sp_names"), envir = environment())

  # Attach the libraries used by the scraping function on each worker
  clusterEvalQ(cl, library(rvest))
  clusterEvalQ(cl, library(stringr))
  clusterEvalQ(cl, library(purrr))
  clusterEvalQ(cl, library(xml2))
}

# For each species, search BirdLife by binomial (falling back to the common
# name from the BirdTree taxonomy) and read the migratory status from the
# species' factsheet. Returns a list of one-species data frames with columns
# `species` and `Migrate.status`; the status is the string 'NA' when nothing
# could be found. Requires an internet connection.
search_migstatus <- pbapply::pblapply(seq_along(sp_names), function(x) {
  # Helpers are defined inside the worker function so that they travel with
  # it to the cluster workers.

  # Submit `keyword` through the third form on the BirdLife search page and
  # return the identifiers embedded in the "rowClick('...')" attributes of the
  # result rows.
  find_species_links <- function(keyword) {
    session <- rvest::html_session("http://datazone.birdlife.org/species/search")
    Sys.sleep(4) # pause before submitting; presumably to go easy on the server
    form <- rvest::html_form(session)[[3]]
    filledform <- rvest::set_values(form, kw = keyword)
    results_url <- rvest::submit_form(session, filledform)$url
    landing_pg <- xml2::read_html(results_url)

    row_attrs <- purrr::flatten_chr(
      rvest::html_attrs(rvest::html_nodes(landing_pg, css = "tr"))
    )
    row_clicks <- purrr::keep(row_attrs, ~ grepl("rowClick", .x))
    row_clicks <- stringr::str_replace(row_clicks, "rowClick\\(\\'", "")
    stringr::str_replace(row_clicks, "\\'\\)", "")
  }

  # Read the text of the first-row, second-column table cells on the
  # factsheet's details page.
  read_migrate_status <- function(factsheet_id) {
    details_link <- paste0(
      "http://datazone.birdlife.org/species/factsheet/",
      factsheet_id,
      "/details/"
    )
    details_pg <- xml2::read_html(details_link)
    status <- rvest::html_text(
      rvest::html_nodes(details_pg, css = "tr:nth-child(1) td:nth-child(2)")
    )
    # An empty scrape would make data.frame() fail on mismatched lengths and
    # abort the whole run; report it like any other missing status instead.
    if (length(status) == 0) "NA" else status
  }

  # Try the binomial first
  sp_link <- find_species_links(sp_names[x])

  # If the binomial is not found, try the common name (when one exists)
  if (identical(sp_link, character(0))) {
    comm_name <- as.character(
      BT.tax$English[which(BT.tax$TipLabel == paste(sp_names[x]))]
    )
    if (length(comm_name) > 0) {
      sp_link <- find_species_links(comm_name)
    }
  }

  # If still not found, record 'NA'; otherwise use the first match
  if (identical(sp_link, character(0))) {
    migrate_status <- "NA"
  } else {
    migrate_status <- read_migrate_status(sp_link[1])
  }

  data.frame(species = sp_names[x], Migrate.status = migrate_status)
}, cl = if (use_cluster) cl else NULL)

# Stack the per-species results and store the status as plain character
Bird.mig.status <- do.call(rbind, search_migstatus)
Bird.mig.status$Migrate.status <- as.character(Bird.mig.status$Migrate.status)
Next, we calculate Shannon Diversity indices to represent species' diet and habitat breadths. This step makes use of the data available in the EltonTraits database, which is the same database that we used in Appendix_S4.
# Fetch the tab-delimited trait table into a temporary file, read it with
# strings kept as character, then discard the download.
temp <- tempfile()
download.file("https://ndownloader.figshare.com/files/5631081", destfile = temp)
Sp_traits <- read.table(
  file = temp,
  sep = "\t",
  quote = '"',
  header = TRUE,
  fill = TRUE,
  stringsAsFactors = FALSE
)
unlink(temp)
\pagebreak
Calculate Shannon indices of diet and habitat diversity for each species
# One term of the Shannon index for a percentage column: -p * log(p) with
# p = pct / 100. A zero percentage gives NaN, which the row sums below drop
# via na.rm = TRUE.
shannon_term <- function(pct) {
  (pct / 100) * log(pct / 100) * -1
}

# The trait table uses "Genus species"; sp_names uses "Genus_species".
sci_names <- sub("_", " ", sp_names)

Sp_traits <- Sp_traits[which(Sp_traits$Scientific %in% sci_names), ] %>%
  # Keep one row per species in sp_names, even those missing from the table
  dplyr::right_join(data.frame(Scientific = sci_names), by = "Scientific") %>%
  dplyr::select(Scientific:ForStrat.aerial) %>%
  dplyr::mutate(species = sub(" ", "_", Scientific)) %>%
  dplyr::select(
    -Scientific, -English, -Diet.5Cat, -Diet.Source,
    -Diet.Certainty, -Diet.EnteredBy
  ) %>%
  # Convert each remaining Diet.* and ForStrat.* percentage to its Shannon term
  dplyr::mutate_if(stringr::str_detect(colnames(.), "Diet"), shannon_term) %>%
  dplyr::mutate_if(stringr::str_detect(colnames(.), "ForStrat"), shannon_term) %>%
  # Sum the terms per species; columns are picked by name rather than by
  # position so a change in column order cannot silently mix the two indices
  dplyr::mutate(
    Diet.diversity = rowSums(
      dplyr::select(., dplyr::starts_with("Diet")),
      na.rm = TRUE
    ),
    Hab.diversity = rowSums(
      dplyr::select(., dplyr::starts_with("ForStrat")),
      na.rm = TRUE
    )
  ) %>%
  dplyr::select(species, Diet.diversity, Hab.diversity)
Calculate an index of rarity for each species, defined by its scaled occurrence total across all observations, where larger values indicate that a species is more rare.
# Rarity index: the proportion of observations in which each species occurs,
# centred and scaled across species.
Rarity <- data.frame(
  species = sp_names,
  Rarity = as.vector(scale(colSums(BBS.occurrences) / nrow(BBS.occurrences)))
)

# Flip the sign so that larger values mean a rarer species. Only the numeric
# Rarity column is negated; multiplying the whole data frame would also hit
# the non-numeric species column.
Rarity$Rarity <- -1 * Rarity$Rarity
Add the rarity and diversity metrics to the migration dataset.
# Attach the rarity index and then the diet/habitat diversity indices to the
# migration table (both joins use the columns the tables share), and put the
# species column first.
Bird.traits <- dplyr::select(
  dplyr::left_join(
    dplyr::left_join(Bird.mig.status, Rarity),
    Sp_traits
  ),
  species,
  dplyr::everything()
)
Next, we extract information on species' average clutch size from the Avian Life History Traits Dataset.
# Fetch the tab-delimited life-history table into a temporary file, read it,
# then discard the download.
temp <- tempfile()
download.file(
  "http://www.esapubs.org/archive/ecol/E088/096/avian_ssd_jan07.txt",
  destfile = temp
)
life_hist <- read.table(
  file = temp,
  sep = "\t",
  quote = '"',
  header = TRUE,
  fill = TRUE,
  stringsAsFactors = FALSE
)
unlink(temp)

# Keep columns 3 to 37 only.
life_hist <- life_hist[, 3:37]

# Build a species / clutch-size lookup and join it onto the trait table,
# keeping every row already in Bird.traits. Clutch.size is Clutch_size with
# the -999 code replaced by NA; species names are converted to the
# underscore-separated form.
Bird.traits <- life_hist %>%
  dplyr::mutate_if(is.integer, as.numeric) %>%
  dplyr::select(Species_name, Clutch_size) %>%
  purrr::set_names(c("species", "Clutch_size")) %>%
  dplyr::mutate(
    Clutch.size = ifelse(Clutch_size == -999, NA, Clutch_size),
    species = sub(" ", "_", species)
  ) %>%
  dplyr::right_join(Bird.traits)

# Centre and scale clutch size across species.
Bird.traits$Clutch.size <- as.vector(scale(Bird.traits$Clutch.size))
\pagebreak
Finally, we extract additional information on species' nesting behaviour and broad habitat use by creating binary indicators using data available in the Land Condition Trend Analysis Avian Database. We then join these variables into a single trait dataset and save it in the `Analysis_data` folder. Note that these data can be accessed in the `BBS.occurrences` library using `data('Bird.traits')`.
# Download the tab-delimited nesting/habitat table to a temporary file, read
# it, then discard the download.
temp <- tempfile()
download.file("https://ecologicaldata.org/sites/default/files/lcta.txt", temp)
nest_traits <- read.table(
  temp,
  header = TRUE,
  fill = TRUE,
  quote = '"',
  stringsAsFactors = FALSE,
  sep = "\t"
)
unlink(temp)

# Turn the free-text NEST_LOC and HABITAT fields into 1/0 indicators (1 when
# the field mentions any of the listed terms), then join them onto the trait
# table, keeping every row already in Bird.traits.
Bird.traits <- nest_traits %>%
  dplyr::mutate(species = sub(" ", "_", SCIENTIFIC_NAME)) %>%
  dplyr::mutate(
    Ground.nest = ifelse(grepl("Ground", NEST_LOC), 1, 0),
    Lower.canopy.nest = ifelse(grepl("Lower Canopy", NEST_LOC), 1, 0),
    Upper.canopy.nest = ifelse(grepl("Upper Canopy", NEST_LOC), 1, 0),
    Floating.nest = ifelse(grepl("Floating", NEST_LOC), 1, 0),
    Forest.hab = ifelse(grepl("Shrubland|Riparian|Forest", HABITAT), 1, 0),
    Disturbed.hab = ifelse(grepl("Cropland|Urban|Forest Edge", HABITAT), 1, 0),
    Grassland.hab = ifelse(grepl("Grassland|Tundra", HABITAT), 1, 0)
  ) %>%
  dplyr::select(
    species, Ground.nest, Lower.canopy.nest, Upper.canopy.nest,
    Floating.nest, Forest.hab, Grassland.hab, Disturbed.hab
  ) %>%
  dplyr::right_join(Bird.traits)

# Save the combined trait table. The file name previously began with a stray
# space (' Bird.traits.Rdata'); it is written without it here.
save(Bird.traits, file = "./Analysis_data/Bird.traits.Rdata")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.