knitr::opts_chunk$set(echo = TRUE)
library(knitr)
opts_chunk$set(tidy.opts=list(width.cutoff=50),tidy=TRUE)

This appendix describes how to extract information on species' migratory status from BirdLife International as well as calculate indices of habitat use, habitat diversity, diet diversity and rarity. This data can be accessed in the BBS.occurrences library using data('Bird.traits'). Note, the primary functions for extracting migratory status were adapted from code that was generously provided by Tad Dallas.

Some libraries that are not automatically included in the package will be needed for web-scraping. Check that they are installed and up-to-date

library(tidyverse)  
library(httr)
library(rvest)
library(purrr)
library(stringr)
library(parallel)

Download the Birdtree.org taxonomy file, which will be necessary for searching common names when species' binomials are not recognised by BirdLife. Note, this step requires an internet connection

temp <- tempfile()
download.file('https://data.vertlife.org/birdtree/BLIOCPhyloMasterTax.csv',
              temp)
BT.tax <- read.csv(temp)

Load the binary species occurrence data that is included in the BBS.occurrences package and extract the column names

library(BBS.occurrences)
data("BBS.occurrences")
sp_names <- colnames(BBS.occurrences)

\pagebreak

Here we perform a sanity check to ensure that parallel loading is supported on our machine. Note, the library parallel needs to be loaded for this to function properly. If parallel computation is not desired, just set n_cores to 1

n_cores <- 3

if(n_cores > 1){
  cl <- makePSOCKcluster(n_cores)
  setDefaultCluster(cl)

  # Check errors when loading on each cluster
  test_load1 <- try(clusterEvalQ(cl, library(rvest)), silent = TRUE)

  #If errors, iterate over options for loading
  if(class(test_load1) == "try-error") {

    #Try finding paths using system.file()
    pkgLibs <- unique(c(sub("/rvest$", "", system.file(package = "rvest"))))
    clusterExport(NULL, c('pkgLibs'), envir = environment())
    clusterEvalQ(cl, .libPaths(pkgLibs))

    #Check again for errors
    test_load2 <- try(clusterEvalQ(cl, library(rvest)), silent = TRUE)

    if(class(test_load2) == "try-error"){

      #Try loading .libPath() directly
      clusterEvalQ(cl,.libPaths(as.character(.libPaths())))
      test_load3 <- try(clusterEvalQ(cl, library(rvest)), silent = TRUE)

      if(class(test_load3) == "try-error"){

        parallel_compliant <- FALSE
        stopCluster(cl)

      } else {
        parallel_compliant <- TRUE
      }

    } else {
      parallel_compliant <- TRUE
    }

  } else {
    parallel_compliant <- TRUE
  }
} else {
  parallel_compliant <- FALSE
}
parallel_compliant

\pagebreak

After confirming parallel capabilities, start the parallel clusters and run the function. Note, this step requires an internet connection and will take some time to process the 303 avian species in the dataset

  #Export necessary data and variables to each cluster
  clusterExport(NULL, c('BT.tax', 'sp_names'), envir = environment())

  #Export necessary libraries
  clusterEvalQ(cl, library(rvest))
  clusterEvalQ(cl, library(stringr))
  clusterEvalQ(cl, library(purrr))
  clusterEvalQ(cl, library(xml2))

search_migstatus <- pbapply::pblapply(seq_along(sp_names), function(x){

session <- rvest::html_session("http://datazone.birdlife.org/species/search")
Sys.sleep(4)

form <- html_form(session)[[3]]

filledform <- set_values(form, kw = sp_names[x])
session <- submit_form(session, filledform)$url

landing_pg <- xml2::read_html(session)

# Find text that forms the species' hyperlink
rvest::html_nodes(landing_pg, css = "tr") %>% 
  rvest::html_attrs() %>%                 
  purrr::flatten_chr() %>%                
  purrr::keep(~grepl("rowClick", .x)) %>%  
  stringr::str_replace("rowClick\\(\\'", "") %>%
  stringr::str_replace("\\'\\)", "") -> sp_link

# If binomial not found, try the common name
if(identical(sp_link, character(0))){
  session <- rvest::html_session("http://datazone.birdlife.org/species/search")
  Sys.sleep(4)

  comm_name <- as.character(BT.tax$English[which(BT.tax$TipLabel == paste(sp_names[x]))])
  form <- rvest::html_form(session)[[3]]

  filledform <- rvest::set_values(form, 
                           kw = comm_name)
  session <- rvest::submit_form(session, filledform)$url
  landing_pg <- xml2::read_html(session)

  rvest::html_nodes(landing_pg, css = "tr") %>% 
    rvest::html_attrs() %>%                 
    purrr::flatten_chr() %>%                
    purrr::keep(~grepl("rowClick", .x)) %>%  
    stringr::str_replace("rowClick\\(\\'", "") %>%
    stringr::str_replace("\\'\\)", "") -> sp_link

  # If still not found, return NA
  if(identical(sp_link, character(0))){
  output <- data.frame(species = sp_names[x],
             Migrate.status = 'NA')
  } else{
    details_link <- paste0("http://datazone.birdlife.org/species/factsheet/",
                           sp_link[1], '/details/')  

    migrate_status <- xml2::read_html(details_link) %>%
      rvest::html_nodes(css="tr:nth-child(1) td:nth-child(2)") %>% 
      rvest::html_text()
    output <- data.frame(species = sp_names[x],
                         Migrate.status = migrate_status)
  }
} else{
details_link <- paste0("http://datazone.birdlife.org/species/factsheet/",
                      sp_link[1], '/details/')  

migrate_status <- xml2::read_html(details_link) %>%
  rvest::html_nodes(css="tr:nth-child(1) td:nth-child(2)") %>% 
  rvest::html_text()
output <- data.frame(species = sp_names[x],
           Migrate.status = migrate_status)
}
output
}, cl = cl)
Bird.mig.status <- do.call(rbind, search_migstatus)
Bird.mig.status$Migrate.status <- as.character(Bird.mig.status$Migrate.status)

Next, we calculate Shannon Diversity indices to represent species' diet and habitat breadths. This function makes use of the data available in the EltonTraits database, which is the same that we used in Appendix_S4

temp <- tempfile()
download.file('https://ndownloader.figshare.com/files/5631081',
              temp)
Sp_traits <- read.table(temp, header = TRUE,
                        fill = TRUE,
                        quote = '"',
                        stringsAsFactors = FALSE,
                        sep = "\t")
unlink(temp)

\pagebreak

Calculate Shannon indices of diet and habitat diversity for each species

Sp_traits <- Sp_traits[which(Sp_traits$Scientific %in% sub('_', ' ', sp_names)),] %>%
  dplyr::right_join(data.frame(Scientific = sub('_', ' ', sp_names))) %>%
  dplyr::select(Scientific:ForStrat.aerial) %>%
  dplyr::mutate(species = sub(' ', '_', Scientific)) %>%
  dplyr::select(-Scientific, -English, -Diet.5Cat, -Diet.Source,
                -Diet.Certainty, -Diet.EnteredBy) %>%
  dplyr::mutate_if(stringr::str_detect(colnames(.), "Diet"),
                   funs((. / 100) * log(. / 100) * -1)) %>%
  dplyr::mutate_if(stringr::str_detect(colnames(.), "ForStrat"),
                   funs((. / 100) * log(. / 100) * -1)) %>%
  dplyr::mutate(Diet.diversity = rowSums(.[1:10], na.rm = T),
                Hab.diversity = rowSums(.[11:17], na.rm = T)) %>%
  dplyr::select(species, Diet.diversity, Hab.diversity)

Calculate an index of rarity for each species, defined by it's scaled occurrence total across all observations, where larger values indicate a species is more rare

Rarity <- data.frame(species = sp_names,

                     Rarity = as.vector(scale(colSums(BBS.occurrences) / 
                                                   nrow(BBS.occurrences))))
Rarity$Rarity <- -1 * Rarity

Add the rarity and diveristy metrics to the migration dataset

Bird.traits = Bird.mig.status %>%
  dplyr::left_join(Rarity) %>%
  dplyr::left_join(Sp_traits) %>%
  dplyr::select(species, dplyr::everything())

Next, we extract information on species' average clutch size from the Avian Life History Traits Dataset.

temp <- tempfile()
download.file("http://www.esapubs.org/archive/ecol/E088/096/avian_ssd_jan07.txt",
              temp)
life_hist <- read.table(temp, header = TRUE,
                        fill = TRUE,
                        quote = '"',
                        stringsAsFactors = FALSE,
                        sep = "\t")
unlink(temp)

life_hist <- life_hist[ , 3:37]

life_hist %>%
  dplyr::mutate_if(is.integer, as.numeric) %>%
  dplyr::select(Species_name, Clutch_size) %>%
  purrr::set_names(c('species','Clutch_size')) %>%
  dplyr::mutate(Clutch.size = ifelse(Clutch_size == -999, 
                                     NA, Clutch_size),
                species = sub(' ', '_', species)) %>%
  dplyr::right_join(Bird.traits) -> Bird.traits

Bird.traits$Clutch.size <- as.vector(scale(Bird.traits$Clutch.size))

\pagebreak

Finally, we extract additional information on species' nesting behaviour and broad habitat use by creating binary indicators using data available in Land Condition Trend Analysis Avian Database. We then join these variables into a single trait dataset and save in the Analysis folder. Note, this data can be accessed in the BBS.occurrences library using data('Bird.traits').

temp <- tempfile()
download.file('https://ecologicaldata.org/sites/default/files/lcta.txt',
              temp)
nest_traits <- read.table(temp, header = TRUE,
                        fill = TRUE,
                        quote = '"',
                        stringsAsFactors = FALSE,
                        sep = "\t")
unlink(temp)

nest_traits %>%
  dplyr::mutate(species = sub(' ', '_', SCIENTIFIC_NAME)) %>%
  dplyr::mutate(Ground.nest = ifelse(grepl('Ground', 
                                           NEST_LOC) == T, 1, 0),
                Lower.canopy.nest = ifelse(grepl('Lower Canopy', 
                                                 NEST_LOC) == T, 1, 0),
                Upper.canopy.nest = ifelse(grepl('Upper Canopy', 
                                                 NEST_LOC) == T, 1, 0),
                Floating.nest = ifelse(grepl('Floating', 
                                                 NEST_LOC) == T, 1, 0),
                Forest.hab = ifelse(grepl('Shrubland|Riparian|Forest',
                                             HABITAT) == T, 1, 0),
                Disturbed.hab = ifelse(grepl('Cropland|Urban|Forest Edge',
                                             HABITAT) == T, 1, 0),
                Grassland.hab = ifelse(grepl('Grassland|Tundra',
                                             HABITAT) == T, 1, 0)) %>%
  dplyr::select(species,Ground.nest,Lower.canopy.nest,Upper.canopy.nest,
                Floating.nest,Forest.hab,Grassland.hab,Disturbed.hab) %>%
  dplyr::right_join(Bird.traits) -> Bird.traits

save(Bird.traits, 
     file = './Analysis_data/
     Bird.traits.Rdata')


nicholasjclark/BBS.occurrences documentation built on July 19, 2020, 8:31 p.m.