knitr::opts_chunk$set(fig.path = "figures/", warning=FALSE, message=FALSE)
Load required packages from CRAN
#Automatically install required packages, which are not yet installed packages <- c("rgbif", "sp", "raster", "dplyr", "sf", "ggplot2", "rebird", "remotes", "outliers") new.packages <- packages[!(packages %in% installed.packages()[,"Package"])] if(length(new.packages)) install.packages(new.packages); rm(new.packages) # Load packages l <- sapply(packages, require, character.only = TRUE, quietly=TRUE); rm(packages, l)
Load additional packages from GitHub
# Instal missing packages from GitHub packages_github <- c("rasterSp", "climateNiche", "ggmap2", "rISIMIP") new.packages <- packages_github[!(packages_github %in% installed.packages()[,"Package"])] if(length(new.packages)) remotes::install_github(paste0("RS-eco/", new.packages), build_vignettes=T) rm(new.packages) # Load additional packages l <- sapply(packages_github, require, character.only = TRUE, quietly=TRUE); rm(packages_github, l)
Note: You need to change this to a location, where you have the required input data stored and/or where you want to have all the output data stored.
#Set file directory filedir <- "/home/matt/Documents/"
We have rasterized IUCN and BirdLife range maps of amphibians, terrestrial mammals, birds and reptiles to a resolution of 0.5°. Details of how this was done, can be found in the rasterSp
package.
We can directly read the distribution data for all species of these taxa
data(amphibians_dist, package="rasterSp") data(ter_birds_dist, package="rasterSp") data(ter_mammals_dist, package="rasterSp") data(reptiles_dist, package="rasterSp")
Rasterized species range data for the house sparrow Passer domesticus and the great sparrow Passer motitensis is also included in this package and can be accessed by:
# Load data of the house sparrow data(Passer_domesticus) # Load data of the great sparrow data(Passer_motitensis)
# Display the range map of one species plotSp(data=Passer_domesticus, extent=c(-180, 180, -65, 90))
eBird data can be accessed through the rebird
package available on CRAN. However, this only gives access to the records of the last 14 days and requires Internet access.
## Search for bird occurrences by latitude and longitude point data <- ebirdgeo(species_code('Passer domesticus'), 42, -76) ## Same, but with additional parameter settings, including provisional records, and hotspot records data <- ebirdgeo(species_code("Passer domesticus"), lat = 42, lng = -76, includeProvisional = TRUE, hotspot = TRUE) ## Search for bird occurrences by region and species name data <- ebirdregion(loc = 'US', species = species_code('Passer domesticus')) ## Search for bird occurrences by their location IDs data <- ebirdregion(loc = c('L99381','L99382')) ## Search by location ID and species name, as well as some additional parameter settings data <- ebirdregion(loc = 'L99381', species = species_code('Passer domesticus'), max = 10, provisional = TRUE, hotspot=TRUE) ## Obtain frequency data for a given state or county data_freq <- ebirdfreq(loctype = 'states', loc = 'CA-BC') # loctype= states, counties, hotspots, startyear, endyear, startmonth, endmonth, long=TRUE or FALSE ## Search for notable sightings at a given latitude and longitude data_notable <- ebirdnotable(lat = 42, lng = -70) ## Return eBird taxonomy #ebirdtaxonomy()
The rebird
package, only gives you data from the last 14 days, when analysing species distributions it is best to download the whole ebird Database from their website. The original eBird Datafile is around 150 GB in size, we converted the file to a sqlite database, which can be read using R on any computer, without running into memory limitations. To obtain a copy of the ebird database just get in touch with me or check out the vignette("create-ebird-db")
vignette in order to know how to create the database yourself.
# Connect to ebird database con <- DBI::dbConnect(RSQLite::SQLite(), dbname = paste0(filedir, "/ebird_database.sqlite")) # Get eBird data of Passer_domesticus ebird <- tbl(con, "ebird") Pass_dom <- ebird %>% dplyr::filter(`SCIENTIFIC NAME` == "Passer domesticus") %>% data.frame() # Disconnect from database DBI::dbDisconnect(con); rm(ebird, con)
Note: The database contains a considerable smaller number of records for each species compared to the eBird Website (http://ebird.org), as this database contains only verified records.
Create plot of eBird data
# Load outline data data(outline, package="ggmap2") # Plot locations and size of points according to Observation count ggplot(data=Pass_dom, aes(x=LONGITUDE, y=LATITUDE)) + geom_point(aes(size=OBSERVATION.COUNT, col=month)) + geom_polygon(data=outline, aes(x=long, y=lat, group=group), fill="transparent", color="black") + coord_quickmap(xlim=c(-130,-45), ylim=c(-35,65)) + theme_bw()
Download occurrence data from GBIF using the rgbif package. This approach is limited to 200000 data entries, but here we limit it to 20000 entries, as data takes a long time to download.
sp_data <- rgbif::occ_data(scientificName="Passer domesticus", limit=20000, year=1970, month='1,12', hasCoordinate=TRUE) sp_data <- sp_data$data[,c("name", "decimalLatitude", "decimalLongitude", "year", "month", "day")] #unique(sp_data$month)
Turn sp_data into a SpatialPointsDataFrame
coordinates(sp_data) <- ~decimalLongitude+decimalLatitude crs(sp_data) <- sp::CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
Alternatively, we can use the terrestrialGBIF
function from the climateNiche
package, which automatically downloads the data, removes outliers and points which are not on land. In addition, the file is saved to a specified path so that the data does not have to be downloaded over and over again.
sp_data <- terrestrialGBIF("Passer domesticus", limit=20000, hasCoordinate=TRUE, path=paste0(filedir, "/GBIF/"))
For analysing GBIF data of multiple species, it is easier to download all the data from the GBIF Website (ca. 350 GB) and create a sqlite database from it. In order to obtain a copy of the database, just ask me or have a look at the vignette("create-gbif-db")
vignette in order to create the database yourself.
# Connect to GBIF database con <- DBI::dbConnect(RSQLite::SQLite(), dbname=paste0(filedir, "gbif_database.sqlite")) # Get gbif data of Passer_domesticus gbif <- tbl(con, "gbif") sp_data <- gbif %>% filter(species == "Passer domesticus") %>% collect() %>% data.frame() # Disconnect from database DBI::dbDisconnect(con); rm(gbif,con)
Create plot of gbif data
# Load outline data data(outline, package="ggmap2") # Plot locations sp_data <- as.data.frame(sp_data) ggplot() + geom_point(data=sp_data, aes(x=decimalLongitude, y=decimalLatitude), col="red") + geom_sf(data=outline, fill="transparent", color="black") + coord_sf(xlim=c(-160,180), ylim=c(-55,75), expand=F) + theme_bw()
Using the getData
function from the raster package, we can extract altitude, tmin, tmax and prec as well as 19 bioclimatic variables. The function downloads global bioclim data with a resolution of 10 Arc min and save the file to a subfolder called wc10 located at the path previously specified as filedir.
Note: If the data has been downloaded once to the specified path, it automatically loads these data instead of downloading them again.
Here, the resolution is 10, but worldclim also provides 0.5, 2.5 and 5 ArcMin grid sizes. Note that for the 0.5 resolution, we would need to specify a latitude and longitude value, as in this case getData only downloads and reads one tile and not the entire earth.
bioclim <- getData(name="worldclim", download=TRUE, path=filedir, res=10, var="bio")
Note: The temperature variables have to be divided by 10 to get actual °C.
bioclim_temp <- calc(bioclim[[c(1,2,5,6,7,8,9,10,11)]], fun=function(x){x/10}) bioclim <- stack(bioclim_temp[[c(1:2)]], bioclim[[c(3:4)]], bioclim_temp[[c(3:9)]], bioclim[[c(12:19)]]); rm(bioclim_temp) names(bioclim) <- paste0("bio", 1:19)
Alternatively, we can also load the landonly processed ISIMIP2b data at a resolution of 0.5°. Load present Bioclim ISIMIP2b files
data(bioclim_ewembi_1995_landonly, package="rISIMIP")
Now, we want to extract the environmental data for each presence point of our bird species.
sp_data <- sp_data[complete.cases(sp_data$decimalLatitude, sp_data$decimalLongitude),] coordinates(sp_data) <- ~decimalLongitude+decimalLatitude crs(sp_data) <- sp::CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") sp_bio <- raster::extract(bioclim, sp_data, df=TRUE); rm(bioclim) sp_data@data <- cbind(sp_data@data, sp_bio); rm(sp_bio)
Our bird species might have different ranges throughout the season.
Therefore, we use sptExtract()
to extract monthly temperature values for our species.
# Get temperature data tmean <- raster::getData(name="worldclim", path=filedir, res=10, var="tmean") # Transform into degrees tmean <- raster::calc(tmean, fun=function(x){x/10}) names(tmean) <- paste0("tmean", 1:12) # Remove data where no month is provided sp_month <- sp_data[which(!is.na(sp_data$month)),] # Extract monthly temperature sp_month <- sptExtract(sp_month, tmean, tres="month"); rm(tmean) # Plot monthly mean temperature niche of Passer domesticus ggplot(data=sp_month@data) + geom_boxplot(aes(x=factor(month), y=env_locs)) + labs(x="Month", y="Monthly mean temperature (°C)") + theme_bw()
# Summarise the climatic niche of our species sp_month@data %>% group_by(month) %>% summarise(tmean=mean(env_locs, na.rm=T)) %>% rename(Month = month) %>% mutate(Month = factor(Month, labels=month.abb)) %>% t() %>% knitr::kable()
displayNiche
is a function that plots the climatic/environmental niche of a given species. The function is included in the climateNiche
package.
Annual temperature (min/max) & precipitation niche of GBIF data
# Display temperature and precipitation niche using Worldclim data displayNiche(data=data.frame(sp_data), res=10, variables=c("tmin", "tmax", "prec"), path=filedir)
Bioclim niche from IUCN range Map
# Display bioclimatic niche using Worldclim data displayNiche(data=Passer_domesticus, res=10, variables=c("tmax", "prec"), path=filedir)
Compare the climatic niche of one species using monthly and quarterly maximum temperature and precipitation data and qua
# Plot the monthly climatic niche of a species displayNiche(data=Passer_domesticus, path=filedir, variables=c("tmax"), tres="month", res=10) # Plot the monthly climatic niche of a species displayNiche(data=Passer_domesticus, path=filedir, variables=c("tmax", "prec"), tres="month", res=10) # Plot the quarterly climatic niche of a species displayNiche(data=Passer_domesticus, path=filedir, variables=c("tmin", "tmax", "prec"), tres="quarter", res=10)
# Load raster file of IUCN species range map of the Great sparrow data(Passer_motitensis) # Create list of 2 species Passer_spp <- list(Passer_domesticus, Passer_motitensis) # Display the range map of both species plotSp(data=Passer_spp, extent=c(-180, 180, -65, 90)) # Load data of the Somali sparrow & the Asian desert sparrow data(Passer_castanopterus) data(Passer_zarudnyi) # Create list of 4 species Passer_spp4 <- list(Passer_castanopterus, Passer_domesticus, Passer_motitensis, Passer_zarudnyi) # Display the range map of four species plotSp(data=Passer_spp4, extent=c(-180, 180, -65, 90))
Compare the climatic niche of four species using the most important bioclimatic variables (bio5, bio12, bio15)
# Compare the climatic niche of four species displayNiche(data=Passer_spp4, path=filedir, variables=c("bio5", "bio12", "bio15"), res=10)
# Compare the climatic envelope of two species climateEnvelope(data=Passer_spp, path=filedir)
# Merge data into one list r_data <- rasterize(x=sp_data, y=Passer_domesticus, fun="count")[[1]] r_data[r_data > 0] <- 1 Passer_data <- list(Passer_domesticus, r_data) names(Passer_data) <- c("IUCN", "GBIF")
# Display the range map of both species plotSp(data=Passer_data, name="Data source", extent=c(-180, 180, -65, 90))
# Compare the climatic niche of two datasets displayNiche(data=Passer_data, variables=c("tmin", "tmax", "prec"), path=filedir, res=10)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.