knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
check_pkgs <- requireNamespace("getCRUCLdata", quietly = TRUE) if (!check_pkgs) { message <- paste("This article of the package requires the 'getCRUCLdata' package", "which is not available on CRAN anymore. To install the package, please", "run 'install.packages('getCRUCLdata', repos = 'http://packages.ropensci.org', typ = 'source')'.") message <- paste(strwrap(message), collapse = "\n") message(message) }
This vignette demonstrates how to use shar
to analyse species occurrence data obtained from the Global Biodiversity Information Facility (GBIF) and environmental raster data obtained from the Climate Research Unit (CRU) entirely in R
. The "Gamma test" approach as detailed in the vignette("background")
is used. The distribution of the tree species Cormus domestica in Europe is selected, a tree which tolerates a wide range of conditions but favors warm to mild climates, occurring in the "Subtropical dry forest" and "Temperate Continental" FAO ecological zones. Cormus domestica is most commonly found in Southern Europe, though there it's natural range is uncertain owing to it's cultivation and distribution by the Roman Empire (De Rigo et al., 2016; Rotach, 2003).
This article of the package requires the getCRUCLdata
package, which is not available on CRAN anymore. To install the package, please run install.packages('getCRUCLdata', repos = 'http://packages.ropensci.org', typ = 'source')
.
library(dplyr) # For data wrangling library(getCRUCLdata) # For retrieving climate raster data library(ggplot2) # For plotting library(patchwork) # For composing multiple plots library(rgbif) # For retrieving species occurrence data library(rnaturalearth) # For retrieving geographical data library(rnaturalearthdata) # For retrieving geographical data library(sf) # For spatial data operations library(spatstat) # For spatial point pattern analysis library(terra) # For spatial data operations library(shar) # For species-habitat association analysis
To retrieve species occurrence data the R
package rgbif
(Chamberlain & Boettiger, 2017) is used, which provides an interface to access the GBIF database. For this vignette we are interested in observations within the likely natural distribution of Cormus domestica, which includes Europe, the wider Mediterranean, the Black Sea Region, and potentially North Africa.
# Load bundled GBIF occurrence data data_simp <- shar:::data_internal$data_gbif
# Establish region of interest # *** We need to do this here as the chinnk below is not evaluated *** roi <- c(xmin = -20, xmax = 45, ymin = 30, ymax = 73) roi_bbox <- sf::st_bbox(roi, crs = sf::st_crs("EPSG:4326")) roi_sfc <- sf::st_sfc(sf::st_point(c(roi[["xmin"]], roi[["ymin"]])), sf::st_point(c(roi[["xmax"]], roi[["ymax"]])), crs = "EPSG:4326")
# Retrieve key for Cormus domestica key <- rgbif::name_backbone(name = 'Cormus domestica', kingdom = 'plants') # Establish region of interest roi <- c(xmin = -20, xmax = 45, ymin = 30, ymax = 73) roi_bbox <- sf::st_bbox(roi, crs = sf::st_crs("EPSG:4326")) roi_sfc <- sf::st_sfc(sf::st_point(c(roi[["xmin"]], roi[["ymin"]])), sf::st_point(c(roi[["xmax"]], roi[["ymax"]])), crs = "EPSG:4326") # Retrieve occurrences for the region of interest res <- rgbif::occ_search(taxonKey = as.numeric(key$usageKey), limit = 99999, geometry = roi_bbox) # Create a simple data frame containing only the unique identifier (id), # latitude (lat), and longtitude (lon). data_simp <- data.frame(id = res$data$key, lat = res$data$decimalLatitude, lon = res$data$decimalLongitude) %>% dplyr::filter(!is.na(lat) | !is.na(lon))
Spatial polygon data for the world is obtained from the rnaturalearth
package (South, 2022), the map is then restricted to the region of interest. The spatstat
package requires geospatial data in the format of a projected coordinate system; the data is therefore converted from the geographic coordinate system 4336 to the projected coordinate system 3395. The shar
function fit_point_process
requires a spatial point pattern (ppp
) object bounded within an observation window of the class owin
, which is then created.
# Retrieve data from rnaturalearth worldmap <- rnaturalearth::ne_countries(returnclass = "sf", scale = 50) %>% sf::st_transform(crs = sf::st_crs("EPSG:3395")) # Re-project simple features region of interest roi_sfc_3395 <- roi_sfc %>% sf::st_transform(crs = sf::st_crs("EPSG:3395")) # Crop world map to include polygons within the region of interest roi_map <- sf::st_crop(x = worldmap, y = roi_sfc_3395) # Define observation window roi_owin <- spatstat.geom::as.owin(roi_map$geometry)
The environmental variable selected for demonstrative purposes is the mean temperature in January over the 1961-1990 period. Data is obtained through the getCRUCLdata
package (Sparks, 2017) which provides access to the datasets described in New et al. (2002).
# Download data as a raster brick through the getCRUCLdata package # Mean temperature (tmn) data should be 180.4MB cru_data <- getCRUCLdata::get_CRU_stack(tmp = TRUE) # Select temperature variable and the month of January tmp_raster <- cru_data$tmp$jan
The climate data obtained above is restricted to the region of interest, then classified into 10 habitats based on temperature ranges, achieved by setting the lower and upper bounds of these ranges in the fixedBreaks
argument of the classify_habitats
function. Figure 1 displays the unclassified and classified habitat.
# Crop tmp raster tmp_raster_eur <- terra::crop(x = tmp_raster, y = roi) # Project raster to EPSG:3395 tmp_raster_eur_3395 <- terra::project(x = tmp_raster_eur, y = "EPSG:3395", method = "bilinear") # Classify landscape landscape_classified <- shar::classify_habitats(raster = tmp_raster_eur_3395, return_breaks = TRUE, style = "fixed", fixedBreaks = c(-20, -10, -7.5, -5, -2.5, 0, 2.5, 5, 7.5, 10, 20))
raster_unclassed_df <- terra::as.data.frame(tmp_raster_eur_3395, xy = TRUE) %>% dplyr::rename("value" = "jan") %>% dplyr::mutate("type" = "Unclassified") raster_classed_df <- terra::as.data.frame(landscape_classified$raster, xy = TRUE) %>% dplyr::rename("value" = "layer") %>% dplyr::mutate("type" = "Classified") plot_unclassed <- ggplot2::ggplot() + ggplot2::geom_raster(data = raster_unclassed_df, mapping = ggplot2::aes(x = x, y = y, fill = value)) + ggplot2::geom_sf(data = roi_map$geometry, colour = "black", fill = NA, size = 0.1) + ggplot2::theme_minimal() + ggplot2::xlab(label = NULL) + ggplot2::ylab(label = NULL) + ggplot2::labs(fill = NULL) + ggplot2::scale_fill_distiller(palette = "RdBu", na.value = "transparent") + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "#c9c9c9", linetype = "dashed", size = 0.075), panel.background = ggplot2::element_rect(fill = "#f0f8ff"), panel.border = ggplot2::element_rect(fill = NA), text = ggplot2::element_text(size = 12), axis.text.x = ggplot2::element_text(size = 9), axis.text.y = ggplot2::element_text(size = 9), plot.margin = ggplot2::margin(t = 0, # Top margin r = 0, # Right margin b = 0, # Bottom margin l = 0), legend.position = "bottom", legend.direction = "horizontal", legend.justification = "center", legend.text = ggplot2::element_text(size = 8), legend.key.height = ggplot2::unit(0.25, 'cm'), legend.key.width = ggplot2::unit(0.75, "cm") ) plot_classed <- ggplot2::ggplot() + ggplot2::geom_raster(data = raster_classed_df, mapping = ggplot2::aes(x = x, y = y, fill = factor(value))) + ggplot2::geom_sf(data = roi_map$geometry, colour = "black", fill = NA, size = 0.1) + ggplot2::theme_minimal() + ggplot2::xlab(label = NULL) + ggplot2::ylab(label = NULL) + ggplot2::labs(fill = NULL) + ggplot2::scale_fill_brewer(palette = "RdBu", direction = -1, na.value = "transparent", guide = ggplot2::guide_legend()) + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "#c9c9c9", linetype = "dashed", size = 0.075), panel.background = ggplot2::element_rect(fill = "#f0f8ff"), panel.border = ggplot2::element_rect(fill = NA), text = ggplot2::element_text(size = 12), axis.text.x = ggplot2::element_text(size = 9), axis.text.y = ggplot2::element_blank(), # ggplot2::element_text(size = 4), plot.margin = ggplot2::margin(t = 0, # Top margin r = 0, # Right margin b = 0, # Bottom margin l = 0), legend.position = "bottom", legend.direction = "horizontal", legend.justification = "center", legend.text = ggplot2::element_text(size = 8), legend.key.height = ggplot2::unit(0.25, 'cm'), legend.key.width = ggplot2::unit(0.75, "cm") ) plot_unclassed + plot_classed
The occurrence data is prepared, then the shar
function fit_point_process
is called, yielding the randomized occurrence data within the observation window as required by the results_habitat_association
function. Figure 2 displays the real and randomised occurrence data.
# Convert occurrence data to a simple features object data_sf <- sf::st_as_sf(data_simp, coords = c("lon", "lat"), crs = "EPSG:4326") # Re-project the occurrence data data_sf_3395 <- data_sf %>% sf::st_transform(crs = sf::st_crs("EPSG:3395")) # Extract the coordinates as a matrix from the sf occurrences object data_sf_coords <- sf::st_coordinates(data_sf_3395) # Create a spatial points pattern object containing the occurrence data data_sf_ppp <- spatstat.geom::as.ppp(X = data_sf_coords, W = roi_owin) # Fit point pattern process to data rand_pattern <- shar::fit_point_process(pattern = data_sf_ppp, n_random = 19)
recon_occ_df <- as.data.frame(rand_pattern$randomized$randomized_1) %>% dplyr::mutate("type" = "Randomised Occurrences") real_occ_df <- data_sf_coords %>% as.data.frame() %>% dplyr::rename(x = "X", y = "Y") %>% dplyr::mutate("type" = "Real Occurrences") real_occ_plot <- ggplot2::ggplot() + ggplot2::geom_sf(data = roi_map$geometry, # mapping = ggplot2::aes(), colour = "black", fill = "white", size = 0.1) + ggplot2::geom_point(real_occ_df, mapping = ggplot2::aes(x = x, y = y), size = 0.2, stroke = 0, shape = 16, color = "Red") + ggplot2::theme_minimal() + ggplot2::xlab(label = NULL) + ggplot2::ylab(label = NULL) + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "#c9c9c9", linetype = "dashed", size = 0.075), panel.background = ggplot2::element_rect(fill = "#f0f8ff"), panel.border = ggplot2::element_rect(fill = NA), text = ggplot2::element_text(size = 12), axis.text.x = ggplot2::element_text(size = 9), axis.text.y = ggplot2::element_text(size = 9), plot.margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 0)) recon_occ_plot <- ggplot2::ggplot() + ggplot2::geom_sf(data = roi_map$geometry, mapping = ggplot2::aes(), colour = "black", fill = "white", size = 0.1) + ggplot2::geom_point(recon_occ_df, mapping = ggplot2::aes(x = x, y = y), size = 0.2, stroke = 0, shape = 16, color = "Red") + ggplot2::theme_minimal() + ggplot2::xlab(label = NULL) + ggplot2::ylab(label = NULL) + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "#c9c9c9", linetype = "dashed", size = 0.075), panel.background = ggplot2::element_rect(fill = "#f0f8ff"), panel.border = ggplot2::element_rect(fill = NA), text = ggplot2::element_text(size = 12), axis.text.x = ggplot2::element_text(size = 9), axis.text.y = ggplot2::element_blank(), # ggplot2::element_text(size = 4), plot.margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 0)) real_occ_plot + recon_occ_plot
The analysis function results_habitat_association
is then called. The results of the analysis show that Cormus domestica is positively associated with locations which experience a mean January temperature of -2.5C - 10C (habitats 5, 6, 7, 8, and 9). Furthermore, Cormus domestica is negatively associated with all other habitats classified by temperature.
# Establish significance level sig_level <- 0.01 # Run analysis results <- shar::results_habitat_association(pattern = rand_pattern, raster = landscape_classified$raster, breaks = landscape_classified$breaks, significance_level = sig_level) %>% dplyr::arrange(habitat) results
# Create a data frame containing only the cells containing values positively # associated with Cormus domestica occurrences # First reclassify the cells which corresponding with associated habitats # (6 and 7) to the value 2, and all other cells to the value 1 matrix <- c(1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 2, 7, 2, 8, 1, 9, 1, 10, 1) rclmat <- matrix(matrix, ncol = 2, byrow = TRUE) landscape_classified_results <- terra::classify(x = landscape_classified$raster, rcl = rclmat) landscape_classified_results <- terra::as.data.frame(landscape_classified_results, xy = TRUE) %>% dplyr::mutate(layer = dplyr::case_when( layer == 1 ~ "Unassociated habitat", layer == 2 ~ "Associated habitat", TRUE ~ as.character(layer) )) plot_res_classed <- ggplot2::ggplot() + ggplot2::geom_raster(data = landscape_classified_results, mapping = ggplot2::aes(x = x, y = y, fill = factor(layer, levels = c("Associated habitat", "Unassociated habitat")))) + ggplot2::geom_sf(data = roi_map$geometry, colour = "black", fill = NA, size = 0.1) + ggplot2::theme_minimal() + ggplot2::xlab(label = NULL) + ggplot2::ylab(label = NULL) + ggplot2::labs(fill = NULL) + # ggplot2::scale_fill_brewer(type = "seq", # palette = 1, # direction = -1, # na.value = "transparent", # guide = ggplot2::guide_legend(ncol = 1)) + ggplot2::scale_fill_discrete(type = c("#a4ddab", "white"), na.value = "transparent", guide = ggplot2::guide_legend(ncol = 1)) + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "#c9c9c9", linetype = "dashed", size = 0.075), panel.background = ggplot2::element_rect(fill = "#f0f8ff"), panel.border = ggplot2::element_rect(fill = NA), text = ggplot2::element_text(size = 12), axis.text.x = ggplot2::element_text(size = 9), axis.text.y = ggplot2::element_text(size = 9), plot.margin = ggplot2::margin(t = 0, # Top margin r = 0, # Right margin b = 0, # Bottom margin l = 0), legend.position = "bottom", legend.direction = "horizontal", legend.justification = "center", legend.text = ggplot2::element_text(size = 8), legend.key.height = ggplot2::unit(0.25, 'cm'), legend.key.width = ggplot2::unit(0.75, "cm")) plot_res_classed_occs <- ggplot2::ggplot() + ggplot2::geom_raster(data = landscape_classified_results, mapping = ggplot2::aes(x = x, y = y, fill = factor(layer, levels = c("Associated habitat", "Unassociated habitat")))) + ggplot2::geom_sf(data = roi_map$geometry, colour = "black", fill = NA, size = 0.1) + ggplot2::geom_point(real_occ_df, mapping = ggplot2::aes(x = x, y = y), size = 0.2, stroke = 0, shape = 16, color = "Red") + ggplot2::theme_minimal() + ggplot2::xlab(label = NULL) + ggplot2::ylab(label = NULL) + ggplot2::labs(fill = NULL) + # ggplot2::scale_fill_brewer(type = "seq", # palette = 1, # direction = -1, # na.value = "transparent", # guide = ggplot2::guide_legend(ncol = 1)) + ggplot2::scale_fill_discrete(type = c("#a4ddab", "white"), na.value = "transparent", guide = ggplot2::guide_legend(ncol = 1)) + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "#c9c9c9", linetype = "dashed", size = 0.075), panel.background = ggplot2::element_rect(fill = "#f0f8ff"), panel.border = ggplot2::element_rect(fill = NA), text = ggplot2::element_text(size = 12), axis.text.x = ggplot2::element_text(size = 9), axis.text.y = ggplot2::element_text(size = 9), plot.margin = ggplot2::margin(t = 0, # Top margin r = 0, # Right margin b = 0, # Bottom margin l = 0), legend.position = "bottom", legend.direction = "horizontal", legend.justification = "center", legend.text = ggplot2::element_text(size = 8), legend.key.height = ggplot2::unit(0.25, 'cm'), legend.key.width = ggplot2::unit(0.75, "cm")) plot_res_classed + plot_res_classed_occs
By visually inspecting Figure 3 we can see that occurrences of Cormus domestica are mostly restricted to the associated habitat as defined by a mean January temperature of -2.5C - 10C. However, Cormus domestica does not exist in all areas experiencing a mean January temperature of -2.5C - 10C, additionally there are a number of occurrences outside these areas. The next step assessing in the climatic niche of Cormus domestica would be to repeat the above analysis for more environmental variables, which is beyond the scope of this minimal example.
Chamberlain SA, Boettiger C. 2017. R Python, and Ruby clients for GBIF species occurrence data. PeerJ Preprints 5:e3304v1
De Rigo, D., Caudullo, G., Houston Durrant, T. and San-Miguel-Ayanz, J., 2016. The European Atlas of Forest Tree Species: modelling, data and information on forest tree species. European Atlas of Forest Tree Species, p.e01aa69.
New, M., Lister, D., Hulme, M. and Makin, I., 2002. A high-resolution data set of surface climate over global land areas. Climate research, 21(1), pp.1-25.
Rotach, P., 2003. EUFORGEN Technical Guidelines for genetic conservation and use for service tree (Sorbus domestica). Bioversity International.
South A (2022). rnaturalearth: World Map Data from Natural Earth. https://docs.ropensci.org/rnaturalearth (website) https://github.com/ropensci/rnaturalearth.
Sparks, (2017). getCRUCLdata: Use and Explore CRU CL v. 2.0 Climatology Elements in R. Journal of Open Source Software, 2(12), 230,
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.