knitr::opts_chunk$set(echo = TRUE) # for development
devtools::load_all() # for development



Introduction

The package is a tool for identifying biodiversity records that have been designated locations on widely used grid systems. Our tool also estimates the degree of environmental heterogeneity associated with grid systems, allowing users to make informed decisions about how to use such occurrence data in global change studies. We show that a significant proportion (~13.5%; 261 million; accessed November 2021) of records on GBIF, largest aggregator of natural history collection data, are potentially gridded data, and demonstrate that our tool can reliably identify such records and quantify the associated uncertainties.

Workflow for grid simulation and detection

knitr::include_graphics("inst/workflow.png")

Overview of functions in GridDER

table_2 <- gsheet::gsheet2tbl("https://docs.google.com/spreadsheets/d/1gj0EHmPpLNCNHZV09PQqt-ixG526Mvowjegk_z7oPyY/edit#gid=0")
knitr::kable(table_2)

Install the GridDER package

remotes::install_github("BiogeographyLab/GridDER")

Examples of grid systems

Examples of four grid systems used in France (a), United Kingdom (b), South Africa (c), and Australia (d). The four grid systems have different spatial solutions – 10km for (a), 1km for (b), 5 arc-minute for (c), and 6 arc-minute for (d). The black points represent the biological collections assigned to the centroid of the corresponding grid systems. All maps constructed using WGS84.

knitr::include_graphics("inst/fig1.png")

Major use cases

Infer CRS (coordinate reference system)

First load the demo occurrence data. This dataset includes a group of gridded coordinates. It was originally downloaded from GBIF [https://doi.org/10.15468/dl.fp5795; accessed on 29 November 2021], but has been simplified to be spatially unique points for easier demonstration.

data("occs_unique") # dataframe of occurrences
print(nrow(occs_unique))
print(head(occs_unique))

Look at the coordinates on a map

library(ggplot2)
library(broom)

data("ne_10m_admin_0_countries") # load NaturalEarth world map

spdf_fortified = broom::tidy(ne_10m_admin_0_countries, region = "ADMIN")

ggplot2::ggplot() +
  geom_polygon(data = spdf_fortified, 
               aes( x = long, y = lat, group = group), 
               fill="#69b3a2", color="white") +
  geom_point(data = occs_unique,
             aes(x = decimalLongitude, y = decimalLatitude), 
             size = 2, 
        shape = 23, fill = "darkred") +
    coord_sf(xlim = c(0, 6), ylim = c(46, 50), expand = FALSE)



The coordinates have been projected to WGS84 (which is the same for occurrences from GBIF), which could be different from the CRS where the grid system was defined. So here we use infer_crs() to infer the correct CRS.

results_crs = GridDER::infer_crs(occ_path = occs_unique, # input of coordinates
                                 truth_crs_num = "2154", # (optional) here we know the true CRS, just add it here for a comparison with the inferred CRS ,
                                 #flag_debug = c(4363,5451,5981,5982,5991,5992,5425,5424,5422,3901), # (optional) only evaluate a few CRS
                                 cup_num = 4) # the function will loop through many CRS, so using multiple cores can speed up the process. The default uses 2 cores, but you can adjust it to your preferred number. 

It may take minutes - hours to complete the inference. If you want to quickly look at the output, you can load the results we calculated earlier.

results_crs = system.file("extdata", "results_crs.rds", package = "GridDER")
results_crs = readRDS(results_crs)
# Now, let's take a look at the output. The CRS code "2154" is ranked the first.
print(results_crs$selected[1:10, c("code", "note")])

Infer the spatial resolution

data("occs_unique") # Dataframe of occurrences

# Transform the coordinates to be spatial objects
input_occ = GridDER::load_occ(occs_unique)
input_occ_prj = sp::spTransform(input_occ,crs(paste0("+init=epsg:","2154")))
result_res = GridDER::infer_resolution(input_coord=input_occ_prj@coords)
print(result_res$res_x)
print(result_res$res_y)

The inferred resolution is 10km on x & y axes

Infer the spatial extent

result_ext = GridDER::infer_extent(method="crs_extent",
                          crs_grid=results_crs$selected$code[1],
                          flag_adjust_by_res=TRUE,
                          res_x=result_res$res_x,
                          res_y=result_res$res_y)

print(result_ext)

Generation of a grid system based on its spatial attributes

simulated_grid = GridDER::grid_generation(res_x=result_res$res_x, # resolution along x-axis
                                          res_y=result_res$res_y, # resolution along y-axis
                                          crs_num=results_crs$selected$code[1], # use the inferred CRS 
                                          country="France",       # the spatial file will be masked by country
                      flag_maskByCountry=TRUE,
                                          extent_unit="empirical_occ_extent",
                                          input_extent=result_ext,# use the inferred spatial extent
                                          flag_offset=c(0,-result_res$res_y*10,
                                                        result_res$res_x*10,0) # adjustment of the spatial extent                                          
                                          )
plot(simulated_grid)

Check the simulated grid

plot(simulated_grid,
     #ext=extent(c(extent(input_occ_prj)[1],extent(input_occ_prj)[1]+100000,
     #           extent(input_occ_prj)[3],extent(input_occ_prj)[3]+100000)))
      xlim=c(extent(input_occ_prj)[1],extent(input_occ_prj)[1]+110000),
      ylim=c(extent(input_occ_prj)[3],extent(input_occ_prj)[3]+110000))
plot(input_occ_prj,add=T,col="red")

Match occ data set against known grid systems

# take 100 gridded points, and generate 100 random points
point_grid = input_occ[sample(1:length(input_occ),100),]
point_nongrid = point_grid
point_nongrid@coords=point_nongrid@coords+runif(100)
point_all=rbind(point_grid,point_nongrid)

plot(input_occ)
plot(point_grid,add=T, col="red")      # red is gridded coordinate 
plot(point_nongrid,add=T,col="orange") # orange is random (nongridded) coordinate

Grid metadata

We have compiled metadata for 34 grid systems, you could take a look at the table below.

meta_data <- gsheet::gsheet2tbl("https://docs.google.com/spreadsheets/d/1Qp5KOpLSVnF2t16uIbNwKwK5ZBXt4k5unONp7eopPzI/edit#gid=166945453") 

meta_data

Here, we simulated a grid based on the spatial attributes we just inferred.

grid_metadata = data.frame(grid_ID=c("88"),
                           resolution_x=c(10000),
                           resolution_y=c(10000)  )

temp_result = GridDER::grid_matching(input_occ=point_all,
                                     input_grid=simulated_grid,
                                     grid_metadata=grid_metadata,
                                     flag_relativeTHD=0.1,
                                     flag_absoluteTHD=10)

# 100 points were recognized to be gridded coordinates
print( table(temp_result@data$grid_closest_both))

plot(temp_result,col=as.factor(temp_result@data$grid_closest_both))

Compute variation of environmental conditions

rgee::ee_Initialize() # The follow command authorizes rgee to manage Earth Engine resources
# Load ImageCollection of interest (environmental layer)

nasadem = rgee::ee$Image('NASA/NASADEM_HGT/001')$select('elevation') # Global elevation data in 30 meters. We do not plot due to extension and resolution 

# Grid Id 9
data("grid_ID_9") # Grid system example available in the package 

grid = grid_ID_9 |> 
  rgee::sf_as_ee()

# Compute standard deviation

std_dev <- GridDER::assess_env_uncertainty(x= nasadem, y= grid)

print(std_dev, n=5)

# Plot
library(ggplot2)
ggplot(data = grid_ID_9) +
  geom_sf(aes(fill = std_dev$elevation))+
  scale_fill_viridis_c(option = "plasma", trans = "sqrt")

Note the you need to setup rgee package (R package to acessess Google Earth Engine API) before using assessing the environmental variation (assess_env_uncertainty). You may need to use the following code to do so.

rgee::ee_install() # Creating virtual environmental to install the necessary dependencies

# If the following message happen : `An error occur when ee_install was creating the Python Environment. Run ee_clean_pyenv() and restart the R session, before trying again`

# Then execute the bellow command and restart R session:

rgee::ee_clean_pyenv()

# After that you must run `rgee::ee_install()`. You will request to continues and Rstart R seccion choose Yes in all options.

rgee::ee_Initialize() # The follow command authorizes rgee to manage Earth Engine resources

rgee::ee_check() # Checks if everything is OK. Optinal command . 

A simplified workflow for grid inference and matching

library(GridDER)

#1. (mandatory) specify input coordinates (dataframe, tibble, or the path of a csv) with columns "decimalLatitude" "decimalLongitude"
input_occ = occs_unique # a demo dataset is used here

#2. (optional) specify the crs. flag_crs=NULL will activate the crs inferring process, specifying a EPSG code of a crs will skip the crs inferring process
flag_crs = NULL
possible_true_crs = "2154" #a crs code that will be used as a reference, to be compared with many other crs codes; 2154 is the original crs for the demo dataset.
cup_num = 15 # number of cpus to be used in the inferring process

#3. (optional) specify the resolution of the grid along x and y, e.g. flag_res= c(1000,1000). NULL will activate the resolution inferring process
flag_res = NULL

#4. (optional) specify the spatial extent of the grid, e.g. flag_extent= c(xmin, ymin, xmax, ymax). NULL will activate the extent inferring process 
flag_extent = NULL

#5. (optional) generate a grid based on crs, resolution, extent.
flag_generateGrid = TRUE

#6. (optional) match input coordinates to the simulated grid
flag_matchOccToGrid = TRUE

#7. (optional) plot input coordinates and grid
flag_plot = TRUE

#######################
if(is.null(flag_crs)) {
#1.1 Infer coordinate reference system
result_crs = infer_crs(occ_path=input_occ,
                       truth_crs_num = possible_true_crs,
                       cup_num =cup_num)
#results_crs = readRDS(system.file("extdata", "results_crs.rds", package = "GridDER"))
flag_crs =  results_crs$selected$code[1]
}
print(flag_crs)              

if( is.null(flag_res) ){
 temp_occ = load_occ(input_occ)
 input_occ_prj = sp::spTransform(temp_occ,crs(paste0("+init=epsg:",flag_crs)))
 result_res = infer_resolution(input_coord=input_occ_prj@coords) 
 flag_res = c(result_res$res_x,result_res$res_y)
}
print(flag_res)              


if( is.null(flag_extent) ){
result_ext = infer_extent(method = "crs_extent",
                          crs_grid = flag_crs,
                          flag_adjust_by_res = TRUE,
                          res_x = flag_res[1],
                          res_y = flag_res[1])
flag_extent = result_ext
}
print(flag_extent)

if(flag_generateGrid  ){
simulated_grid = grid_generation(res_x = flag_res[1],
                                 res_y = flag_res[2],
                                 extent_unit="empirical_occ_extent",
                                 input_extent=result_ext,
                                 flag_offset=c(0,-flag_res[2]*10,
                                               flag_res[1]*10,0),
                                                 crs_num = flag_crs,
                                                 flag_maskByCountry = FALSE)
}



if(flag_matchOccToGrid){
  point_grid = load_occ(input_occ)

  grid_metadata = data.frame(grid_ID= c("simulatedGrid"),
                            resolution_x=flag_res[1],
                            resolution_y=flag_res[2] ) 

  temp_result = grid_matching(input_occ = point_grid,
                                input_grid = simulated_grid,
                                grid_metadata=grid_metadata,
                                flag_relativeTHD = 0.1,
                                flag_absoluteTHD = 100)
  table(temp_result@data$grid_closest_both)
}

if(flag_plot){
  temp_result_prj = sp::spTransform(temp_result,crs(paste0("+init=epsg:",flag_crs)))
  simulated_grid_crop = crop(simulated_grid,extent(temp_result_prj))
  plot(simulated_grid_crop )
  plot(temp_result_prj,add=T,col="blue")
}


BiogeographyLab/gridder documentation built on April 21, 2024, 2:32 a.m.