knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(sdmpipeline)
#devtools::load_all()
library(stacatalogue)
library(rgbif)
library(ggplot2)

Select the directory where to create the output.

output_dir <- "C:/SDMpipeline"

1. Load observations

proj <- "EPSG:6623"
species <- "Glyptemys insculpta"
gbifData <- rgbif::occ_data(scientificName = species, hasCoordinate = T, limit = 2000) 
presence <- gbifData$data
presence <- presence %>% dplyr::select(key, species, decimalLongitude, decimalLatitude, year) %>%
      dplyr::filter(year >= 1980 )%>%
      dplyr::rename(id = key, scientificName = species) %>%
      dplyr::mutate(id = as.double(id))

presence <- create_projection(presence, lon = "decimalLongitude", lat = "decimalLatitude", 
proj_from = "+proj=longlat +datum=WGS84", proj_to = proj, new_lon = "lon", new_lat = "lat") 

bbox <- points_to_bbox(dplyr::select(presence, lon, lat), proj_from = proj, buffer = 0)

2. Covariates

mask <- terra::vect("C:/GitHub/sdm-pipeline/data/shape_study_area_nolakes_nad83.shp")
bbox <- shp_to_bbox(mask)

Load covariates from local tif files

As a first option, we choose to load predictors from local tif files (in the folder predictors_dir). When subset_layers is NULL, all the files from the predictors_dir folder will be loaded. They must be in the same projection system and have the same extent and resolution.

predictors_dir <- "C:/Users/vals3103/Dropbox/Post-doc/data/environmental/out/chelsa"
predictors <- load_predictors(source = "from_tif",
                           predictors_dir = predictors_dir,
                           subset_layers = NULL,
                           remove_collinear = T,
                           method = "vif.cor",
                           method_cor_vif = NULL,
                           proj = proj,
                           mask = mask,
                           sample = TRUE,
                           nb_points = 1000,
                           cutoff_cor = 0.7,
                           cutoff_vif = 7,
                           export = F,
                           ouput_dir = getwd(),
                           as_list = F)
predictors

Alternatively, we can pre-select layers by their names and desactivate the collinearity test:

Load covariates from the stac catalogue

The function load_predictors can also load predictors from the IO stac catalogue. It uses the function load_cube from the library stacatalogue, see the package documentation for further explanation and details.

Here, we select climatic layers from CHELSA in their native resolution.

#bbox <- shp_to_bbox(qbc_us_shp)
predictors_clim <- load_predictors(source = "from_cube",
                               cube_args = list(stac_path = "https://io.biodiversite-quebec.ca/stac",
            limit = 5000, 
            collections = c("chelsa-clim"),     
            t0 = "1981-01-01",
            t1 = "1981-01-01",
            spatial.res = 1000, # in meters
            temporal.res = "P1Y",
            aggregation = "mean",
            resampling = "near"),

                             subset_layers = subset_layers,
                           remove_collinear = F,
                           method = "vif.cor",
                           method_cor_vif = NULL,
                           proj = proj,
                           bbox = bbox,
                          mask = mask,
                           sample = TRUE,
                           nb_points = 50000,
                           cutoff_cor = 0.7,
                           cutoff_vif = 3,
                           export = F,
                           ouput_dir = getwd(),
                           as_list = F)
predictors <- predictors_clim
terra::add(predictors) <- predictors_topo
 plot(predictors)
    clean_presence <- clean_coordinates(
      presence,
      predictors = predictors,
      species_name = species,
      srs = proj,
      unique_id = "id",
      lon = "lon",
      lat = "lat",
      species_col = "scientificName",
      spatial_res = 1000,
      tests = c(
        "equal",
        "zeros",
        "duplicates",
        "same_pixel",
      #  "centroids",
     #   "seas",
     #   "gbif",
     #   "institutions",
        "env"
      ),
      threshold_env = 0.2,
       report = TRUE,
      dir = output_dir,
   value = "clean"
    )

head(clean_presence)
study_extent <- create_study_extent(clean_presence, 
                              lon = "lon",
                              lat = "lat",
                              proj = proj,
                              method = "buffer",
                              mask = NULL,
                              dist_buffer = 200000,
                              shapefile_path = NULL)

predictors_small <- fast_crop(predictors, study_extent)
#raster::plot(predictors_small)
study_extent
raster::plot(study_extent)
n_background <- 10000

background <- create_background(species = species,
                                   predictors = predictors_small, 
                                    method = "random", #will select random points in predictors_study_extent area
                                    n = n_background,
                                    obs = clean_presence,
                                   density_bias = NULL) 
clean_background <- clean_coordinates(
      background,
      predictors = predictors,
      species_name = species,
      srs = proj,
      unique_id = "id",
      lon = "lon",
      lat = "lat",
      species_col = "scientific_name",
      tests = c(
        "equal",
        "zeros",
         "env"
       ),
      threshold_env = 0.8,
      value = "clean",
      report = F
    )
sdm_data <- setup_sdm_data(
  clean_presence,
  clean_background,
  predictors_small,
  partition_type = "none")                            
species_dir <- sprintf("%s/%s", output_dir, species)
dens_plots <- create_density_plots(sdm_data, covars = names(predictors), factors = c(), export = T, path = paste0(species_dir, "/density_plots.pdf"))
dens_plots
  #dir.create(file.path(sprintf("%s/%s/maxEnt", outputDir, species)), showWarnings = FALSE) #dir.create() does not crash if the directory already exists}
    # First, we calculate models using the list of fc and rm provided
    partition_type <-  c("block")
    mod_tuning <- run_maxent(sdm_data, 
                             with_raster = F,  # can be set to F to speed up
                             covars = names(predictors),
                             algorithm = "maxent.jar",
                             partition_type = partition_type,
                             factors = NULL,
                            nfolds = 5, #used if partition_type is "randomkfold"
                             rm = 1, 
                             fc = "L",
                             parallel = T,
                             updateProgress = T,
                             parallelType = "doParallel")

    res_tuning <- mod_tuning@results
    print(res_tuning)  
  # Selection resTuningof fc and rm parameters
  tuned_param <- select_param(res_tuning, method = "AIC", list = F)
  pred_pres <- predict_maxent(mod_tuning,
  param = tuned_param,
   predictors_small, type = "cloglog")

   raster::plot(pred_pres)
  # Selection resTuningof fc and rm parameters
  r_plot <- response_plot(mod_tuning, 
  param = tuned_param, type = "cloglog", path = "./response_plot.jpeg") 
  # Selection resTuningof fc and rm parameters
thresh <- find_threshold(pred_pres, occs = mod_tuning@occs[, c("lon", "lat")],
                          bg = mod_tuning@bg[, c("lon", "lat")],
                          type = "spse") 

sprintf("Selected threshold: %f", thresh)
pred_bin <- binarize_pred(pred_pres, thresh)
raster::plot(pred_bin)


ReseauBiodiversiteQuebec/sdm-pipeline documentation built on June 23, 2022, 9:10 p.m.