R/Processing/RUN_SDM_GEN_MAPS.R

#########################################################################################################################
#####################################  RUNNING SDM ANALYSIS FOR TAXA ---- ###############################################
#########################################################################################################################



## This code runs an SDM work flow, for a subset of species (e.g. whichever you supply)
## See (Burley et al 2019) for more details on the method ::
## https://www.sciencedirect.com/science/article/pii/S0048969719323289
## Also see the Markdown file


## Add data that fits within git limits

# - Koppen files are created by me, and cross-referenced to Kriticos. They won't change, so making them static is ok
#
# - The example shape files and species lists all come with the package
#
# - The Kopppen raster is downloaded from Google drive, etc, because it is bespoke
#
# - The worldclim rasters are all take from the getraster function
#
# - The creation of the SUA.tif is the key outstanding step : this is needed to run the final aggregation, and needs to be generalisable



## SETUP DATA & ENVIRONMENT ===========================================================================================


## First, load the data needed to run the analysis
## This file is created by running (./R/READ_SPATIAL_DATA.R)
## Move this across to the RMD file


## Set the R library so we know all the packages and dependencies will be the same
# .libPaths("./R/win-library/3.5")
# devtools::install_github("hrbrmstr/dtupdate")
# library(dtupdate)
# github_update("HMB3/sdmgen")


## Load only the packages needed for the analysis
package.list <- c('ff',         'things',    'raster',        'dismo',             'rJava',        'sp',                    'latticeExtra',
                  'data.table', 'devtools',  'roxygen2',
                  'rgdal',      'rgeos',     'gdalUtils',     'rmaxent',           'readr',        'plyr',                  'dplyr',
                  'tidyr',      'readr',     'rnaturalearth', 'rasterVis',         'RColorBrewer', 'latticeExtra',          'parallel',
                  'ALA4R',      'stringr',   'Taxonstand',    'CoordinateCleaner', 'gsubfn',       'PerformanceAnalytics',  'utf8',
                  'rvest',      'magrittr',  'devtools',      'ggplot2',           'reshape2',     'rmarkdown',             'flexdashboard',
                  'shiny',      'rgbif',     'ENMeval',       'tibble',            'ncdf4',        'Cairo',                 'taxonlookup',
                  'kgc',        'betareg',   'gridExtra',     'grid',              'lattice',      'ConR',                  'writexl',
                  'sf',         'ggmap',     'DataCombine',   'exactextractr',     'mgcv', 'doSNOW', 'tidyverse',  'ggpubr', 'GGally', 'maptools')


## Require packages
sapply(package.list, require, character.only = TRUE)


## Source functions, and set temporary directory
source('./R/SDM_GEN_PROCESSOR_FUNCTIONS.R')
source('./R/SDM_GEN_MAXENT_FUNCTIONS.R')
source('./R/SDM_GEN_MAPPING_FUNCTIONS.R')
source('./R/SDM_GEN_MODEL_LISTS.R')
# source('./R/READ_SPATIAL_DATA.R')


## Set temporary raster directory ----
## This step is vital
rasterOptions(tmpdir = './r_temp')





## STEP 1 :: Download species occurrence data ================================================================================


## Load in all the .rda files that are save with the package ::
## this is all the data needed to run an example analysis
list.filenames <- list.files(path = './data/', pattern = ".rda$", full.names = TRUE)
list.data <-list()

## See the global environemnt for a list of objects
for (i in 1:length(list.filenames))

{
  #message('load .rda for ', i)
  list.data[[i]]<-load(list.filenames[i])
}


## The species list to be analyzed ----
analysis_spp <- plant.spp[1:11]


## Download GBIF and ALA data
download_GBIF_all_species(species_list  = analysis_spp,
                          download_path = "./data/GBIF/",
                          download_limit = 20000)


download_ALA_all_species(species_list  = analysis_spp,
                         download_path = "./data/ALA/",
                         download_limit = 20000)





## STEP 2 :: Combine species occurrence data ============================================================================


## Combine ALA data and filter to records on land taken > 1950
## The climate data is the worldclim version 1.0
ALA.LAND = combine_ala_records(species_list      = analysis_spp,
                               records_path      = "./data/ALA/",
                               records_extension = "_ALA_records.RData",
                               record_type       = "ALA",
                               keep_cols         = ALA_keep,
                               world_raster      = world.grids.current)


## Combine GBIF data and filter to records on land taken > 1950
GBIF.LAND = combine_gbif_records(species_list      = analysis_spp,
                                 records_path      = "./data/GBIF/",
                                 records_extension = "_GBIF_records.RData",
                                 record_type       = "GBIF",
                                 keep_cols         = gbif_keep,
                                 world_raster      = world.grids.current)





## STEP 3 :: extract environmental values =================================================================================


## Combine GBIF and ALA data, and extract environmental values
## Create messages for this
COMBO.RASTER.CONVERT = combine_records_extract(ala_df          = ALA.LAND,
                                               gbif_df         = GBIF.LAND,
                                               urban_df        = 'NONE',
                                               thin_records    = TRUE,
                                               template_raster = template.raster.1km.WGS84,
                                               world_raster    = world.grids.current,
                                               prj             = CRS("+init=epsg:4326"),
                                               species_list    = analysis_spp,
                                               biocl_vars      = bioclim_variables,
                                               env_vars        = env_variables,
                                               worldclim_grids = "TRUE",
                                               save_data       = "FALSE",
                                               #data_path    = "./output/results/",
                                               save_run        = "TEST_BATS")


## If needed, extract urban data (i.e. if the species analaysed are in Alessandro's urban dataset)
## This needs to be separate, so that the urban records aren't removed by spatial cleaning
URBAN.RASTER.CONVERT = urban_records_extract(urban_df        = TI.XY,
                                             template_raster = template.raster.1km.WGS84,
                                             world_raster    = world.grids.current,
                                             prj             = CRS("+init=epsg:4326"),
                                             species_list    = analysis_spp,
                                             thin_records    = TRUE,
                                             biocl_vars      = bioclim_variables,
                                             env_vars        = env_variables,
                                             worldclim_grids = "TRUE",
                                             save_data       = "FALSE",
                                             #data_path    = "./output/results/",
                                             save_run        = "TEST_URBAN")





## STEP 4 :: Flag institutional outliers ===================================================================


## :: Flag records as institutional or spatial outliers
COORD.CLEAN = coord_clean_records(records    = COMBO.RASTER.CONVERT,
                                  capitals   = 10000,  ## Remove records within 10km  of capitals
                                  centroids  = 5000,   ## Remove records within 5km of country centroids
                                  save_run   = "TEST_BATS",
                                  #data_path    = "./output/results/",
                                  save_data  = "FALSE")


## Step 4b :: Flag spatial outliers
SPATIAL.CLEAN = check_spatial_outliers(all_df       = COORD.CLEAN,
                                       land_shp     = LAND,
                                       urban_df     = URBAN.RASTER.CONVERT,
                                       clean_path   = './data/GBIF/Check_plots/',
                                       spatial_mult = 10,
                                       prj          = CRS("+init=epsg:4326"))


## Step 4c ::Estima ecliate niches usign pecies records
GLOB.NICHE = calc_1km_niches(coord_df     = SPATIAL.CLEAN,  ## Replace COORD.CLEAN
                             country_shp      = AUS,
                             world_shp    = LAND,
                             kop_shp      = Koppen_shp,
                             ibra_shp     = IBRA,
                             species_list = analysis_spp,
                             env_vars     = env_variables,
                             cell_size    = 2,
                             save_run     = "Stoten_EG",
                             data_path    = "./output/results/",
                             save_data    = "TRUE")


## Step 4d :: plot species ranges using histograms and convex hulls for rainfall and temperature distributions
##  make GUTI color light grey,  GBIF cyan and ALA another color-blind friendly contrasting color
plot_range_histograms(coord_df     = SPATIAL.CLEAN,
                      species_list = analysis_spp,
                      range_path = './data/GBIF/Check_plots/')





## STEP 5 :: Prepare SDM table ==============================================================================


## This code creates the table for running SDMs in 'species with data', or SWD format.
## There is a switch in the function, that adds additional bg points from other taxa, if specified.
## In this example for bats, we'll just use
SDM.SPAT.OCC.BG = prepare_sdm_table(coord_df        = COORD.CLEAN,
                                    species_list    = unique(COORD.CLEAN$searchTaxon),
                                    sdm_table_vars  = sdm_table_vars,
                                    save_run        = "test_species",
                                    read_background = "FALSE",
                                    #BG_points       = 'SDM_SPAT_ALL_ANIMAL_BG_POINTS.rds', ## This should be just bats
                                    save_data       = "FALSE",
                                    save_shp        = "FALSE")





## STEP 6 :: Run Global SDMs ============================================================================================
## Run SDMs across all taxa, but don't show extra loops, etc.


## This function takes a table of all species occurrences (rows) and environmental values (columns), and runs SDMs for a list
## of taxa. First, targetted background selection is used. Then, backwards selection is used on the occurrence and
## background points generated by the targetted selection. The function saves files in folders, it doesn't return anything.


## Here we can read in a file saved earlier
#SDM.SPAT.OCC.BG <- readRDS("./data/ANALYSIS/SDM_SPAT_OCC_BG_ALL_BATS.rds")


## Run the SDMs for a set of taxa ----
run_sdm_analysis(species_list            = analysis_spp,
                 maxent_dir              = 'output/maxent/full_models',      ## Must be created in your directory
                 bs_dir                  = 'output/maxent/back_sel_models',  ## Must be created in your directory
                 sdm_df                  = SDM.SPAT.OCC.BG,
                 sdm_predictors          = bs_predictors,
                 backwards_sel           = "TRUE",        ## If TRUE, run both full models, and backwards selected models
                 template_raster         = template_raster_1km_mol,
                 cor_thr                 = 0.8,           ## The maximum allowable pairwise correlation between predictor variables
                 pct_thr                 = 5,             ## The minimum allowable percent variable contribution
                 k_thr                   = 4,             ## The minimum number of variables to be kept in the model.

                 min_n                   = 20,            ## This should be higher...
                 max_bg_size             = 70000,         ## could be 50k or lower, it just depends on the biogeography
                 background_buffer_width = 200000,        ## How far away should BG points be selected?
                 shapefiles              = TRUE,          ## Save the shapefiles?
                 features                = 'lpq',         ## Maxent features to use
                 replicates              = 5,             ## Number of replicates
                 responsecurves          = TRUE,          ## Save response curves?
                 country_shp                 = AUS,
                 Koppen_zones            = Koppen_zones,    ## Cross-ref Kriticos
                 Koppen_raster           = Koppen_1975_1km) ## This needs to be created locally using Koppen Zones





## STEP 7 :: Project SDMs across Australia ================================================================================


## Create a table of maxent results
## This function will just aggregate the results for models that ran successfully
MAXENT.RESULTS = compile_sdm_results(species_list = analysis_spp,
                                     results_dir  = 'output/maxent/back_sel_models',
                                     save_data    = FALSE,
                                     data_path    = "./output/results/",
                                     save_run     = "TEST_BATS")


## First, get map_spp from the maxent results table above, change the species column,
## and create a list of logistic thresholds - These values are debateable, need a few references from the literature...
map_spp         <- MAXENT.RESULTS$searchTaxon %>% gsub(" ", "_", .,)
percent.10.log  <- MAXENT.RESULTS$Logistic_threshold
sdm.results.dir <- MAXENT.RESULTS$results_dir


## Then project the SDMS across the whole of Australia
## This function takes the output of the Maxent species distribution models using current conditions, and generates
## a prediction of habitat suitability for current and future environmental conditions.


## Use a function to convert a rasterized shapefile into a vector, based on a field from one of the shapefiles
## We are just using the Significant Urban Areas of Australia as an example
## This could be any shapefile (e.g. the IBRA regions, etc.)


## Create 2030 sdm map projections ----
## This step would need to change, to run as a package
## Cannot create a RasterLayer object from this file.
## Error in .local(.Object, ...) :
tryCatch(
  project_maxent_grids_mess(country_shp   = AUS,          ## Shapefile, e.g. Australian states
                            world_shp     = LAND,         ## World shapefile
                            country_prj   = CRS("+init=EPSG:3577"),
                            world_prj     = CRS("+init=epsg:4326"),

                            scen_list     = scen_2070,                 ## List of climate scenarios
                            species_list  = map_spp,                   ## List of species folders with maxent models
                            maxent_path   = './output/maxent/back_sel_models/',    ## Output folder
                            climate_path  = './data/worldclim/aus/',               ## Future climate data

                            grid_names    = env_variables,             ## Names of all variables
                            time_slice    = 70,                        ## Time period
                            current_grids = aus.grids.current,         ## Create this locally
                            create_mess   = TRUE,
                            OSGeo_path    = 'C:/OSGeo4W64/OSGeo4W.bat', ## Other users would need to install this
                            nclust        = 1),

  ## If the species fails, write a fail message to file.
  error = function(cond) {

    ## This will write the error message inside the text file, but it won't include the species
    file.create(file.path("output/maxent/back_sel_models/mapping_failed_2030.txt"))
    cat(cond$message, file = file.path("output/maxent/back_sel_models/mapping_failed_2030.txt"))
    warning(cond$message)

  })





## STEP 8 :: Aggregate SDM projections within Spatial units ==============================================================


## Rasterize a shapefile ----
## Can't use the .rda, must use the file path
areal_unit_vec <- shapefile_vector_from_raster(shp_file = SUA,
                                               prj      = CRS("+init=EPSG:3577"),
                                               agg_var  = 'SUA_CODE16',
                                               temp_ras = aus.grids.current[[1]],
                                               targ_ras = './data/SUA_2016_AUST.tif')


## This is a vector of all the cells that either are or aren't in the rasterized shapefile
## Need to join here
summary(areal_unit_vec)


## This error was introduced by changing the shapefiles to being supplied as objects,
## rather than read in using file paths as before, and also be removing the ordering of the shapefile.

## Also, check that it works inside the loop, but not outside...
## It does appear to work inside the loop

# Running summary of SDM predictions within SUAs for Dobsonia_magna using  shapefile
# Combining SDM prediction for 6 GCMS for 2030
# Calculating mean of 6 GCMS for 2030
# doing Dobsonia_magna | Logistic > 0.4227 for 2030
# Calcualting the max contiguous suitability patch for Dobsonia_magna
# Running thresholds for Dobsonia_magna | 2030 combined suitability > 0.4227
# Calculating change for Dobsonia_magna | 2030 combined suitability > 0.4227
# Counting cells lost/gained/stable/never suitable, both across AUS and per unit
# group by, SUA_CODE16
# Error in as.vector(x) :
#   trying to get slot "data" from an object (class "factor") that is not an S4 object
# In addition: Warning message:


## Combine GCM predictions and calculate gain and loss for 2030 ----
## Then loop over the species folders and climate scenarios
## Why doesn't using the
tryCatch(mapply(sdm_area_cell_count,                                  ## Function aggreagating GCM predictions by spatial unit
                unit_shp      = './data/SUA_albers.rds',          ## Spatial unit of analysis - E.G. SUAs, in Australian Albers
                unit_vec      = areal_unit_vec,                   ## Vector of rasterized unit cells
                sort_var      = "SUA_NAME16",
                agg_var       = "SUA_CODE16",
                world_shp     = './data/LAND_albers.rds',         ## Polygon for AUS maps
                country_shp   = './data/AUS_albers.rds',          ## Polygon for World maps

                DIR_list      = sdm.results.dir,                  ## List of directories with rasters
                species_list  = map_spp,                          ## List of species' directories
                number_gcms   = 6,
                maxent_path   = 'output/maxent/back_sel_models/', ## Directory of maxent results
                thresholds    = percent.10.log,                   ## List of maxent thresholds
                time_slice    = 30,                               ## Time period, eg 2030
                write_rasters = TRUE),

         ## If the species fails, write a fail message to file.
         error = function(cond) {

           ## This will write the error message inside the text file,
           ## but it won't include the species
           file.create(file.path("output/maxent/back_sel_models/sua_count_failed_2030.txt"))
           cat(cond$message, file=file.path("output/maxent/back_sel_models/sua_count_failed_2030.txt"))
           warning(cond$message)

         })





## STEP 9 ::: Collate SDM results ===================================================================================


## Summarise the SDM results as per the Stoten publication
## Create a function which aggregates the SDM results and plots them vs. temp


## The figure code steps start with 'AGG_SDM_results'. The output of that, 'SUA.PLOT' was combined with Linda's
## manual calculation of new species gain (ideally, that would be caluclate too) to create 'turover'.
## Make a function which ignores the columns N.spp	N.spp.loss	N.spp.gain	N.spp.stable. These could be added to
## later.



#########################################################################################################################
## THINGS TO CHANGE FOR BAT MODELLING ::
#########################################################################################################################



## 1). Update all the functions for bats

##     - Test the whole code for bats
##     - Check the SUA cell count section: read RDS does the trick
##     - Need to reproduce the data and code in the package
##     - Add a step 10 which plots data as per Stoten publication
##     -


## 2). Functionalise the workflow

##     - Iron out problems in Step 8 for mapping
##     - change SUAs to IBRA regions



## 2).  Update background points to be all bats
##
##      - Use ALA data
##      - Consider the overseas data?


## 4).  Add in taxon stand cleaing from Aniko's code

##      - Clean up files on H:



## 5). How can the code be made more portable? Convert to R package, etc. The raster step is the hard part...
##     ## The raster names from the worldclim website would need to not be hard-coded





#########################################################################################################################
## OUTSTANDING WORKFLOW TASKS:
#########################################################################################################################


## 3).  Make the code more modular, less monolithic

## 3).  Iron out some of the points which are not as reproducible - e.g the background points in step 6).

## 5).  Create a 'dashboard' for each species, using the MAXENT output (eg see "./R/PLOT_RANGE_HISTOGRAMS.R')





#########################################################################################################################
###################################################### TBC ##############################################################
#########################################################################################################################
HMB3/sdmgen documentation built on April 16, 2021, 7:29 p.m.