inst/doc/S4DM.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----get data, fig.show='hold'------------------------------------------------


# Load libraries

library(BIEN)
library(geodata)
library(ggplot2)
library(S4DM)
library(sf)
library(terra)
library(tidyterra)

# Make a temporary directory to store climate data

  temp <- tempdir()

# Get some occurrence data

  #tv <- BIEN_occurrence_species(species = "Trillium vaseyi")
  data("sample_points")
  
  
# Get environmental data
# To make things a bit faster and easier, we'll limit ourselves to the 2 variables (mean temperature and annual precipitation)

  
  # env <- worldclim_global(var = "bio",
  #                          res = 10,
  #                          path = temp)
  # env <- env[[c(1,12)]]

  env <- rast(system.file('ex/sample_env.tif', package="S4DM"))  
  
# And we'll rescale the variables as well

  env <- scale(env)

# Just to take a look to make sure we didn't mess anything up

plot(env)


## ----rangebagging, echo=FALSE, results='asis'---------------------------------

data("sample_points")

sample_points_rangebagged <- 
make_range_map(occurrences = sample_points[c("longitude","latitude")],
               env = env,
               presence_method = "rangebagging",
               background_method = "none",
               background_buffer_width = 100000)


#Lets see what it looks like

# convert to polygon for easier visualization

sample_points_rangebagged_polygon <-
  sample_points_rangebagged |>
  as.polygons() |>
  st_as_sf()

# get a bbox for plotting

sample_points_bbox <-
sample_points_rangebagged_polygon |>
  st_buffer(dist = 500000) |>
  st_bbox()

#Now, we'll plot the standardized temperature raster, along with the occurrence records and the range map 

ggplot(env)+
  geom_raster(mapping = aes(x=x,y=y,fill=wc2.1_10m_bio_1))+
  scale_fill_viridis_c(name="Temp. C", na.value = "transparent")+
  scale_x_continuous(expand=c(0,0),
                     limits = c(sample_points_bbox[1],sample_points_bbox[3]))+
    scale_y_continuous(limits = c(sample_points_bbox[2],sample_points_bbox[4]),
                     expand=c(0,0))+
  theme_bw()+
  geom_sf(data = sample_points_rangebagged_polygon,
          fill = "grey",
          size=2,
          alpha=0.5)+
    geom_point(data = sample_points,
             mapping = aes(x=longitude,y=latitude))




## -----------------------------------------------------------------------------


# Here, we'll use the same data as before for Trillium vaseyi.

#First, we'll select the background data

sample_points_bg <- get_env_bg(coords = sample_points[c("longitude","latitude")],
                    env = env,
                    width = 50000,
                    standardize = TRUE) #note that we used a small set of background points to expedite model fitting

# The returned object 'xs_bg' contains two objects:
  # 1) sample_points_bg$env a matrix of environmental covariates. This is what we need for modeling.
  # 2) sample_points_bg$bg_cells a vector containing the environmental raster cell IDs that are present in tv_bg$env. This is useful for mapping the results.

# Next, we get the presence data:

  sample_points_presence <- get_env_pres(coords = sample_points[c("longitude","latitude")],
                              env = env,
                              env_bg = sample_points_bg)

#The returned object 'tv_presence' contains two objects:

  # 1) tv_presence$env a matrix of environmental covariates. This is what we need for modeling.
  # 2) tv_presence$occurrence_sf a sf object containing the coordinate data. This is useful for conducting spatially stratified cross-validation.




# Now, we can fit the model.  Previously we used rangebagging, this time we'll use a simple KDE estimation

  sample_points_kde_kde <- fit_plug_and_play(presence = sample_points_presence$env,
                                  background = sample_points_bg$env,
                                  method = "kde")


# The object that was returned is of the class "pnp_model", which is essentially a list of model fits and associated metadata.

# To view this data on a map, we can project it to the background data we used in fitting (or we could project to a new location entirely). In either case, we use the function `project_plu_and_play`.


  sample_points_kde_kde_predictions <- project_plug_and_play(pnp_model = sample_points_kde_kde,
                                                  data = sample_points_bg$env)



#Now we can make a blank raster to store our predictions
  
  sample_points_kde_kde_raster <- env[[1]]

  values(sample_points_kde_kde_raster) <- NA

#Add our predictions to the raster

  sample_points_kde_kde_raster[sample_points_bg$bg_cells] <-
    sample_points_kde_kde_predictions

#Now, we can plot our raster

  plot(sample_points_kde_kde_raster,
       xlim = c(sample_points_bbox[1],sample_points_bbox[3]),
       ylim = c(sample_points_bbox[2],sample_points_bbox[4]))
  points(sample_points[c("longitude","latitude")])
  

## ----thresholding-------------------------------------------------------------

#To threshold this continuous raster to yield a binary raster

sample_points_kde_kde_raster <- sdm_threshold(prediction_raster = sample_points_kde_kde_raster,
                                   occurrence_sf = sample_points_presence$occurrence_sf,
                                   quantile = 0.05,
                                   return_binary = T)


# As before, we'll plot this on top of temperature and occurrence records to see how well we did

  # convert to polygon for easier visualization
  
    sample_points_kde_kde_polygon <-
      sample_points_kde_kde_raster |>
      as.polygons()|>
      st_as_sf()

# Now, we'll plot the standardized temperature raster, along with the occurrence records and the range map 

  ggplot(env)+
    geom_raster(mapping = aes(x=x,y=y,fill=wc2.1_10m_bio_1))+
    scale_fill_viridis_c(name="Temp. C", na.value = "transparent")+
    scale_x_continuous(expand=c(0,0),
                       limits = c(sample_points_bbox[1],
                                  sample_points_bbox[3]))+
      scale_y_continuous(limits = c(sample_points_bbox[2],
                                    sample_points_bbox[4]),
                       expand=c(0,0))+
    theme_bw()+
    geom_sf(data = sample_points_kde_kde_polygon,
            fill = "grey",
            size=2,
            alpha=0.5)+
      geom_point(data = sample_points,
               mapping = aes(x=longitude,y=latitude))




## -----------------------------------------------------------------------------

# We'll rely on the same data as last time for simplicity.
# Since we're using different methods for estimating the presence and background distributions, we need to specify these separately:

sample_points_gaussian_kde <- fit_plug_and_play(presence = sample_points_presence$env,
                                     background = sample_points_bg$env,
                                     presence_method = "gaussian",
                                     background_method = "kde")

sample_points_gaussian_kde_predictions <- project_plug_and_play(pnp_model = sample_points_gaussian_kde,
                                                data = sample_points_bg$env)

# Now, we again convert everything to a raster and then to a polygon

  sample_points_gaussian_kde_raster <- env[[1]]

  values(sample_points_gaussian_kde_raster) <- NA

  sample_points_gaussian_kde_raster[sample_points_bg$bg_cells] <-  sample_points_gaussian_kde_predictions

# Now, we can plot our raster
  
  plot(sample_points_gaussian_kde_raster,
       xlim = c(sample_points_bbox[1],sample_points_bbox[3]),
       ylim = c(sample_points_bbox[2],sample_points_bbox[4]))
  points(sample_points[c("longitude","latitude")])
  

## ----thresholding gk----------------------------------------------------------
# To threshold this continuous raster to yield a binary raster

  sample_points_gaussian_kde_raster <- sdm_threshold(prediction_raster =
                                                       sample_points_gaussian_kde_raster,
                                     occurrence_sf = sample_points_presence$occurrence_sf,
                                     quantile = 0.05,
                                     return_binary = T)


# As before, we'll plot this on top of temperature and occurrence records to see how well we did


# Convert the raster to a polygon for visualization

  # convert to polygon for easier visualization
  
    sample_points_gaussian_kde_polygon <-
      sample_points_gaussian_kde_raster |>
      as.polygons()|>
      st_as_sf()

# Now, we'll plot the standardized temperature raster, along with the occurrence records and the range map 

  ggplot(env)+
    geom_raster(mapping = aes(x = x, y = y, fill = wc2.1_10m_bio_1))+
    scale_fill_viridis_c(name="Temp. C", na.value = "transparent")+
    scale_x_continuous(expand=c(0,0),
                       limits = c(sample_points_bbox[1],
                                  sample_points_bbox[3]))+
      scale_y_continuous(limits = c(sample_points_bbox[2],
                                    sample_points_bbox[4]),
                       expand=c(0,0))+
    theme_bw()+
    geom_sf(data = sample_points_gaussian_kde_polygon,
            fill = "grey",
            size=2,
            alpha=0.5)+
      geom_point(data = sample_points,
               mapping = aes(x=longitude,y=latitude))



## ----maxnet-------------------------------------------------------------------


  sample_points_maxnet <-
  fit_density_ratio(presence = sample_points_presence$env,
                    background = sample_points_bg$env,
                    method = "maxnet")
  
  
  sample_points_maxnet_predictions <-
    project_density_ratio(dr_model = sample_points_maxnet,
                          data = sample_points_bg$env)


#Now, we again convert everything to a raster and then to a polygon
  
  sample_points_maxnet_raster <- env[[1]]

  values(sample_points_maxnet_raster) <- NA

  sample_points_maxnet_raster[sample_points_bg$bg_cells] <-
    sample_points_maxnet_predictions


#Now, we can plot our raster
  
    plot(sample_points_maxnet_raster,
       xlim = c(sample_points_bbox[1],
                sample_points_bbox[3]),
       ylim = c(sample_points_bbox[2],
                sample_points_bbox[4]))
  points(sample_points[c("longitude","latitude")])





## ----ulsif--------------------------------------------------------------------


  sample_points_ulsif <-
  fit_density_ratio(presence = sample_points_presence$env,
                    background = sample_points_bg$env,
                    method = "ulsif")
  
  
  sample_points_ulsif_predictions <-
    project_density_ratio(dr_model = sample_points_ulsif,
                          data = sample_points_bg$env)


#Now, we again convert everything to a raster and then to a polygon
  
  sample_points_ulsif_raster <- env[[1]]

  values(sample_points_ulsif_raster) <- NA

  sample_points_ulsif_raster[sample_points_bg$bg_cells] <-
    sample_points_ulsif_predictions


#Now, we can plot our raster
  
    plot(sample_points_ulsif_raster,
       xlim = c(sample_points_bbox[1],
                sample_points_bbox[3]),
       ylim = c(sample_points_bbox[2],
                sample_points_bbox[4]))
  points(sample_points[c("longitude","latitude")])


## ----model evaluation---------------------------------------------------------

sample_points_gaussian_gaussian_fit <-
  evaluate_range_map(occurrences = sample_points[c("longitude","latitude")],
                     env = env,
                     presence_method = "gaussian",
                     background_method = "gaussian")


#Rather than looking at all of the results, we'll focus on just a few:

sample_points_gaussian_gaussian_fit$fold_results[c('testing_AUC','testing_sensitivity','testing_specificity')]

#The AUC gives us an overall idea of the discriminatory ability of the model, while the sensitivity and specificity tell us how well it discriminates presence vs. background points (respectively).

Try the S4DM package in your browser

Any scripts or data that you put into this service are public.

S4DM documentation built on April 4, 2025, 2:22 a.m.