knitr::opts_chunk$set(echo = TRUE)

Announcing a new R package dedicated to implementing CRACLE for climate estimation from biological community data.

How to:

First load some climate data:

#devtools::install_github('rsh249/cRacle') #code to install cRacle
library(cRacle)
library(raster)
clim = getData('worldclim', var='bio', res=10, path=tempdir())

For a list of species:

We will query iNaturalist and GBIF for observed plant species within ~500m of a given location.

require(rinat)

site.coord = matrix(ncol=2, nrow=1)

site.coord = c(44.258885, -71.317860)
loc.name = "Mt_Washington"


boundary = c( site.coord[1]-0.001, 
              site.coord[2]-0.001, 
              site.coord[1]+0.001, 
              site.coord[2]+0.001 ) ## Approximately a 100mx100m area. Distorted at higher latitudes


inatq = get_inat_obs(taxon_name = "Tracheophyta", 
                               bounds=boundary, maxresults  =5000)



#filters
#cultivated?
inatq = inatq[which(inatq$captive_cultivated=='false'),]
#research grade
inatq = inatq[which(inatq$quality_grade=='research'),]
#positional accuracy
inatq = inatq[which(inatq$positional_accuracy<200),]

t_names = unique(inatq$scientific_name)
sp_bi = t_names[grep(' ', t_names)]
sp_bi = sort(sp_bi)
print(sp_bi) 

Get occurrence data for each of those species:

The cRacle package includes a function that grabs data from GBIF and other databases and references this against a climate raster object (e.g. the WorldClim data we got earlier). Several other preprocessing steps can be opted for at this point including spatial thinning and crude climatic outlier detection.

The settings below are good for many applications:

#Before that we need to crop the clim object to our study area:
clim = crop(clim, extent(c(boundary[2]-10, boundary[4]+10,boundary[1]-10, boundary[3]+10)))
extr = getextr(
  sp_bi,
  clim,
  repo=c('gbif', 'inat'), #query GBIF and iNaturalist together
  maxrec = 900, #Should set higher in practice! 
  schema = 'flat', #Spatial thinning by grid
  factor = 4, #merge grid to 2x (5 arcmin in example) for thinning
  nmin = 15, #minimum number of occurrence records required per species
  parallel = FALSE #if you have parallel architecture you can take advantage here
)

library(dplyr)
count= extr %>% 
  group_by(tax) %>%
  summarise(count=n()) %>%
  arrange(desc(count))

extr_filt = extr %>% 
  group_by(tax) %>% 
  filter(n() >=  15)

#tax = which(count$freq >=15)
#sub = count$x[tax]
#extr=extr[extr$tax %in% sub,]

Then build probability distributions for each taxon/variable pair.

densall = dens_obj(extr_filt,
                   clim,
                   manip = 'condi',
                   kern = 'gaussian',
                   bg.n = 90) #consider using parallel=TRUE if bg.n>500

Then calculate the intersect of all taxa for each variable.

This is the Coexistence Likelihood Estimation.

and = and_fun(densall)

And find the optima for these joint likelihood functions.

optim = get_optim(and)

print(optim$origk) #As implemented in Harbert and Nixon, 2015
print(optim$conintkde) #with 95% confidence intervals on the likelihood distribution.

And visualize (for Mean Annual Temperature):

multiplot() plots the object created by dens_obj(). addplot() plots a single element of dens_obj() (e.g. densall[[1]]) or a similar object created by the and_fun() function.

n = 1
multiplot(densall, names(clim)[[n]], l.cex=0.3)
addplot(and, names(clim)[[n]], col = 'black')

abline(v = median(optim$origk[, n]), col = 'green') #Midpoint will be very close to the optimum. Original method of Harbert and Nixon, 2015 returned a top 1% range to introduce some slop

cRacle performance

To see how well we have done with cRacle let's have compare to the WorldClim model output as well as a summary of data from nearby weather stations from NOAA.

Get NOAA data

require(rnoaa);
require(ggplot2);
station_data <- ghcnd_stations() # Takes a while to run (minutes)

lat_lon_df <- data.frame(id = 'locality',
                         latitude = c(site.coord[1]),
                         longitude = c(site.coord[2]))
nearby_stations <-  meteo_nearby_stations(lat_lon_df = lat_lon_df, 
                                          #var = c('TMIN', 'TMAX', 'TAVG', 'PRCP'),
                                          station_data = station_data, 
                                          radius = 10)
monitors <- nearby_stations$locality$id

all_monitors_clean <- meteo_pull_monitors(monitors) #get GHCN Data for each nearby station.


summary(all_monitors_clean);
#plot e.g.,
#plot(all_monitors_clean$date, (all_monitors_clean$tmax))

all_monitors_clean = all_monitors_clean[order(all_monitors_clean$date),]
coverage=meteo_coverage(all_monitors_clean)

#autoplot(coverage)
cbase = coverage
coverage = coverage$summary

best_cover = coverage[which(coverage$total_obs == max(coverage$total_obs)),]$id;

monitors_clean = subset(all_monitors_clean, all_monitors_clean$id == best_cover)
dates = monitors_clean$date
datestr = strsplit(as.character(dates), '-')
year = list();
for(i in 1:length(datestr)){
  year[[i]] = datestr[[i]][1];
}
year = unlist(year)
tmean = (monitors_clean$tmax + monitors_clean$tmin)/2

plot(year, tmean/10, main="Daily temperatures (C)")


#summarize mean temperature profile
year.tmean = cbind(as.numeric(year), tmean/10)
m = vector(); z = 1;
for(i in min(year.tmean[,1]):(max(year.tmean[,1]))){
  m[z] = mean(as.numeric(year.tmean[which(year.tmean[,1] == i), 2]), 
              na.rm=TRUE)
  z = z+1;
}

ghcn.mat = mean(m, na.rm=TRUE) #Mean Annual Temperature

Get WorldClim data for locality

ex = extract(clim, cbind(site.coord[2], site.coord[1])) #extract Worldclim for site
site.ex = cbind(loc.name, site.coord[2], site.coord[1], ex)
colnames(site.ex) = c('locality', 'lon', 'lat', names(clim))

Plot comparison

densplot(and, names(clim[[1]]), col = 'black')
abline(v=median(optim$origk[,1])/10, col='red', lwd=3)
abline(v=ghcn.mat, col='blue')
abline(v=as.numeric(site.ex[,names(clim[[1]])])/10, col = 'green')

Recap

The demonstration code above can be found at the cRacle github page: https://github.com/rsh249/cRacle/scripts/cRacle_beta.Rmd



rsh249/CRACLE documentation built on Feb. 2, 2022, 11:31 p.m.