knitr::opts_chunk$set(echo = TRUE)
#devtools::install_github('rsh249/cRacle') #code to install cRacle library(cRacle) library(raster) clim = getData('worldclim', var='bio', res=10, path=tempdir())
We will query iNaturalist and GBIF for observed plant species within ~500m of a given location.
require(rinat) site.coord = matrix(ncol=2, nrow=1) site.coord = c(44.258885, -71.317860) loc.name = "Mt_Washington" boundary = c( site.coord[1]-0.001, site.coord[2]-0.001, site.coord[1]+0.001, site.coord[2]+0.001 ) ## Approximately a 100mx100m area. Distorted at higher latitudes inatq = get_inat_obs(taxon_name = "Tracheophyta", bounds=boundary, maxresults =5000) #filters #cultivated? inatq = inatq[which(inatq$captive_cultivated=='false'),] #research grade inatq = inatq[which(inatq$quality_grade=='research'),] #positional accuracy inatq = inatq[which(inatq$positional_accuracy<200),] t_names = unique(inatq$scientific_name) sp_bi = t_names[grep(' ', t_names)] sp_bi = sort(sp_bi) print(sp_bi)
The cRacle package includes a function that grabs data from GBIF and other databases and references this against a climate raster object (e.g. the WorldClim data we got earlier). Several other preprocessing steps can be opted for at this point including spatial thinning and crude climatic outlier detection.
The settings below are good for many applications:
#Before that we need to crop the clim object to our study area: clim = crop(clim, extent(c(boundary[2]-10, boundary[4]+10,boundary[1]-10, boundary[3]+10))) extr = getextr( sp_bi, clim, repo=c('gbif', 'inat'), #query GBIF and iNaturalist together maxrec = 900, #Should set higher in practice! schema = 'flat', #Spatial thinning by grid factor = 4, #merge grid to 2x (5 arcmin in example) for thinning nmin = 15, #minimum number of occurrence records required per species parallel = FALSE #if you have parallel architecture you can take advantage here ) library(dplyr) count= extr %>% group_by(tax) %>% summarise(count=n()) %>% arrange(desc(count)) extr_filt = extr %>% group_by(tax) %>% filter(n() >= 15) #tax = which(count$freq >=15) #sub = count$x[tax] #extr=extr[extr$tax %in% sub,]
densall = dens_obj(extr_filt, clim, manip = 'condi', kern = 'gaussian', bg.n = 90) #consider using parallel=TRUE if bg.n>500
This is the Coexistence Likelihood Estimation.
and = and_fun(densall)
optim = get_optim(and) print(optim$origk) #As implemented in Harbert and Nixon, 2015 print(optim$conintkde) #with 95% confidence intervals on the likelihood distribution.
multiplot() plots the object created by dens_obj(). addplot() plots a single element of dens_obj() (e.g. densall[[1]]) or a similar object created by the and_fun() function.
n = 1 multiplot(densall, names(clim)[[n]], l.cex=0.3) addplot(and, names(clim)[[n]], col = 'black') abline(v = median(optim$origk[, n]), col = 'green') #Midpoint will be very close to the optimum. Original method of Harbert and Nixon, 2015 returned a top 1% range to introduce some slop
To see how well we have done with cRacle let's have compare to the WorldClim model output as well as a summary of data from nearby weather stations from NOAA.
require(rnoaa); require(ggplot2); station_data <- ghcnd_stations() # Takes a while to run (minutes) lat_lon_df <- data.frame(id = 'locality', latitude = c(site.coord[1]), longitude = c(site.coord[2])) nearby_stations <- meteo_nearby_stations(lat_lon_df = lat_lon_df, #var = c('TMIN', 'TMAX', 'TAVG', 'PRCP'), station_data = station_data, radius = 10) monitors <- nearby_stations$locality$id all_monitors_clean <- meteo_pull_monitors(monitors) #get GHCN Data for each nearby station. summary(all_monitors_clean); #plot e.g., #plot(all_monitors_clean$date, (all_monitors_clean$tmax)) all_monitors_clean = all_monitors_clean[order(all_monitors_clean$date),] coverage=meteo_coverage(all_monitors_clean) #autoplot(coverage) cbase = coverage coverage = coverage$summary best_cover = coverage[which(coverage$total_obs == max(coverage$total_obs)),]$id; monitors_clean = subset(all_monitors_clean, all_monitors_clean$id == best_cover) dates = monitors_clean$date datestr = strsplit(as.character(dates), '-') year = list(); for(i in 1:length(datestr)){ year[[i]] = datestr[[i]][1]; } year = unlist(year) tmean = (monitors_clean$tmax + monitors_clean$tmin)/2 plot(year, tmean/10, main="Daily temperatures (C)") #summarize mean temperature profile year.tmean = cbind(as.numeric(year), tmean/10) m = vector(); z = 1; for(i in min(year.tmean[,1]):(max(year.tmean[,1]))){ m[z] = mean(as.numeric(year.tmean[which(year.tmean[,1] == i), 2]), na.rm=TRUE) z = z+1; } ghcn.mat = mean(m, na.rm=TRUE) #Mean Annual Temperature
ex = extract(clim, cbind(site.coord[2], site.coord[1])) #extract Worldclim for site site.ex = cbind(loc.name, site.coord[2], site.coord[1], ex) colnames(site.ex) = c('locality', 'lon', 'lat', names(clim))
densplot(and, names(clim[[1]]), col = 'black') abline(v=median(optim$origk[,1])/10, col='red', lwd=3) abline(v=ghcn.mat, col='blue') abline(v=as.numeric(site.ex[,names(clim[[1]])])/10, col = 'green')
The demonstration code above can be found at the cRacle github page: https://github.com/rsh249/cRacle/scripts/cRacle_beta.Rmd
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.