knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(cache=T)

Background

NOTE: this file is the development version; the currently stable vignette is fastDiversityLite.rmd

Details on computations will be stored in text blocks like this. They can safely be ignored if you're just getting started.


Set up

Pacakges you'll need.

library(changeRangeR)
library(raster)
library(rasterVis)
library(rgdal)
library(Matrix.utils)
library(tidyverse)
if(Sys.info()["sysname"]== "Windows") library (parallelsugar)
mc.cores=6

Main Inputs

### determine where you want outputs
summaryBaseDir='/Volumes/cm2/changeRangerDemos/trees190_test6'
if(!file.exists(summaryBaseDir)) dir.create(summaryBaseDir)

# Indicate scenarios. these names will be used throughout to structure folders 
allScen=c('present','8580')
#load environnent with the reprojected projection (typically the one you used for modeling). You only need the raster grid that , not the actual layer values
envGrid=raster::stack(system.file("extdata/treeDemo/envGrid.tif",package='changeRangeR'))

# folder of binary range rasters
myDir=system.file("extdata/treeDemo/BinaryMaps",package='changeRangeR')

# shapefiles for plotting. This one comes preinstallted
world.shp=readOGR(system.file(
  "extdata/treeDemo/TM_WORLD_BORDERS_SIMPL-0.3/TM_WORLD_BORDERS_SIMPL-0.3.shp",
  package='changeRangeR'),'TM_WORLD_BORDERS_SIMPL-0.3',verbose=F)
world.shp2=spTransform(world.shp,projection(envGrid))

Set up directory structure

Set up a standardized set of directory names and structure so that that changeRanger functions can access the right files internally. You can specify any optional subdirectories you like, although changeRanger will only write outputs there.

sumDirs=setupSummaryDirectories(summaryBaseDir, optionalSubDirs=c('funcDiv','phyloDiv'))
# its a good idea to save this in case you need to start an analysis in the middle
saveRDS(sumDirs,file=paste0(sumDirs$sumBaseDir,'/dirList.rds'))
str(sumDirs)

Make Sparse Matrices

Begin by converting the range rasters to a matrix with rows indexing cells and columns indexing species. With the nCellChunks argument, I'm specifying that the globe should be split up into 10 chunks; this is important when your cell x species matrix is large, because the whole thing can't be read into memory at once. 'Large' will depend on the number of species, the resolution and extent of your maps, and your computer's capabilities.

# make a table of the raster file names and the associated species names
allSpeciesMaps=tibble(rasterFiles=list.files(paste0(myDir,'/present'),
                                             recursive=T, full.names=T)) %>% 
  mutate(sp.names= rasterFiles %>% basename %>% file_path_sans_ext) %>%
  separate(sp.names,into=c('g','sp','t','s'),sep='_') %>% select(-t,-s) %>%
  unite(sp.names,g,sp)
# species index table. columns: species name, integer index
sp.ind=speciesIndexTable(allSpeciesMaps,sumDirs)
# cell index Table. columns: long, lat, cellid
cell.ind=cellIndexTable(envGrid,nCellChunks=10,sumDirs)

Two key outputs are:

As we develop more complex analyses with auxilary data, we'll add 'cell attributes' and 'species attributes' to these data frames, respectively, that will allow us to generate a wide range of summaries.

cell.ind=readRDS(paste0(sumDirs$myBaseDir,'/cellIndexTable.rds'))
head(cell.ind)
sp.ind=readRDS(paste0(sumDirs$myBaseDir,'/speciesIndexTable.rds'))
head(sp.ind)

You can see how the globe is broken up into chunks. sparseToRaster is also useful to convert anything stored as a sparse matrix back to a raster for spatial operations.

cell.ind=readRDS(paste0(sumDirs$myBaseDir,'/cellIndexTable.rds'))
# a convenient function to convert between sparse matrices and rasters, where plotting is easier
chunks.r=cellIDToRaster(cell.ind,envGrid,'chunkID')
# a convient plotting function in changeRanger
fdMapPlot(chunks.r,shp=world.shp2,legend.args=list(text='chunk ID'))

Now we're ready to make the sparse matrices. This step can take a little time, but makes all the downstream steps fast. It converted all the ranges into sparse matrices and stored them in the folder '.../SparseMatrices/CellBySpecies'. We refer to these as 'cell by species' matrices, or CBS matrices for short. Depending on file size, or your settings, CBS matrices can be stored in chunks to reduce the need to load in huge files in later operations where you may only need a subset of the chunks. Chunking is done based on rows; that is each contains a subset of cells, but all species.

for (scn in allScen){
  allSpeciesMaps=tibble(rasterFiles=list.files(paste0(myDir,'/',scn),
                                               recursive=T, full.names=T)) %>% 
    mutate(spNames= rasterFiles %>% basename %>% file_path_sans_ext) %>%
    separate(spNames,into=c('g','sp','t','s'),sep='_') %>% select(-t,-s) %>%
    unite(spNames,g,sp)
  cellBySpeciesMatrices(sumDirs$cbsDir,
                          rasterFiles=allSpeciesMaps$rasterFiles,
                        spNames=allSpeciesMaps$spNames,
                          scenario=scn,
                          envGrid=envGrid,
                          sp.ind=sp.ind,
                        cell.ind=cell.ind,
                          nCellChunks=10, # number of chunks to split the envGrid into
                          mc.cores=mc.cores,
                          overwrite=T)
}

Metrics based only on ranges

In this section we only require that the user provide ranges for each species; auxilary information, such as other environmental layers, traits, or phylogeny is handled in subsequent sections.

Total richness

rich=lapply(allScen,function(scn){
  r=richnessFromCBS(cbsDir=sumDirs$cbsDir,
                    scenario=scn,env=envGrid,
                    mc.cores=mc.cores, outDir=sumDirs$richDir)
  # I like to store all the plots as I go
  fdMapPlot(stack(r),paste0(sumDirs$figDir,'/Richness_',scn,'.pdf'),shp=world.shp2,
            legend.args=list(text='Species Richness'))
  r
}) %>% stack
fdMapPlot(rich,shp=world.shp2,legend.args=list(text='Species Richness'))

Calculation: This is calculated based on column sums of the CBS matrix

Range Size

Range size can be defined in a variety of ways; we define it here as area of occurrence at the native resolution of the SDMS (here ~10km)

sp.ind=readRDS(paste0(sumDirs$sumBaseDir,'/speciesIndexTable.rds'))
ra=lapply(allScen,function(scn){
        rangeArea(cbsDir=sumDirs$cbsDir,scenario=scn,
                  sp.ind=sp.ind,outDir=sumDirs$rangeSizeDir,mc.cores=mc.cores)
})
str(ra)
hist(ra[[1]]$rangeArea)

Calculation: row sums over CBS matrices.

Now let's get in the habit of storing species attributes. Throughout this workflow, we'll repeatedly calculate something about a species, like it's range size, and it's helpful to store it in the speciesIndexTable for later use. Of course this is entirely optional, but just how I like to do it, so that I keep everything about a species in one place. The plan is to read in the speciesIndexTable, join the range areas just calculated to it, and then overwrite the old speciesIndexTable (you might also choose not to rewrite, and store this as a separate attribute table. Whatever.)

newSpInd= sp.ind %>% full_join(ra[[1]],by=c('species','index')) %>%
  rename(rangeAreaPresent=rangeArea) %>% 
  full_join(ra[[2]],by=c('species','index')) %>% 
  rename(rangeArea8580=rangeArea) 
str(newSpInd)

Rarity

Here's an example using a 'species attribute', which is a common application. Here we use species attributes to refer to any property of an individual species. Common operations are to (1) summarize species attributes within a spatial unit, or (2) group species based on different values of an attribute and calculate a summary statistic. As an example of (1) we map species rarity (average value of 1/range area over species within a cell). Here's an example where we add a new place to store outputs, since rarity probably isn't common enough to include as a default.

sumDirs$rarityDir=file.path(sumDirs$rangeSizeDir,'Rarity')
if(!file.exists(sumDirs$rarityDir)) dir.create(sumDirs$rarityDir)

rar=lapply(allScen,function(scn){
    # generate the value of 1/range size for each species in an attribute table
    raritySpAttr=sumDirs$rangeSizeDir %>% 
      list.files(full.names=T,pattern=scn) %>% 
      readRDS %>% 
      mutate(rarity=1/rangeArea) %>% 
      select(-rangeArea)
    r=speciesAttributeByCell(cbsDir=sumDirs$cbsDir,scenario=scn,
                             attrTable=raritySpAttr, method='mean', 
                             env=envGrid, outDir=sumDirs$rarityDir) 
    fdMapPlot(log(r),plotFile=paste0(sumDirs$figDir,'/Rarity_',scn,'.pdf'), 
              shp=world.shp2,legend.args=list(text='log(rarity)',line=2,side=4))
    r
}) %>% stack
fdMapPlot(log(rar),shp=world.shp2,legend.args=list(text='log(rarity)'))

Calculation: Define a 1/range size as a species attribute. Use matrix multiplication of the CBS matrix and species attribute vector.

Other species and cell summaries

At present, we support three main types of operations to build summaries from CBS matrices over taxa or spatial units which can be combined in a variety of ways: (1) row/column sums, (2) collapsing rows/columns, and (3) moments over rows/columns.

The first type, row/column sums have already been demonstrated above. Column sums of the CBS matrices correspond to

The second type, collapsing operations, involves combining related rows or columns of the CBS matrix. When a summary statistic only requires that we know whether a given taxon (e.g., species) occurs in a given spatial unit (e.g., cell), we can make binary sparse matrices that collapse the CBS matrices over cells (rows) or species (columns). For example, if we want to study diversity patterns of genera, we can collapse the columns of the CBS matrices so that each represents a genus (rather than a species). Similarly if one wants to study the richness patterns in different countries, we can collapse the rows of the CBS matrices so that each row represents a country (rather than a cell). We define species attribute tables below that specify how species should be collapsed and cell attribute tables that specify how cells should be collapsed.

The third type, moments over rows/columns, currently supports estimating the mean value (eventually first four moments) of an attribute over rows or columns. For example, this could be the mean attribute of species that occur in the same cell; in fact we just saw this when calculating the cell-level rarity above by taking the mean of 1/range_size over species. Relatedly, as we'll demonstrate below, one might also be interested in the mean values of cells where a species occurs.

Richness by species attributes

Species might have different attributes, like growth form, clade, etc., such that it is useful to split up analyses by group. Here, we'll make a separate richness map for each genus. This corresponds to row sum operations, as described above.

sumDirs$richByGenusDir=file.path(sumDirs$richDir,'byGenus')
if(!file.exists(sumDirs$richByGenusDir)) dir.create(sumDirs$richByGenusDir)

We begin by looking at richness within each genus separately.

sp.ind=readRDS(paste0(sumDirs$sumBaseDir, '/speciesIndexTable.rds'))
genusSpAttr=sp.ind %>% select(species) %>%
  separate(species,c("genus",NA),sep='_')  
sp.ind = sp.ind %>% bind_cols(genusSpAttr)
saveRDS(sp.ind,file=paste0(sumDirs$sumBaseDir,'/speciesIndexTable.rds'))
head(sp.ind) 
richWithinGen=lapply(allScen,function(scn){
    r=richnessFromCBS(cbsDir=sumDirs$cbsDir,scenario=scn,
                      envGrid=envGrid,mc.cores=mc.cores,
                      attrTable=sp.ind,attrName='genus',outDir=sumDirs$richByGenusDir)
    names(r)=paste0(names(r),'_',scn)
    fdMapPlot(r,plotFile=paste0(sumDirs$figDir,'/RichnessByGenus_',scn,'.pdf'), 
              shp=world.shp2, legend.args=list(text='# species',line=2,side=4))
    r
})
fdMapPlot(richWithinGen[[1]],shp=world.shp2,legend.args=list(text='Species Richness')) 
fdMapPlot(richWithinGen[[2]],shp=world.shp2,legend.args=list(text='Species Richness')) 

Next we'll look at the richness of genera - that is, how many genera are in each pixel? This is our first demonstration of collapsing column operations, as described above, because we combine all the columns related to a particular genus together. We'll store a new set of matrices - cell by genus (CBG) matrices that are analogous to the CBS matrices, except that he columns now correspond to genera. Then we can calculate richness of genera using the same richnessFromCBS used above.

# make a directory to put the new cell by genus matrices in
sumDirs$cbgDir=paste0(sumDirs$sparseDir,'/CellByGenus')
if(!file.exists(sumDirs$cbgDir)) dir.create(sumDirs$cbgDir)
# overwrite existing directory settings to save this.
saveRDS(sumDirs,file=paste0(sumDirs$sumBaseDir,'/dirList.rds'))

sp.ind=readRDS(paste0(sumDirs$sumBaseDir, '/speciesIndexTable.rds'))  %>% select(species) %>%
  separate(species,c("genus",NA),sep='_')

# collapse columns of the cbs matrices to make cell by genus matrices
lapply(allScen,function(scn){
  outDirScn=paste0(sumDirs$cbgDir,'/',scn)
  if(!file.exists(outDirScn)) dir.create(outDirScn)
  collapseCols(inDir=sumDirs$cbsDir,outDir=outDirScn,scenario=scn,
               spAttrTable=sp.ind,attrName='genus',
               type='binary',keepChunks=T,mc.cores=mc.cores)
})

# make genus index table (analogous to species index table) so we know which CBG columns correspond  to which species
gen.ind=data.frame(genus=unique(sp.ind[,'genus']) %>% na.omit)
gen.ind$index=1:nrow(gen.ind)
saveRDS(gen.ind,file=paste0(sumDirs$sumBaseDir,'/generaIndexTable.rds'))

# calculate richness (row sums) from cell by genus matrices 
richGen=lapply(allScen,function(scn){
    r=richnessFromCBS(cbsDir=sumDirs$cbgDir,scenario=scn,
                      env=envGrid,mc.cores=mc.cores,outDir=sumDirs$richByGenusDir)
    names(r)=paste0(names(r),'_',scn)
    fdMapPlot(r,plotFile=paste0(sumDirs$figDir,'/RichnessByGenus_',scn,'.pdf'), 
              shp=world.shp2, legend.args=list(text='# species',line=2,side=4))
    r
}) %>% stack
fdMapPlot(richGen[[1]],shp=world.shp2,legend.args=list(text='Genus Richness'))
fdMapPlot(richGen[[2]],shp=world.shp2,legend.args=list(text='Genus Richness'))

Metrics depending on cell attributes

In contrast to species attributes, cells can also have attributes and similarly it is common to (1) summarize cell attributes over a species range, or (2) group cells based on different values of an attribute and calculate a summary statistic of the species it contains. A wide variety of analyses can be written in terms of cell attributes: here we classify cells by which ecoregion they occur in. Other possibilities include whether cells are protected, the level of disturbance they have experienced, the level of human impact they have experienced, etc. The key similarity is that each classify every cell into one of a collection of categories.

eco=raster(system.file('extdata/treeDemo/GlobalEcoregions.tif',package='changeRangeR'))
# mask to australia where there are 28 ecoregions (the masking is slow, so we just read in a stored result from the commented code)
  # you may need to reproject rasters so that they align with your envGrid
eco2=projectRaster(eco,envGrid,method='ngb')
aus=getData('GADM', country='AUS', level=0)
aa=sp::spTransform(aus,projection(eco2))
ecoAus=raster::mask(eco,aa)
ecoAus2=projectRaster(ecoAus,envGrid,method='ngb')
writeRaster(ecoAus2,paste0(sumDirs$miscDir,'/aus.tif'),overwrite=T)

The masking is slow, so we just read in a stored result.

ecoAus2=raster(system.file('extdata/treeDemo/Misc/aus.tif',package='changeRangeR'))         

As an example of summarize cell attributes over a species range (1) we find the proportion of each species range in each ecoregion.

sp.ind=readRDS(paste0(sumDirs$sumBaseDir, '/speciesIndexTable.rds'))
# check a few species that occur in australia in this demo
m=mapSpecies(cbsDir=sumDirs$cbsDir,26:29,scenario='present',sp.ind,cell.ind,envGrid)
m # a stack  with each species map as a layer
# TEMP: for some reason the package doesn't see the correct function
#source('/Users/ctg/Dropbox/Projects/Wallace/changeRangeR/R/fdSpecies.r')
# Step 1: get the total number of cells in each species range in each ecoregion

#the problem is the ecoAUS2 doesn't have the same dimensions as envGrid so the cellIDs don't match up
rar=rangeAreaCategoricalRaster(someRaster=ecoAus2,cbsDir=sumDirs$cbsDir,scenario='present',
                               sp.ind=sp.ind,cell.ind=cell.ind,mc.cores=mc.cores)
# Step 2: divide these counts by the total range size to get the proportion of each species range in each ecoregion.  join the data frames and calculate proportions
rpbc=rar %>% mutate_at(vars(X53:X783), funs(round(./rangeAreaPresent,2)))

head(rpbc[27:31,])

-->


Assemblage/Community metrics

Some species attributes depend on which species co-occur with them.

Find unique communities

uc=lapply(allScen, function(scn){
    makeUniqueCommunities(cbsDir=sumDirs$cbsDir, scenario=scn, mc.cores = mc.cores,overwrite=T)
})

Calculation: find unique rows of CBS matrices.

Phylogenetic Diversity

Phylogenetic diversity is calculated using the PD index (Faith, 1992) and implemented through the function pd.query in the PhyloMeasures package (Tsirogiannis, 2015). The phylogenetic tree was pruned to the species selected from a complete plant phylogeny currently available at Smith et al. (2018).

library(PhyloMeasures)
library(ape)
library(tictoc)
phyloTree=read.tree(system.file('extdata/treeDemo/Misc/selecPhyTree.tree',
                    package='changeRangeR'))
sp.ind=readRDS(paste0(sumDirs$sumBaseDir,'/speciesIndexTable.rds'))

#CM: hey pep - why am i getting this warning? 
#  Warning: the input matrix has fewer columns than the number of species in the tree. 

pd=lapply(allScen, function(scn){
  pd1=metricFromCBS(cbsDir=sumDirs$cbsDir,scenario = scn,env = envGrid ,
                    spIndTable=sp.ind, mc.cores = mc.cores,
                    #specify function, here `phyloDiv`; 1st argument must be the CBS matrix
                    FUN = phyloDiv,outputFUNnames = 'PD',
                    #specify other arguments to pass to FUN
                    fullMatch=F,tree=phyloTree)
  writeRaster(pd1,file=paste0(sumDirs$phyloDivDir,'/PD_',scn,'.tif'),overwrite=T)
  pd1
})

pd = raster::stack(pd)
names(pd) = allScen
plot(pd)

specify function, here phyloDiv; 1st argument must be the cellBySp matrix

Functional Diversity

Functional diversity is applied using the same function as the Phylogenetic diversity, but using a dendogram tree based upon imputed values of tree functional traits using BHPMF technique (Schrodt et al. 2015). The functional tree dendogram was finally built with the stats::hclust function from a principal component analysis based on TRY and BIEN databases (see further details in Guo et al. 2020 preprint)

sp.ind=readRDS(paste0(sumDirs$sumBaseDir,'/speciesIndexTable.rds'))
traitTable=readRDS(system.file('/extdata/treeDemo/speciesAttributes.rds',
                   package='changeRangeR'))
#create a tree of traits based on trait distances (euclidean)
#the fucntion is set up to write the trees and set up rules for the PCA (e.g. how many PC you retain, etc.)
#with writeOut=T it automatically creates the funcDivFolder and writes the "functional tree" there.
trait.phy=buildFuncDivTree(sumDirs = sumDirs,
                                                     speciesAttributeTable =traitTable,
                                                     colFuncTraits = 2:9)

fd=lapply(allScen, function(scn){
  fd1=metricFromCBS(cbsDir=sumDirs$cbsDir,scenario=scn,env=envGrid,
                    spIndTable=sp.ind,
                    mc.cores = mc.cores,
                    #specify function, here `phyloDiv`; 1st argument must be the CBS matrix
                    FUN = phyloDiv,outputFUNnames = 'FD',
                    #specify other arguments to pass to FUN
                    fullMatch=F,tree=trait.phy)
})

fd=raster::stack (fd)
names(fd) =  allScen
plot(fd)

Changes between scenarios


Masking

spMaskDir=system.file("extdata/treeDemo/Migrationmask/120km/",package='changeRangeR')
allSpeciesMasks=tibble(rasterFiles=list.files(spMaskDir, recursive=T, full.names=T)) %>%
  mutate(sp.names= rasterFiles %>% basename %>% file_path_sans_ext)
maskCBSscenario(sumDirs = sumDirs,
                sp.ind = sp.ind,
                allSpeciesMasks = allSpeciesMasks,
                scenario = 'present',
                maskName = 'dispTry',
                mc.cores = 6)

Some additional utilites

If you want to use the results from a different computer based on the sumDirs stored as '.../sumDirs.rds', you'll need to convert the baseDir to reflect the computer you're working on. Below we convert to my local paths, and then back again.

sumDirs=convertSummaryBaseDir(sumDirs, newBaseDir='someDir')
sumDirs=convertSummaryBaseDir(sumDirs, summaryBaseDir) # convert back
m=mapSpecies(cbsDir=sumDirs$cbsDir,27:30,scenario='present',sp.ind,cell.ind,envGrid)
m # a stack  with each species map as a layer

References



andrepazv/changeRangeR documentation built on May 7, 2022, 3:38 p.m.