knitr::opts_chunk$set(
  #collapse = TRUE,
  #comment = "#>",
  message = F
)

Set up database

Using PostgreSQL with the PostGIS extension in Docker:

Create the volume pg_data:

docker volume create pg_data

Create the container postgis, with database am and user admin.

On the command line:

# set password as environment variable read in from file
DB_PASS=`cat ~/private/pg_pass`

# run docker with image
docker run --name=postgis \
  -d -e POSTGRES_USER=admin \
  -e POSTGRES_PASS=$DB_PASS \
  -e POSTGRES_DBNAME=am \
  -e ALLOW_IP_RANGE=0.0.0.0/0 \
  -p 5432:5432 \
  -v pg_data:/var/lib/postgresql \
  -v /Users/bbest/docker_pg_data:/data \
  --restart=unless-stopped \
  --shm-size=1g \
  kartoza/postgis:11.0-2.5

# list all containers
docker container ls -a

# stop
docker container stop postgis

# start
docker container start postgis

# check logs
docker logs postgis
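
Once the container is running, a quick check from R can confirm that the database and PostGIS are reachable. This is a minimal sketch, assuming the same credentials and password file used in the docker run command above and that PostGIS is enabled in the am database:

# optional sanity check from R: connect, confirm PostGIS, disconnect
library(DBI)
library(RPostgreSQL)

con_chk <- dbConnect(
  PostgreSQL(), host = "localhost", port = 5432,
  dbname = "am", user = "admin",
  password = readLines("~/private/pg_pass"))
dbGetQuery(con_chk, "SELECT postgis_full_version();")
dbDisconnect(con_chk)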

Connect to database

library(tidyverse)
library(here)
library(glue)
library(raster)
library(DBI)
library(DT)
library(RPostgreSQL)
library(tictoc)
library(furrr)
library(RColorBrewer)
library(zeallot)

library(devtools); load_all() # library(gmbi)

# paths
dir_tmp      <- here("inst/tmp")
dir_out      <- here("inst/data/rasters")
spp_iucn_csv <- here("inst/data/spp_iucn.csv")
spp_csv      <- here("inst/data/spp.csv")
dir_nc       <- "~/docker_pg_data/aquamaps2100_nc"
spp_2100_csv <- "~/docker_pg_data/aquamaps2100_nc.csv"
spp_bbnj_csv      <- here("inst/data/spp_bbnj.csv")
spp_bbnj_grp_csv  <- here("inst/data/spp_bbnj_grouped.csv")
spp_bbnj_grps_csv <- here("inst/data/spp_bbnj_groups.csv")

# plotting parameters
pal <- colorRampPalette(brewer.pal(11, "Spectral"))
cols <- rev(pal(255))
par(mar=c(0.5,0.5,0.5,0.5))

# helper functions ----
tbl_cols <- function(con, tbl){
  tbl(con, tbl) %>% head(1) %>% collect() %>% names()
}

# connect to database
con <- get_db_con(password=readLines("~/private/pg_pass"))
con

#Error: in postgresqlCloseConnection(conn, ...) : 
#  connection has pending rows (close open results set first)
#Fix: dbClearResult(dbListResults(con)[[1]])
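
get_db_con() is a helper from this package (gmbi). For reference, here is a minimal sketch of what it presumably wraps, a DBI connection to the local am database; the actual helper may take different arguments:

# hypothetical sketch of get_db_con(); the gmbi helper is the real implementation
get_db_con_sketch <- function(
  password, user = "admin", dbname = "am", host = "localhost", port = 5432){
  DBI::dbConnect(
    RPostgreSQL::PostgreSQL(),
    host = host, port = port, dbname = dbname,
    user = user, password = password)
}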

Load AquaMaps CSVs into database

All of the species distribution data was generously provided as comma-separated value (CSV) files in a zip package (aquamaps_ver0816c.zip) by Kristin Kaschner and Cristina Garilao to Ben Best (ben@ecoquants.com) on Jun 21, 2018, drawing on the extensive work available at http://AquaMaps.org, which should be fully cited [@kaschnerAquaMapsPredictedRange2016] whenever used.

Generated here are derived products. Please contact Kristin Kaschner kristin.kaschner@biologie.uni-freiburg.de and Cristina Garilao cgarilao@geomar.de for access to the full database.
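
csv_to_db() (called in the next chunk) is the gmbi helper that loads the unzipped AquaMaps csv files into Postgres tables. A minimal sketch of the idea follows; the folder layout and table names are assumptions, not the package's actual implementation:

# hypothetical sketch of csv_to_db(); the gmbi helper is the real implementation
csv_to_db_sketch <- function(con, dir_csv){
  # dir_csv: folder with the unzipped AquaMaps csv files (hypothetical layout)
  csvs <- list.files(dir_csv, "\\.csv$", full.names = TRUE)
  for (csv in csvs){
    tbl_name <- tools::file_path_sans_ext(basename(csv)) # e.g. spp, cells, spp_cells
    message(glue("loading {tbl_name}"))
    d <- readr::read_csv(csv)
    DBI::dbWriteTable(con, tbl_name, d, overwrite = TRUE, row.names = FALSE)
  }
}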

csv_to_db(con)

# drop odd columns
dbExecute(con, 'ALTER TABLE spp DROP COLUMN "X11", DROP COLUMN "X12";')
spp <- tbl(con, "spp") %>% 
  collect() %>% 
  mutate(
    genus_species = glue("{Genus} {Species}") %>% as.character())

copy_to(
  con, spp, "spp", temporary=F, overwrite=T,
  unique_indexes=list("SPECIESID"),
  indexes = list("group", "genus_species"))

Load tables for querying.

spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
env_cells <- tbl(con, "env_cells")
spp_cells <- tbl(con, "spp_cells")
obs_cells <- tbl(con, "obs_cells")

Assign taxonomic groups

Assign taxonomic groups borrowing from previous global OBIS analysis [@tittensorGlobalPatternsPredictors2010].

spp_cols <- tbl(con, "spp") %>% head(1) %>% collect() %>% names()

# group = NA: 1176
# library(tidyverse)
# read_csv("~/Gdrive Ecoquants/projects/High Seas MPAs/bbnj/data/raw/AquaMaps/spp.csv") %>% 
#   filter(is.na(group)) %>% 
#   nrow()
if (!"groups01" %in% spp_cols){

  spp <- tbl(con, "spp") %>% 
    mutate(
      groups01 = case_when(
        Class == "Actinopterygii" & !Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ "Bony fishes",
        #Class == "Cephalopoda" & !Order %in% c("Myopsida","Oegopsida","Teuthida") ~ "Cephalopods",
        #Order %in% c("Myopsida","Oegopsida","Teuthida") ~ "Squids",
        Class == "Cephalopoda" ~ "Cephalopods",
        Family %in% c("Odobenidae", "Otariidae", "Phocidae") ~ "Pinnipeds",
        Class == "Anthozoa" ~ "Corals",
        Order == "Alismatales" ~ "Seagrasses",
        Genus == "Avicennia" & Species == "germinans" ~ "Mangroves",
        Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ "Tunas & billfishes",
        Order == "Cetacea" ~ "Cetaceans",
        Order == "Euphausiacea" ~ "Euphausiids",
        Class %in% c("Malacostraca","Ostracoda","Branchiopoda","Cephalocarida","Maxillopoda") ~ "Crustaceans",
        Class == "Pycnogonida" ~ "Sea spiders",
        Class == "Gastropoda" ~ "Gastropods",
        Class == "Bivalvia" ~ "Bivalves",
        Class == "Polyplacophora" ~ "Chitons",
        Class %in% c("Ascidiacea","Thaliacea","Appendicularia") ~ "Tunicates",
        Class == "Elasmobranchii" ~ "Sharks & rays",
        Class == "Reptilia"~ "Reptiles",
        Phylum == "Echinodermata"~ "Echinoderms",
        Phylum == "Porifera" ~ "Sponges",
        Phylum %in% c("Annelida", "Sipuncula") ~ "Worms",
        Class == "Hydrozoa" ~ "Hydrozoans",
        Phylum == "Foraminifera" ~ "Forams",
        Class %in% c("Cubozoa","Hydrozoa","Scyphozoa","Staurozoa") ~ "Jellyfish",
        TRUE ~ "Other")) %>% 
    collect()

  copy_to(
    con, spp, "spp", temporary=F, overwrite=T,
    unique_indexes=list("SPECIESID"),
    indexes = list("group", "groups01"))
}
tbl(con, "spp") %>% 
  collect() %>% 
  pull(groups01) %>% 
  table(useNA="always")
if (!"groups04" %in% spp_cols){

  spp <- tbl(con, "spp") %>% 
    mutate(
      groups04 = case_when(
        Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ 
          "Tunas & billfishes",
        Class == "Actinopterygii" & 
          !Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ 
          "Bony fishes",
        Class == "Cephalopoda" ~ 
          "Cephalopods",
        Class == "Mammalia"    ~ 
          "Marine Mammals",
        Class == "Anthozoa"    ~ 
          "Corals",
        Order == "Alismatales" ~ 
          "Seagrasses",
        Phylum %in% c("Mollusca", "Arthropoda", "Echinodermata", "Porifera","Annelida", "Sipuncula","Cnidaria") & 
          !Class %in% c("Cephalopoda", "Anthozoa") | 
          Class %in% c("Ascidiacea","Thaliacea","Appendicularia") ~ 
          "Invertebrates",
        Class == "Elasmobranchii" ~ 
          "Sharks & rays",
        TRUE ~ "Other")) %>% 
    collect()

  copy_to(
    con, spp, "spp", temporary=F, overwrite=T,
    unique_indexes=list("SPECIESID"),
    indexes = list("group", "groups01","groups04"))
}
tbl(con, "spp") %>% 
  collect() %>% 
  pull(groups04) %>% 
  table(useNA="always")

Determine percent in high seas for each species

Based on 0.5 probability threshold.

spp_prob_threshold <- 0.5

# in_bbnj: cells in high seas, from BBNJ package ----
r_pu_id <- raster(here("../bbnj/inst/data/pu_id.tif")) # mapview::mapview(r_pu_id)
cells_bbnj <- tibble(
  cellid = raster::values(r_pu_id)) %>% 
  filter(!is.na(cellid)) %>% 
  mutate(in_bbnj = T)

cells2 <- tbl(con, "cells") %>% 
  select(-OceanArea) %>% 
  # join with ocean area per cell
  left_join(
    tbl(con, "env_cells"),
    by = c("CenterLat","CenterLong")) %>% 
  select(cellid, CenterLat, CenterLong, OceanArea) %>% 
  collect() %>% 
  # join with in_bbnj
  left_join(
    cells_bbnj,
    by = "cellid") %>% 
  mutate(
    in_bbnj = replace_na(in_bbnj, F),
    cellid = as.integer(cellid))

# plot OceanArea
hist(cells2$OceanArea)
r <- setValues(r_pu_id, NA)
r[cells2$cellid] <- cells2$OceanArea
mapview::mapview(r)

# plot in_bbnj
table(cells2$in_bbnj, useNA="ifany")
r <- setValues(r_pu_id, NA)
r[cells2$cellid] <- cells2$in_bbnj
mapview::mapview(r, method="ngb")

# update cells table
copy_to(
  con, cells2, "cells", temporary=F, overwrite=T,
  unique_indexes=list("cellid"),
  indexes = list("CenterLat", "CenterLong", "in_bbnj", "OceanArea"))

# area_ocean & pct_bbnj -----

# set spp_cells.cellid
r <- DBI::dbSendStatement(con, 
  'UPDATE spp_cells AS sc
  SET cellid = c.cellid
  FROM cells AS c
  WHERE 
    sc."CenterLat"  = c."CenterLat" AND 
    sc."CenterLong" = c."CenterLong";')
DBI::dbClearResult(r) # release result so later queries do not hit pending-rows errors

pct_brks <- c(0,0.001,0.25,1)

spp_sum <- tbl(con, "spp_cells") %>% 
  filter(probability >= spp_prob_threshold) %>% 
  left_join(
    tbl(con, "cells"), by="cellid") %>% 
  select(SpeciesID, cellid, OceanArea, in_bbnj) %>% 
  group_by(SpeciesID) %>% 
  summarize(
    n_cells        = n(),
    area_ocean_km2 = sum(OceanArea, na.rm=T),
    area_bbnj_km2  = sum(OceanArea * as.integer(in_bbnj), na.rm=T)) %>% 
  mutate(
    area_bbnj_pct = area_bbnj_km2 / area_ocean_km2) %>% 
  collect() %>% 
  mutate(
    area_bbnj_pct_cat = cut(
      area_bbnj_pct, pct_brks, include.lowest = T)) # 2 min
table(spp_sum$area_bbnj_pct_cat, useNA = "ifany")
# [0,0.001] (0.001,0.25]     (0.25,1] 
#     13217         9008         2673
#     
table(spp_sum$area_bbnj_pct_cat, useNA = "ifany")/nrow(spp_sum)
# [0,0.001] (0.001,0.25]     (0.25,1] 
# 0.5308459    0.3617961    0.1073580

# copy_to(
#   con, spp_sum, "spp", temporary=F, overwrite=T,
#   unique_indexes=list("SPECIESID"),
#   indexes = list(
#     "groups00", 
#     "groups01", "groups02", "groups03"))
copy_to(
  con, spp_sum, "spp_sum", temporary=F, overwrite=T,
  unique_indexes=list("SpeciesID"))

spp2 <- tbl(con, "spp") %>% 
  collect() %>% 
  #paste(intersect(dbListFields(con, "spp"), names(spp_sum)), collapse=", -")
  select(-any_of(c("n_cells", "area_ocean_km2", "area_bbnj_km2", "area_bbnj_pct", "area_bbnj_pct_cat"))) %>% 
  left_join(
    spp_sum, by=c("SPECIESID"="SpeciesID"))

copy_to(
  con, spp2, "spp", temporary=F, overwrite=T,
  unique_indexes=list("SPECIESID"),
  indexes = c("group", sprintf("groups%02d", 0:6)) %>% as.list())

# spp_sum <- spp_sum %>% 
#   mutate(
#     area_bbnj_pct_cat = replace_na(
#       area_bbnj_pct_cat, 
#       levels(spp_sum$area_bbnj_pct_cat)[1]))

spp_bbnj <- tbl(con, "spp") %>% 
  collect() %>% 
  mutate(
    # groups01: in high seas (groups02), 25% in high seas (groups03)
    groups02 = case_when(
      area_bbnj_km2 > 0 ~ groups01),
    groups03 = case_when(
      area_bbnj_pct >= 0.25 ~ groups01),
    # groups04: in high seas (groups05), 25% in high seas (groups06)
    groups05 = case_when(
      area_bbnj_km2 > 0 ~ groups04),
    groups06 = case_when(
      area_bbnj_pct >= 0.25 ~ groups04))
write_csv(spp_bbnj, spp_bbnj_csv)

copy_to(
  con, spp_bbnj, "spp", temporary=F, overwrite=T,
  unique_indexes=list("SPECIESID"),
  indexes = c("group", sprintf("groups%02d", 0:6)) %>% as.list())

# TODO: update below to summarize for all groups##
#setdiff(names(spp_bbnj), tbl_cols(con, "spp"))
# spp_bbnj_grp <- spp_bbnj %>% 
#   mutate(
#     area_bbnj_pct_cat = fct_explicit_na(
#       area_bbnj_pct_cat, na_level = "(NA)")) %>% 
#   group_by(groups01, area_bbnj_pct_cat) %>% 
#   summarize(
#     nspp = n()) %>% 
#   spread(area_bbnj_pct_cat, nspp)
# write_csv(spp_bbnj_grp, spp_bbnj_grp_csv, na = "")
# 
# spp_bbnj_grps <- count(spp_bbnj, groups01, name="groups01_all") %>% 
#   rename(group=groups01) %>% 
#   left_join(
#     count(spp_bbnj, groups02, name="groups02_hs"),
#     by = c("group"="groups02")) %>% 
#   left_join(
#     count(spp_bbnj, groups03, name="groups03_hs25"),
#     by = c("group"="groups03")) %>% 
#   mutate(
#     groups02_pct = groups02_hs / groups01_all,
#     groups03_pct = groups03_hs25 / groups01_all)
# write_csv(spp_bbnj_grps, spp_bbnj_grps_csv, na = "")
read_csv(spp_bbnj_grps_csv, na = "") %>%
  datatable()

Get extinction risk from IUCN

RedList token issued to ben@ecoquants.com via http://apiv3.iucnredlist.org/api/v3/token.
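
get_iucn() is a gmbi helper used below to look up one species at a time. As an illustration, here is a minimal sketch assuming it queries the Red List API v3 species endpoint by name and returns the first match; the field names and error handling are assumptions, and the real helper may differ:

# hypothetical sketch of get_iucn(); the gmbi helper is the real implementation
get_iucn_sketch <- function(genus_species, key){
  na_rec <- list(
    category = NA_character_, criteria = NA_character_,
    population_trend = NA_character_, published_year = NA_character_)
  url <- glue(
    "https://apiv3.iucnredlist.org/api/v3/species/{utils::URLencode(genus_species)}?token={key}")
  res <- tryCatch(jsonlite::fromJSON(url), error = function(e) NULL)
  if (is.null(res) || length(res$result) == 0) return(na_rec)
  rec <- res$result[1, c("category", "criteria", "population_trend", "published_year")]
  lapply(rec, as.character)
}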

if (!file.exists(spp_iucn_csv)){

  # speed up get_iucn() by running 
  # as multi-threaded process w/ furrr::future_map()

  plan(multiprocess)

  # get private IUCN RedList token key
  rl_token <- readLines("~/private/iucn_api_key")
  spp <- tbl(con, "spp") %>% collect()

  # iterate in chunks of 1000
  n <- 1000
  for ( i in seq(1, nrow(spp), by=n) ){
    i_beg <- i
    i_end <- i - 1 + n
    csv <- glue("{dir_tmp}/spp_iucn_{str_pad(i_beg, 5, 'left', '0')}-{str_pad(i_end, 5, 'left', '0')}.csv")
    cat(glue("{basename(csv)} - {Sys.time()}\n\n"))

    spp_i <- slice(spp, i_beg:i_end)
    tic()  
    spp_i$iucn <- future_map(spp_i$genus_species, get_iucn, key=rl_token)
    toc() # 262.211 sec; 262.211 / 1000 * 24904 / 60 = 108.8 min for all rows

    spp_i <- spp_i %>% 
      mutate(
        iucn_category         = map_chr(iucn, "category"),
        iucn_criteria         = map_chr(iucn, "criteria"),
        iucn_population_trend = map_chr(iucn, "population_trend"),
        iucn_published_year   = map_chr(iucn, "published_year")) %>% 
      select(SPECIESID, starts_with("iucn_"))

    write_csv(spp_i, csv)
  }

  # consolidate csvs
  csvs <- list.files(dir_tmp, "^spp_iucn_.*", full.names = T)
  df <- map_df(csvs, read_csv)
  write_csv(df, spp_iucn_csv)
  unlink(dir_tmp, recursive = T)
}
iucn <- read_csv(spp_iucn_csv)

iucn_cols <- iucn %>% select(starts_with("iucn_")) %>% names()
spp_cols <- tbl(con, "spp") %>% head(1) %>% collect() %>% names()
if (!all(iucn_cols %in% spp_cols)){
  # write to db
  spp <- tbl(con, "spp") %>% 
    collect() %>% 
    left_join(iucn, by="SPECIESID")
  copy_to(
    con, spp, "spp", temporary=F, overwrite=T,
    unique_indexes=list("SPECIESID"),
    indexes = list("group", "genus_species", "iucn_category"))
}

spp_iucn_category <- tbl(con, "spp") %>% 
  pull(iucn_category)
table(spp_iucn_category, useNA="always")

Calculate species richness

Using AquaMaps probability ≥ 0.5 [@kleinShortfallsGlobalProtected2015; @oharaAligningMarineSpecies2017] to establish presence.

Calculate nspp, all taxa

spp_prob_threshold <- 0.5

spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells")

tif <- (glue("{dir_out}/nspp_all.tif"))

if (!file.exists(tif)){

  # get species richness, with default col_cellid="cellid"
  cells_nspp <- spp_cells %>% 
    filter(
      probability >= spp_prob_threshold) %>% 
    left_join(cells, by=c("CenterLat","CenterLong")) %>% 
    group_by(cellid) %>% 
    summarize(
      nspp = n()) %>% 
    collect()
  r_nspp_all <- df_to_raster(cells_nspp, "nspp", tif)
}

if (!file.exists(tif)){
  # OR get species richness, with cols_lonlat=c("CenterLat","CenterLong")
  cells_nspp <- spp_cells %>% 
    filter(
      probability >= spp_prob_threshold) %>% 
    group_by(CenterLat, CenterLong) %>% 
    summarize(
      nspp = n()) %>% 
    collect()
  r_nspp_all <- df_to_raster(
    cells_nspp, "nspp", tif, 
    col_cellid=NULL, cols_lonlat=c("CenterLat","CenterLong"))
}
r_nspp_all <- raster(tif)

plot(r_nspp_all, col = cols, main="Species Richness, All")
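
df_to_raster() is a gmbi helper used throughout this document to turn a summarized data frame into a GeoTIFF. A minimal sketch of the assumed behavior follows, placing values into a global 0.5° grid either by cellid or by CenterLong/CenterLat; the grid dimensions and argument handling are assumptions, and the package function is the authoritative version:

# hypothetical sketch of df_to_raster(); the gmbi helper is the real implementation
df_to_raster_sketch <- function(df, col_value, tif,
                                col_cellid = "cellid", cols_lonlat = NULL){
  # assumed global 0.5 degree template matching the AquaMaps cell grid
  r <- raster(
    nrows = 360, ncols = 720,
    xmn = -180, xmx = 180, ymn = -90, ymx = 90,
    vals = NA_real_)
  if (!is.null(cols_lonlat)){
    # cols_lonlat given as c("CenterLat","CenterLong"); cellFromXY wants (lon, lat)
    cells <- cellFromXY(r, as.matrix(df[, rev(cols_lonlat)]))
  } else {
    cells <- df[[col_cellid]]
  }
  r[cells] <- df[[col_value]]
  writeRaster(r, tif, overwrite = TRUE)
  r
}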

Calculate nspp, by taxonomic group

redo      <- F
spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells")

spp_groups <- tbl(con, "spp") %>% 
  arrange(group) %>% 
  distinct(group) %>% 
  pull(group)

for (i in seq_along(spp_groups)){
  group <- spp_groups[i]
  grp   <- group_to_grp(group)
  tif   <- (glue("{dir_out}/nspp_{grp}.tif"))

  if (file.exists(tif) & !redo) next()

  cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n"))

  # get species diversity, with default col_cellid="cellid"
  if (is.na(group)){
    spp_grp <- filter(spp, is.na(group))
  } else{
    spp_grp <- filter(spp, group == !!group)
  }

  cells_nspp <- spp_grp %>% 
    select(SPECIESID) %>% 
    left_join(
      spp_cells %>% 
        select(
          SpeciesID, probability,
          CenterLat, CenterLong) %>% 
        filter(
          probability >= spp_prob_threshold), 
      by=c("SPECIESID"="SpeciesID")) %>% 
    left_join(
      cells, 
      by=c("CenterLat","CenterLong")) %>% 
    group_by(cellid) %>% 
    summarize(
      nspp = n()) %>% 
    collect()
  r_grp <- df_to_raster(cells_nspp, "nspp", tif)
  plot(r_grp, col = cols, main=glue("nspp {group}"))
}

# get all tifs, except first one "nspp_all.tif"
tifs <- list.files(dir_out, "^nspp_.*.tif$", full.names = T)[-1]
s_nspp <- stack(tifs)
names(s_nspp) <- str_replace(names(s_nspp), "nspp_", "")
plot(s_nspp, col = cols)
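
group_to_grp(), used in the loop above to build file names, presumably converts a group label like "Tunas & billfishes" into a file-name-safe slug. A minimal sketch under that assumption (the gmbi helper may differ, e.g. in how it treats the NA group):

# hypothetical sketch of group_to_grp(); the gmbi helper is the real implementation
group_to_grp_sketch <- function(group){
  if (is.na(group)) return("na")
  group %>% 
    str_to_lower() %>% 
    str_replace_all("[^a-z0-9]+", "_") %>% 
    str_replace_all("^_|_$", "")
}
# group_to_grp_sketch("Tunas & billfishes") # "tunas_billfishes"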

Calculate extinction risk

The Red List Index [@butchartImprovementsRedList2007] is summarized [@juslenApplicationRedList2016] as:

The RLI [Red List Index] value was calculated by multiplying the number of taxa in each red-list category by the category weight (0 for LC, 1 for NT, 2 for VU, 3 for EN, 4 for CR, 5 for RE/EX). These products were summed and then divided by the number of taxa multiplied by the maximum weight 5 (“maximum possible denominator”). To obtain the RLI value, this sum is subtracted from 1. The index value varies between 0 and 1 (Butchart et al. 2007). The lower the value, the closer the set of taxa is heading towards extinction. If the value is 0 all the taxa are (regionally) extinct. If the value is 1 all the taxa are assessed as Least Concern.

We will use just the numerator of the Red List Index (RLI), the Red List Sum (RLS), to quantify the "endangeredness" of a cell. Unlike the RLI, which averages extinction risk across all assessed species, the RLS is not diluted simply because a cell is species-rich.

$$RLS = \sum_{i=1}^{n_{spp}} w_i$$
$$RLI = 1 - \frac{RLS}{n_{spp} \cdot \max(w)}$$

Weights used for Red List Sum (RLS) indicator:

Note that only 1 species is listed as extinct (EX) or extinct in the wild (EW). Since such species are no longer considered present, protection is assumed to afford them no conservation value, so EX and EW receive a weight of NA.
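
As a quick worked example of the two formulas, take a toy set of five hypothetical species using the category weights assigned in the next chunk (maximum weight 4 for CR):

# toy example: five hypothetical species with assessed categories
w <- c(LC = 0, LC = 0, NT = 1, VU = 2, EN = 3)
rls <- sum(w)                    # 6
rli <- 1 - rls / (length(w) * 4) # 0.7, using the maximum weight (4 = CR) assigned below
c(rls = rls, rli = rli)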

Assign iucn_weight to species

iucn_category_weights = c(
  DD=NA, 
  LC=0, `LR/lc`=0, 
  NT=1, `LR/cd`=1, `LR/nt`=1, 
  VU=2, 
  EN=3, 
  CR=4, 
  EW=NA,
  EX=NA)

spp_cols <- tbl(con, "spp") %>% head(1) %>% collect() %>% names()

if (!"iucn_weight" %in% spp_cols){

  spp <- tbl(con, "spp") %>% 
    collect() %>% 
    mutate(
      iucn_weight = recode(iucn_category, !!!iucn_category_weights))

  copy_to(
    con, spp, "spp", temporary=F, overwrite=T,
    unique_indexes=list("SPECIESID"),
    indexes = list("group", "genus_species", "iucn_weight"))
}

# save spp_csv
if (!file.exists(spp_csv)){
  tbl(con, "spp") %>% 
    collect() %>% 
    write_csv(spp_csv)
}

spp_iucn_weight <- tbl(con, "spp") %>% 
  pull(iucn_weight)
table(spp_iucn_weight, useNA="always")

Calculate rls, all taxa

spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells")

tif <- (glue("{dir_out}/rls_all.tif"))

if (!file.exists(tif)){

  # calculate RedList sum of weights
  cells_rls <- spp %>% 
    filter(
      !is.na(iucn_weight), 
      iucn_weight != 0) %>% 
    left_join(
      spp_cells %>% 
        select(
          SpeciesID, probability,
          CenterLat, CenterLong) %>% 
        filter(
          probability >= spp_prob_threshold), 
      by=c("SPECIESID"="SpeciesID")) %>% 
    left_join(
      cells, by=c("CenterLat","CenterLong")) %>% 
    group_by(cellid) %>% 
    summarize(
      rls = sum(iucn_weight)) %>% 
    collect()
  r_rls_all <- df_to_raster(cells_rls, "rls", tif)
}

r_rls_all <- raster(tif)
plot(r_rls_all, col = cols, main="Red List Sum, All")

Calculate rls, by taxonomic group

redo      <- F
spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells")

spp_groups <- tbl(con, "spp") %>% 
  arrange(group) %>% 
  distinct(group) %>% 
  pull(group)

for (i in seq_along(spp_groups)){
  group <- spp_groups[i]
  grp   <- group_to_grp(group)
  tif   <- (glue("{dir_out}/rls_{grp}.tif"))

  if (file.exists(tif) & !redo) next()

  cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n"))

  # filter by group
  if (is.na(group)){
    spp_grp <- filter(spp, is.na(group))
  } else{
    spp_grp <- filter(spp, group == !!group)
  }

  spp_grp_w <- spp_grp %>% 
    filter(
      !is.na(iucn_weight), 
      iucn_weight != 0) %>% 
    collect()

  if (nrow(spp_grp_w) == 0){
    #cat(glue("  all iucn_weight == NA\n", .trim=F))
  } else {
    # calculate RedList sum of weights
    cells_rls <- spp_grp %>% 
      filter(
        !is.na(iucn_weight), 
        iucn_weight != 0) %>% 
      left_join(
        spp_cells %>% 
          select(
            SpeciesID, probability,
            CenterLat, CenterLong) %>% 
          filter(
            probability >= spp_prob_threshold), 
        by=c("SPECIESID"="SpeciesID")) %>% 
      left_join(
        cells, by=c("CenterLat","CenterLong")) %>% 
      group_by(cellid) %>% 
      summarize(
        rls = sum(iucn_weight)) %>% 
      collect()

    r_grp <- df_to_raster(cells_rls, "rls", tif) 
    plot(r_grp, col = cols, main=glue("Red List Sum, {group}"))
  }
}

# get all tifs, except first one "rls_all.tif"
tifs <- list.files(dir_out, "^rls_.*.tif$", full.names = T)[-1]
s_rls <- stack(tifs)
names(s_rls) <- str_replace(names(s_rls), "rls_", "")
plot(s_rls, col = cols)

Climate Change Forecasts for 2100

The data from forecasting climate change effects on marine species distributions in 2050 using the AquaMaps model [@coroAutomaticClassificationClimate2016] were made publicly available through the server http://thredds.d4science.org and linked from AquaMaps species profiles.

TODO: resolve mismatch in forecast year:

- [@coroAutomaticClassificationClimate2016]: 2050
- AquaMaps.org: 2100

To investigate accessing these data layers via iMarine, I logged into https://i-marine.d4science.org/group/biodiversitylab and saw this entry:

Gianpaolo Coro Dear VRE Members, the AquaMaps Web site is now officially linked to D4Science! All automatically generated maps have links to download content in NetCDF format through D4Science and to interactively inspect the maps through Web applications hosted by the e-Infrastructure. A total of 24,935 AquaMaps NetCDF files were produced for both today and 2100 through the DataMiner "Csv To NetCDF" conversion algorithms. Please, click on the link below to see an example for yellowback anthias and follow the "NetCDF" and the "view in Godiva" links on the page.

https://www.aquamaps.org/preMap2.php?cache=1&SpecID=Fis-10199

Following this link leads to:

http://thredds.d4science.org/thredds/fileServer/public/netcdf/AquaMaps_08_2016/Pseudanthias_evansi.nc

library(raster)
library(glue)
library(here)

get_am2100nc <- function(sp, nc){
  # sp <- "Acanthopagrus_berda"
  # nc <- file.path(dir_nc, basename(url)) # unlink(nc)

  # get netcdf file
  url <- glue("http://thredds.d4science.org/thredds/fileServer/public/netcdf/AquaMaps_08_2016/{sp}.nc")

  if (file.exists(nc)) return (glue("{sp}: already exists"))
  tryCatch(
    download.file(url, nc, mode = "wb", quiet = T),
    error=function(e) e)
  return (glue("{sp}: {ifelse(file.exists(nc), 'downloaded', 'missing')}"))
    #spp_2100 <- spp_2100 %>% 
    #  mutate(
    #    downloaded = ifelse(sp_str == sp, file.exists(nc), downloaded))
    # head(spp_2100)
  #sp_downloaded <- filter(spp_2100, sp_str == sp) %>% pull(downloaded)
  #return(glue("{sp}: {sp_downloaded}"))
}

if (!file.exists(spp_2100_csv)){
  # get all species
  spp_2100 <- spp %>% 
    select(SPECIESID, Genus, Species) %>% 
    collect() %>% 
    mutate(
      sp_str     = glue("{Genus}_{Species}"),
      path_nc    = glue("{dir_nc}/{sp_str}.nc"),
      downloaded = NA) %>% 
    arrange(sp_str) # n= 24,894

  plan(multiprocess)

  # iterate in chunks of 1000
  n <- 1000
  for ( i in seq(1, nrow(spp_2100), by=n) ){ # i = 1

    i_beg <- i
    i_end <- i - 1 + n
    csv <- glue("{dirname(dir_nc)}/spp_2100_{str_pad(i_beg, 5, 'left', '0')}-{str_pad(i_end, 5, 'left', '0')}.csv")
    cat(glue("{basename(csv)} - {Sys.time()}\n\n"))

    # speed up fetching by running in parallel
    future_map2_chr(
      spp_2100$sp_str[i_beg:i_end], 
      spp_2100$path_nc[i_beg:i_end],
      get_am2100nc)

    # record species searched and not found
    spp_i <- slice(spp_2100, i_beg:i_end) %>%  
      mutate(
        downloaded = file.exists(path_nc)) # View(spp_i)
    write_csv(spp_i, csv)
  }

  # combine temporary csvs
  csvs <- list.files(dirname(dir_nc), "^spp_2100_.*\\.csv$", full.names = T)
  spp_df <- map_df(csvs, read_csv) %>% 
    filter(!duplicated(sp_str))
  write_csv(spp_df, spp_2100_csv)
  unlink(csvs)
}

# quickfix
# spp_df2 <- spp_df %>% 
#   left_join(
#     spp_2100 %>% 
#       select(sp_str, SPECIESID),
#     by="sp_str") %>% 
#   select(SPECIESID, Genus, Species, sp_str, path_nc, downloaded)
# write_csv(spp_df2, spp_2100_csv)


if (!"spp_cells_2100" %in% dbListTables(con)){

  dbExecute(con, 'CREATE TABLE spp_cells_2100 (
  speciesid text,
  cellid int4,
  probability float8);
  CREATE UNIQUE INDEX idx_spp_cells_2100_unique ON spp_cells_2100 (speciesid, cellid);')

  r_na   <- raster_na()
  spp_df <- read_csv(spp_2100_csv) %>% 
    filter(downloaded) # 24894 - 24831 = 63 unavailable
  # of 24,894 spp: 63 unavailable, 20 mismatch and 5 unreadable

  #for (i in 1:nrow(spp_df)){ # full run
  for (i in 18009:nrow(spp_df)){ # i = 18009; resumed here after read failures logged below
    if (i %% 1000 == 0) cat(glue("{sprintf('%06d',i)} - {Sys.time()}\n\n"))

    nc    <- spp_df$path_nc[i]
    sp_id <- spp_df$SPECIESID[i]

    #r <- raster(nc, varname="probability")
    r <- tryCatch(raster(nc, varname="probability"),
      error = function(e) cat(glue(
        "  can't read {sprintf('%06d',i)} {sp_id} {basename(nc)} - {Sys.time()}\n\n")))
    if (is.null(r)) next

    tryCatch(
      compareRaster(
        r, r_na, 
        extent=F, rowcol=F, 
        crs=T, res=T, orig=T, rotation=T),
      error = function(e) cat(glue(
        "  mismatch {sprintf('%06d',i)} {sp_id} {basename(nc)} - {Sys.time()}\n\n")))
    # i = 1087  # 2019-06-10 00:03:23
    # Error in compareRaster(): different resolution
    #   extent      : -81, -78, -37.5, -15  (xmin, xmax, ymin, ymax)
    #   data source : /Users/bbest/docker_pg_data/aquamaps2100_nc/Amphichaetodon_melbae.nc 
    # mismatch 001087 Fis-33857 Amphichaetodon_melbae.nc - 2019-06-10 00:15:56
    # mismatch 001433 URM-2-1014 Anoplodactylus_laminifer.nc - 2019-06-10 00:17:01
    # mismatch 003352 Fis-144824 Bovichtus_veneris.nc - 2019-06-10 00:23:45
    # mismatch 004814 NZ-24280 Charitometra_incisa.nc - 2019-06-10 00:28:13
    # mismatch 005270 Fis-60663 Chromis_pamae.nc - 2019-06-10 00:29:39
    # mismatch 007885 Fis-147877 Ebinania_macquariensis.nc - 2019-06-10 00:37:37
    # mismatch 009752 Fis-27991 Genicanthus_semicinctus.nc - 2019-06-10 00:42:44
    # mismatch 009852 Fis-134861 Girella_albostriata.nc - 2019-06-10 00:43:03
    # mismatch 011115 URM-2-3593 Heterostigma_singulare.nc - 2019-06-10 00:46:36
    # mismatch 011989 ITS-552955 Jasus_paulensis.nc - 2019-06-10 00:49:06
    # mismatch 012027 W-Iso-256075 Joeropsis_sanctipauli.nc - 2019-06-10 00:49:11
    # mismatch 014326 SLB-142070 Microdictyon_okamurae.nc - 2019-06-10 00:55:57
    # mismatch 015881 Fis-168180 Novaculops_koteamea.nc - 2019-06-10 06:00:43
    # mismatch 016386 Fis-130905 Ophichthus_kunaloa.nc - 2019-06-10 06:02:24
    # mismatch 016430 Fis-133519 Ophidion_metoecus.nc - 2019-06-10 06:02:30
    # mismatch 017495 Fis-123220 Parapercis_dockinsi.nc - 2019-06-10 06:05:43
    # mismatch 020940 Fis-153727 Scartella_poiti.nc - 2019-06-10 07:21:30
    # mismatch 020942 Fis-61194 Scartichthys_variolatus.nc - 2019-06-10 07:21:30
    # mismatch 022003 Fis-164212 Sparisoma_rocha.nc - 2019-06-10 07:24:20
    # mismatch 023107 URM-2-336 Tanystylum_beuroisi.nc - 2019-06-10 07:27:33
    # mismatch rasters: n=20

    # i = 17793 Patagonotothen_sima.nc
    # Error in .local(x, ...) : min and max x are the same
    # 128-111-61-209:aquamaps2100_nc bbest$ gdalinfo Patagonotothen_sima.nc
    # Warning 1: No UNIDATA NC_GLOBAL:Conventions attribute
    # Driver: netCDF/Network Common Data Format
    # Files: Patagonotothen_sima.nc
    # Size is 512, 512
    # Coordinate System is `'
    # Subdatasets:
    #   SUBDATASET_1_NAME=NETCDF:"Patagonotothen_sima.nc":probability
    #   SUBDATASET_1_DESC=[107x66] probability (64-bit floating-point)
    #   SUBDATASET_2_NAME=NETCDF:"Patagonotothen_sima.nc":faoareayn
    #   SUBDATASET_2_DESC=[107x66] faoareayn (32-bit integer)
    #   SUBDATASET_3_NAME=NETCDF:"Patagonotothen_sima.nc":boundboxyn
    #   SUBDATASET_3_DESC=[107x66] boundboxyn (32-bit integer)
    # Corner Coordinates:
    # Upper Left  (    0.0,    0.0)
    # Lower Left  (    0.0,  512.0)
    # Upper Right (  512.0,    0.0)
    # Lower Right (  512.0,  512.0)
    # Center      (  256.0,  256.0)
    # can't read 17793 ? Patagonotothen_sima.nc
    # can't read 018009 ITS-612391 Periclimenes_brevicarpalis.nc - 2019-06-10 07:12:45
    # can't read 018176 W-Por-246569 Phakellia_columnata.nc - 2019-06-10 07:13:18
    # can't read rasters: n=5    

    #r <- extend(r, r_na)
    r <- raster::projectRaster(r, r_na)
    d <- tibble(
      speciesid = sp_id,
      probability = raster::values(r)) %>% 
      tibble::rowid_to_column(var = "cellid") %>% 
      filter(!is.na(probability))

    #dbExecute(con, glue("DELETE FROM spp_cells_2100;"))
    dbExecute(con, glue("DELETE FROM spp_cells_2100 WHERE SpeciesID='{sp_id}';"))
    dbWriteTable(con, "spp_cells_2100", d, append=T, row.names=F)
  }
  # i = 1
}  
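
A quick sanity check on the newly loaded table can confirm how many rows and distinct species made it in. This is a hedged example added here for illustration, not part of the original pipeline:

# sanity check: rows and distinct species loaded into spp_cells_2100
dbGetQuery(con, "
  SELECT COUNT(*) AS n_rows, COUNT(DISTINCT speciesid) AS n_spp
  FROM spp_cells_2100;")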

Calculate species richness

Calculate nspp, all taxa

spp_prob_threshold <- 0.5

spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells_2100")

tif <- (glue("{dir_out}/nspp_all_2100.tif"))

if (!file.exists(tif)){

  # get species richness, with default col_cellid="cellid"
  cells_nspp <- spp_cells %>% 
    filter(
      probability >= spp_prob_threshold) %>% 
    group_by(cellid) %>% 
    summarize(
      nspp = n()) %>% 
    collect()
  r_nspp_all_2100 <- df_to_raster(cells_nspp, "nspp", tif)
}

r_nspp_all_2100 <- raster(tif)

plot(r_nspp_all_2100, col = cols, main="Species Richness, All 2100")

Calculate nspp, by taxonomic group

redo      <- F
#options(warn=2) # error on warning
spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells_2100")

spp_groups <- tbl(con, "spp") %>% 
  arrange(group) %>% 
  distinct(group) %>% 
  pull(group)

for (i in seq_along(spp_groups)){ # i = 1
  group <- spp_groups[i]
  grp   <- group_to_grp(group)
  tif   <- (glue("{dir_out}/nspp_{grp}_2100.tif"))

  if (file.exists(tif) & !redo) next()

  cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n"))

  # get species diversity, with default col_cellid="cellid"
  if (is.na(group)){
    spp_grp <- filter(spp, is.na(group))
  } else{
    spp_grp <- filter(spp, group == !!group)
  }

  cells_nspp_2100 <- spp_grp %>% 
    select(SPECIESID) %>% 
    left_join(
      spp_cells %>% 
        select(
          speciesid, cellid, probability) %>% 
        filter(
          probability >= spp_prob_threshold), 
      by=c("SPECIESID"="speciesid")) %>% 
    group_by(cellid) %>% 
    summarize(
      nspp = n()) %>% 
    collect()
  r_grp_2100 <- df_to_raster(cells_nspp_2100, "nspp", tif)
  plot(r_grp_2100, col = cols, main=glue("nspp {group} 2100"))
}

# get all tifs, except first one "nspp_all_2100.tif"
tifs   <- list.files(dir_out, "^nspp_.*_2100\\.tif$", full.names = T)[-1]
s_nspp_2100 <- stack(tifs)
names(s_nspp_2100) <- names(s_nspp_2100) %>% 
  str_replace("nspp_", "") %>% 
  str_replace("_2100", "")
plot(s_nspp_2100, col = cols)

Calculate extinction risk

Calculate rls, all taxa

spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells_2100")

tif <- (glue("{dir_out}/rls_all_2100.tif"))

if (!file.exists(tif)){

  # calculate RedList sum of weights
  cells_rls_2100 <- spp %>% 
    filter(
      !is.na(iucn_weight), 
      iucn_weight != 0) %>% 
    left_join(
      spp_cells %>% 
        select(
          speciesid, cellid, probability) %>% 
        filter(
          probability >= spp_prob_threshold), 
      by=c("SPECIESID"="speciesid")) %>% 
    group_by(cellid) %>% 
    summarize(
      rls = sum(iucn_weight, na.rm=T)) %>% 
    collect()
  r_rls_all_2100 <- df_to_raster(cells_rls_2100, "rls", tif)
}

r_rls_all_2100 <- raster(tif)
plot(r_rls_all_2100, col = cols, main="Red List Sum, All 2100")

Calculate rls, by taxonomic group

redo      <- F
spp       <- tbl(con, "spp")
cells     <- tbl(con, "cells")
spp_cells <- tbl(con, "spp_cells_2100")

spp_groups <- tbl(con, "spp") %>% 
  arrange(group) %>% 
  distinct(group) %>% 
  pull(group)

for (i in seq_along(spp_groups)){ # i = 1
  group <- spp_groups[i]
  grp   <- group_to_grp(group)
  tif   <- (glue("{dir_out}/rls_{grp}_2100.tif"))

  if (file.exists(tif) & !redo) next()

  cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n"))

  # filter by group
  if (is.na(group)){
    spp_grp <- filter(spp, is.na(group))
  } else{
    spp_grp <- filter(spp, group == !!group)
  }

  spp_grp_w <- spp_grp %>% 
    filter(
      !is.na(iucn_weight), 
      iucn_weight != 0) %>% 
    collect()

  if (nrow(spp_grp_w) == 0){
    cat(glue("  all iucn_weight == NA\n", .trim=F))
  } else {
    # calculate RedList sum of weights
    cells_rls_2100 <- spp_grp %>% 
      filter(
        !is.na(iucn_weight), 
        iucn_weight != 0) %>% 
      left_join(
        spp_cells %>% 
          select(
            speciesid, cellid, probability) %>% 
          filter(
            probability >= spp_prob_threshold), 
        by=c("SPECIESID"="speciesid")) %>% 
      group_by(cellid) %>% 
      summarize(
        rls = sum(iucn_weight, na.rm = T)) %>% 
      collect()

    r_grp_2100 <- df_to_raster(cells_rls_2100, "rls", tif) 
    plot(r_grp_2100, col = cols, main=glue("Red List Sum, {group} 2100"))
  }
}

# get all tifs, except first one "rls_all_2100.tif"
tifs   <- list.files(dir_out, "^rls_.*_2100\\.tif$", full.names = T)[-1]
s_rls_2100 <- stack(tifs)
names(s_rls_2100) <- names(s_rls_2100) %>% 
  str_replace("rls_", "") %>% 
  str_replace("_2100", "")
plot(s_rls_2100, col = cols)

Redo: Multiple Groups, Now and Future

nspp | rli, per model (now | 2100)

spp_prob_threshold <- 0.5

lut_mdl <- tribble(
  ~tbl_spp_cells  , ~mdl_sfx,
  "spp_cells"     , "",
  "spp_cells_2100", "_2100")

col_grp <- "groups00"

if (!"groups00" %in% tbl_cols(con, "spp")){
  res <- dbSendStatement(
    con, 
    "ALTER TABLE spp ADD COLUMN groups00 TEXT NULL;
    UPDATE spp SET groups00 = 'All';")
  dbClearResult(res)
  # tbl(con, "spp") %>% select(SPECIESID, groups00)
}

for (i in 1:nrow(lut_mdl)){ # i=1

  tbl_spp_cells <- lut_mdl$tbl_spp_cells[i]
  mdl_sfx       <- lut_mdl$mdl_sfx[i]

  dir_grp      <- glue("{dir_out}/{col_grp}{mdl_sfx}")
  dir.create(dir_grp, showWarnings = F)
  nspp_all_tif <- glue("{dir_grp}/nspp_all{mdl_sfx}.tif")

  if (file.exists(nspp_all_tif)) next()

  # get species richness, with default col_cellid="cellid"
  cells_nspp <- tbl(con, tbl_spp_cells) %>% 
    filter(
      probability >= spp_prob_threshold) %>% 
    group_by(cellid) %>% 
    summarize(
      nspp = n()) %>% 
    collect()
  r_nspp_all <- df_to_raster(cells_nspp, "nspp", nspp_all_tif)
}

r_nspp_all_2100 <- raster(nspp_all_tif)

plot(r_nspp_all_2100, col = cols, main="Species Richness, All 2100")
# con <- get_db_con(password=readLines("~/private/pg_pass"))
# Error: in postgresqlCloseConnection(conn, ...) : 
#  connection has pending rows (close open results set first)
# Fix: dbClearResult(dbListResults(con)[[1]])

spp_prob_threshold <- 0.5
redo               <- F

# lookup table for modeling period (now, 2100) to get suffix and tables of species distribution
tbls_mdls <- tribble(
  ~tbl_spp_cells  , ~col_spp   , ~mdl_sfx,
  "spp_cells"     , "SpeciesID", "",
  "spp_cells_2100", "speciesid", "_2100")

# columns in species table describing taxonomic groups
#cols_grps <- str_subset(tbl_cols(con, "spp"), "groups[0-9]+$") %>% sort()
#cols_grps <- c("groups05", "groups06")
# cols_grps <- str_subset(tbl_cols(con, "spp"), "groups[0-9]+$") %>% 
#   sort() %>% 
#   setdiff(c("groups05", "groups06"))
cols_grps <- str_subset(tbl_cols(con, "spp"), "groups0[1-3]{1}") %>% sort()

products <- c("nspp", "rli")

# iterate over modeling period (now/2100) given by tbl_spp_cells
for (i in 1:nrow(tbls_mdls)){ # i=1 # i=2

  tbl_spp_cells <- tbls_mdls$tbl_spp_cells[i]
  col_spp       <- tbls_mdls$col_spp[i]
  mdl_sfx       <- tbls_mdls$mdl_sfx[i]
  message(glue("mdl_sfx: {mdl_sfx} -- {Sys.time()}"))

  # iterate over taxonomic group (groups01, groups02,...) given by col_grps
  for (col_grps in cols_grps){ # col_grps = cols_grps[1] # col_grps <- "groups00"

    grpsmdl     <- glue("{col_grps}{mdl_sfx}")
    dir_grpsmdl <- glue("{dir_out}/{grpsmdl}")
    dir.create(dir_grpsmdl, showWarnings = F)
    message(glue("  dir_grpsmdl: {basename(dir_grpsmdl)}/ -- {Sys.time()}"))

    groups <- dbGetQuery(con, glue("SELECT DISTINCT {col_grps} FROM SPP;")) %>% 
      pull(!!col_grps) %>% na.omit() %>% sort()

    for (group in groups){ # group = groups[3]

      grp <- group_to_grp(group) 
      message(glue("    grp: {grp} -- {Sys.time()}"))

      for (product in products){ # product = products[1]

        tif <- glue("{dir_grpsmdl}/{grpsmdl}_{product}_{grp}.tif")

        if (file.exists(tif) & !redo) next()

        if (product == "nspp"){
          #devtools::load_all()
          message(glue("      tif: {basename(tif)} -- {Sys.time()}"))
          calc_nspp(con, tif, col_grps, group, tbl_spp_cells, col_spp, spp_prob_threshold)
        }

        if (product == "rli"){
          rls_tif <- str_replace(tif, "_rli_", "_rls_")
          message(glue("      tif: {basename(tif)}; rls_tif: {basename(rls_tif)} -- {Sys.time()}"))
          calc_rli(con, tif, col_grps, group, tbl_spp_cells, col_spp, spp_prob_threshold, rls_tif=rls_tif)
        }

      } # end: for (product in products){
    } # end: for (group in groups){
  } # end: for (col_grps in cols_grps){
} # end: for (i in 1:nrow(tbls_mdls)){
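
calc_nspp() and calc_rli() are gmbi helpers called in the loop above. As an illustration, here is a minimal sketch of what calc_nspp() presumably does, mirroring the per-group richness code earlier in this document; the argument names follow the call above, but the internals are assumptions and the package function is the real implementation:

# hypothetical sketch of calc_nspp(); the gmbi helper is the real implementation
calc_nspp_sketch <- function(
  con, tif, col_grp, group, tbl_spp_cells, col_spp, spp_prob_threshold){

  # species in the given taxonomic group
  spp_grp <- tbl(con, "spp") %>% 
    filter(!!sym(col_grp) == !!group) %>% 
    select(SPECIESID)

  # count species meeting the probability threshold per cell
  cells_nspp <- spp_grp %>% 
    left_join(
      tbl(con, tbl_spp_cells) %>% 
        filter(probability >= spp_prob_threshold),
      by = setNames(col_spp, "SPECIESID")) %>% 
    group_by(cellid) %>% 
    summarize(nspp = n()) %>% 
    collect()

  df_to_raster(cells_nspp, "nspp", tif)
}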

TODO: Archive DB

## TODO: Archive db

# [arkdb](https://ropensci.github.io/arkdb/): archive and unarchive databases using flat files
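
A hedged sketch of how this archive step might look with arkdb, assuming default text-file output and a hypothetical output folder; this has not been run here:

# hypothetical archive step with arkdb; not yet run
library(arkdb)
dir_archive <- here("inst/data/db_archive")
dir.create(dir_archive, showWarnings = FALSE)
ark(con, dir_archive, lines = 50000)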

References


