knitr::opts_chunk$set( #collapse = TRUE, #comment = "#>", message = F )
Using PostgreSQL with PostGIS extension in Docker:
pg_data
docker volume create pg_data
postgis
, database am
user admin
On command line:
# set password as environment variable read in from file DB_PASS=`cat ~/private/pg_pass` # run docker with image docker run --name=postgis \ -d -e POSTGRES_USER=admin \ -e POSTGRES_PASS=$DB_PASS \ -e POSTGRES_DBNAME=am \ -e ALLOW_IP_RANGE=0.0.0.0/0 \ -p 5432:5432 \ -v pg_data:/var/lib/postgresql \ -v /Users/bbest/docker_pg_data:/data \ --restart=unless-stopped \ --shm-size=1g \ kartoza/postgis:11.0-2.5 # list all containers docker containers ls -a # stop docker container stop postgis # start docker container start postgis # check logs docker logs postgis
library(tidyverse) library(here) library(glue) library(raster) library(DBI) library(DT) library(RPostgreSQL) library(tictoc) library(furrr) library(RColorBrewer) library(zeallot) library(devtools); load_all() # library(gmbi) # paths dir_tmp <- here("inst/tmp") dir_out <- here("inst/data/rasters") spp_iucn_csv <- here("inst/data/spp_iucn.csv") spp_csv <- here("inst/data/spp.csv") dir_nc <- "~/docker_pg_data/aquamaps2100_nc" spp_2100_csv <- "~/docker_pg_data/aquamaps2100_nc.csv" spp_bbnj_csv <- here("inst/data/spp_bbnj.csv") spp_bbnj_grp_csv <- here("inst/data/spp_bbnj_grouped.csv") spp_bbnj_grps_csv <- here("inst/data/spp_bbnj_groups.csv") # plotting parameters pal <- colorRampPalette(brewer.pal(11, "Spectral")) cols <- rev(pal(255)) par(mar=c(0.5,0.5,0.5,0.5)) # helper functions ---- tbl_cols <- function(con, tbl){ tbl(con, tbl) %>% head(1) %>% collect() %>% names() } # conntect to database con <- get_db_con(password=readLines("~/private/pg_pass")) con #Error: in postgresqlCloseConnection(conn, ...) : # connection has pending rows (close open results set first) #Fix: dbClearResult(dbListResults(con)[[1]])
All the species distribution data was generously provided as comma-seperated value (csv) files in a zip package aquamaps_ver0816c.zip
by Kristin Kaschner and
Cristina Garilao to Ben Best ben@ecoquants.com on Jun 21, 2018 from the extensive work available at http://AquaMaps.org to be fully cited [@kaschnerAquaMapsPredictedRange2016] whenever used.
Generated here are derived products. Please contact Kristin Kaschner kristin.kaschner@biologie.uni-freiburg.de and Cristina Garilao cgarilao@geomar.de for access to the full database.
csv_to_db(con) # drop odd columns dbExecute(con, 'ALTER TABLE spp DROP COLUMN "X11", DROP COLUMN "X12";')
spp <- tbl(con, "spp") %>% collect() %>% mutate( genus_species = glue("{Genus} {Species}") %>% as.character()) copy_to( con, spp, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = list("group", "genus_species"))
Load tables for querying.
spp <- tbl(con, "spp") cells <- tbl(con, "cells") env_cells <- tbl(con, "env_cells") spp_cells <- tbl(con, "spp_cells") obs_cells <- tbl(con, "obs_cells")
Assign taxonomic groups borrowing from previous global OBIS analysis [@tittensorGlobalPatternsPredictors2010].
Class == "Actinopterygii" & !Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae")
Class == "Cephalopoda" & !Order %in% c("Myopsida","Oegopsida","Teuthida")
]Class == "Cephalopoda"
Family %in% c("Odobenidae", "Otariidae", "Phocidae")
Class == "Anthozoa"
(soft/hard + anemones + gorgonians)Order == "Alismatales"
Genus == "Avicennia" & Species == "germinans"
(n=1)Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae")
Order %in% c("Myopsida","Oegopsida","Teuthida")
]Order == "Cetacea"
Order == "Euphausiacea"
Class %in% c("Malacostraca","Ostracoda","Branchiopoda","Cephalocarida","Maxillopoda")
Class == "Pycnogonida"
Class == "Gastropoda"
Class == "Bivalvia"
Class == "Polyplacophora"
Class %in% c("Ascidiacea","Thaliacea","Appendicularia")
Class == "Elasmobranchii"
(actually Class=Chondrichthyes, Subclass=Elasmobranchii)Class == "Reptilia"
crocodiles, sea snakes & sea turtlesPhylum == "Echinodermata"
Phylum == "Porifera"
Phylum %in% c("Annelida", "Sipuncula")
Class == "Hydrozoa"
Class %in% c("Cubozoa","Hydrozoa","Scyphozoa","Staurozoa")
Phylum == "Foraminifera"
spp_cols <- tbl(con, "spp") %>% head(1) %>% collect() %>% names() # group = NA: 1176 # library(tidyverse) # read_csv("~/Gdrive Ecoquants/projects/High Seas MPAs/bbnj/data/raw/AquaMaps/spp.csv") %>% # filter(is.na(group)) %>% # nrow() if (!"groups01" %in% spp_cols){ spp <- tbl(con, "spp") %>% mutate( groups01 = case_when( Class == "Actinopterygii" & !Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ "Bony fishes", #Class == "Cephalopoda" & !Order %in% c("Myopsida","Oegopsida","Teuthida") ~ "Cephalopods", #Order %in% c("Myopsida","Oegopsida","Teuthida") ~ "Squids", Class == "Cephalopoda" ~ "Cephalopods", Family %in% c("Odobenidae", "Otariidae", "Phocidae") ~ "Pinnipeds", Class == "Anthozoa" ~ "Corals", Order == "Alismatales" ~ "Seagrasses", Genus == "Avicennia" & Species == "germinans" ~ "Mangroves", Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ "Tunas & billfishes", Order == "Cetacea" ~ "Cetaceans", Order == "Euphausiacea" ~ "Euphausiids", Class %in% c("Malacostraca","Ostracoda","Branchiopoda","Cephalocarida","Maxillopoda") ~ "Crustaceans", Class == "Pycnogonida" ~ "Sea spiders", Class == "Gastropoda" ~ "Gastropods", Class == "Bivalvia" ~ "Bivalves", Class == "Polyplacophora" ~ "Chitons", Class %in% c("Ascidiacea","Thaliacea","Appendicularia") ~ "Tunicates", Class == "Elasmobranchii" ~ "Sharks & rays", Class == "Reptilia"~ "Reptiles", Phylum == "Echinodermata"~ "Echinoderms", Phylum == "Porifera" ~ "Sponges", Phylum %in% c("Annelida", "Sipuncula") ~ "Worms", Class == "Hydrozoa" ~ "Hydrozoans", Phylum == "Foraminifera" ~ "Forams", Class %in% c("Cubozoa","Hydrozoa","Scyphozoa","Staurozoa") ~ "Jellyfish", TRUE ~ "Other")) %>% collect() copy_to( con, spp, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = list("group", "groups01")) } tbl(con, "spp") %>% collect() %>% pull(groups01) %>% table(useNA="always")
if (!"groups04" %in% spp_cols){ spp <- tbl(con, "spp") %>% mutate( groups04 = case_when( Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ "Tunas & billfishes", Class == "Actinopterygii" & !Family %in% c("Scombridae", "Istiophoridae", "Xiphiidae") ~ "Bony fishes", Class == "Cephalopoda" ~ "Cephalopods", Class == "Mammalia" ~ "Marine Mammals", Class == "Anthozoa" ~ "Corals", Order == "Alismatales" ~ "Seagrasses", Phylum %in% c("Mollusca", "Arthropoda", "Echinodermata", "Porifera","Annelida", "Sipuncula","Cnidaria") & !Family %in% c("Cephalopoda") & !Class %in% c("Anthozoa") | Class %in% c("Ascidiacea","Thaliacea","Appendicularia") ~ "Invertebrates", Class == "Elasmobranchii" ~ "Sharks & rays", TRUE ~ "Other")) %>% collect() copy_to( con, spp, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = list("group", "groups01","groups04")) } tbl(con, "spp") %>% collect() %>% pull(groups04) %>% table(useNA="always")
Based on 0.5 probability threshold.
spp_prob_threshold <- 0.5 # in_bbnj: cells in high seasm, from BBNJ package ---- r_pu_id <- raster(here("../bbnj/inst/data/pu_id.tif")) # mapview::mapview(r_pu_id) cells_bbnj <- tibble( cellid = raster::values(r_pu_id)) %>% filter(!is.na(cellid)) %>% mutate(in_bbnj = T) cells2 <- tbl(con, "cells") %>% select(-OceanArea) %>% # join with ocean area per cell left_join( tbl(con, "env_cells"), by = c("CenterLat","CenterLong")) %>% select(cellid, CenterLat, CenterLong, OceanArea) %>% collect() %>% # join with in_bbnj left_join( cells_bbnj, by = "cellid") %>% mutate( in_bbnj = replace_na(in_bbnj, F), cellid = as.integer(cellid)) # plot OceanArea hist(cells2$OceanArea) r <- setValues(r_pu_id, NA) r[cells2$cellid] <- cells2$OceanArea mapview::mapview(r) # plot in_bbnj table(cells2$in_bbnj, useNA="ifany") r <- setValues(r_pu_id, NA) r[cells2$cellid] <- cells2$in_bbnj mapview::mapview(r, method="ngb") # update cells table copy_to( con, cells2, "cells", temporary=F, overwrite=T, unique_indexes=list("cellid"), indexes = list("CenterLat", "CenterLong", "in_bbnj", "OceanArea")) # area_ocean & pct_bbnj ----- # set spp_cells.cellid r <- DBI::dbSendStatement(con, 'UPDATE spp_cells AS sc SET cellid = c.cellid FROM cells AS c WHERE sc."CenterLat" = c."CenterLat" AND sc."CenterLong" = c."CenterLong";') pct_brks <- c(0,0.001,0.25,1) spp_sum <- tbl(con, "spp_cells") %>% filter(probability >= spp_prob_threshold) %>% left_join( tbl(con, "cells"), by="cellid") %>% select(SpeciesID, cellid, OceanArea, in_bbnj) %>% group_by(SpeciesID) %>% summarize( n_cells = n(), area_ocean_km2 = sum(OceanArea, na.rm=T), area_bbnj_km2 = sum(OceanArea * as.integer(in_bbnj), na.rm=T)) %>% mutate( area_bbnj_pct = area_bbnj_km2 / area_ocean_km2) %>% collect() %>% mutate( area_bbnj_pct_cat = cut( area_bbnj_pct, pct_brks, include.lowest = T)) # 2 min table(spp_sum$area_bbnj_pct_cat, useNA = "ifany") # [0,0.001] (0.001,0.25] (0.25,1] # 13217 9008 2673 # table(spp_sum$area_bbnj_pct_cat, useNA = "ifany")/nrow(spp_sum) # [0,0.001] (0.001,0.25] (0.25,1] # 0.5308459 0.3617961 0.1073580 # copy_to( # con, spp_sum, "spp", temporary=F, overwrite=T, # unique_indexes=list("SPECIESID"), # indexes = list( # "groups00", # "groups01", "groups02", "groups03")) copy_to( con, spp_sum, "spp_sum", temporary=F, overwrite=T, unique_indexes=list("SpeciesID")) spp2 <- tbl(con, "spp") %>% collect() %>% #paste(intersect(dbListFields(con, "spp"), names(spp_sum)), collapse=", -") select(-n_cells, -area_ocean_km2, -area_bbnj_km2, -area_bbnj_pct, -area_bbnj_pct_cat) %>% left_join( spp_sum, by=c("SPECIESID"="SpeciesID")) copy_to( con, spp2, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = c("group", sprintf("groups%02d", 0:6)) %>% as.list()) # spp_sum <- spp_sum %>% # mutate( # area_bbnj_pct_cat = replace_na( # area_bbnj_pct_cat, # levels(spp_sum$area_bbnj_pct_cat)[1])) spp_bbnj <- tbl(con, "spp") %>% collect() %>% mutate( # groups01: in high seas (groups02), 25% in high seas (groups03) groups02 = case_when( area_bbnj_km2 > 0 ~ groups01), groups03 = case_when( area_bbnj_pct >= 0.25 ~ groups01), # groups04: in high seas (groups05), 25% in high seas (groups06) groups05 = case_when( area_bbnj_km2 > 0 ~ groups04), groups06 = case_when( area_bbnj_pct >= 0.25 ~ groups04)) write_csv(spp_bbnj, spp_bbnj_csv) copy_to( con, spp_bbnj, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = c("group", sprintf("groups%02d", 0:6)) %>% as.list()) # TODO: update below to summarize for all groups## #setdiff(names(spp_bbnj), tbl_cols(con, "spp")) # spp_bbnj_grp <- spp_bbnj %>% # mutate( # area_bbnj_pct_cat = fct_explicit_na( # area_bbnj_pct_cat, na_level = "(NA)")) %>% # group_by(groups01, area_bbnj_pct_cat) %>% # summarize( # nspp = n()) %>% # spread(area_bbnj_pct_cat, nspp) # write_csv(spp_bbnj_grp, spp_bbnj_grp_csv, na = "") # # spp_bbnj_grps <- count(spp_bbnj, groups01, name="groups01_all") %>% # rename(group=groups01) %>% # left_join( # count(spp_bbnj, groups02, name="groups02_hs"), # by = c("group"="groups02")) %>% # left_join( # count(spp_bbnj, groups03, name="groups03_hs25"), # by = c("group"="groups03")) %>% # mutate( # groups02_pct = groups02_hs / groups01_all, # groups03_pct = groups03_hs25 / groups01_all) # write_csv(spp_bbnj_grps, spp_bbnj_grps_csv, na = "")
groups00
: All original AquaMaps species groups01
: 1st taxonomic groupinggroups02
: 1st grouping, species (prob≥ 0.5) in high seasgroups03
: 1st grouping, species (prob ≥ 0.5) and 25% of its range in high seasgroups04
: 2nd taxonomic groupinggroups05
: 2nd grouping, species (prob≥ 0.5) in high seasgroups06
: 2nd grouping, species (prob ≥ 0.5) and 25% of its range in high seasread_csv(spp_bbnj_grps_csv, na = "") %>% write_csv(spp_bbnj_grps_csv, na = "") datatable()
RedList token issued to ben@ecoquants.com via http://apiv3.iucnredlist.org/api/v3/token.
if (!file.exists(spp_iucn_csv)){ # speed up get_iucn() by running # as multi-threaded process w/ furrr::future_map() plan(multiprocess) # get private IUCN RedList token key rl_token <- readLines("~/private/iucn_api_key") spp <- tbl(con, "spp") %>% collect() # iterate in chunks of 1000 n <- 1000 for ( i in seq(1, nrow(spp), by=n) ){ i_beg <- i i_end <- i - 1 + n csv <- glue("{dir_tmp}/spp_iucn_{str_pad(i_beg, 5, 'left', '0')}-{str_pad(i_end, 5, 'left', '0')}.csv") cat(glue("{basename(csv)} - {Sys.time()}\n\n")) spp_i <- slice(spp, i_beg:i_end) tic() spp_i$iucn <- future_map(spp_i$genus_species, get_iucn, key=rl_token) toc() # 262.211 sec; 262.211 / 1000 * 24904 / 60 = 108.8 min for all rows spp_i <- spp_i %>% mutate( iucn_category = map_chr(iucn, "category"), iucn_criteria = map_chr(iucn, "criteria"), iucn_population_trend = map_chr(iucn, "population_trend"), iucn_published_year = map_chr(iucn, "published_year")) %>% select(SPECIESID, starts_with("iucn_")) write_csv(spp_i, csv) } # consolidate csvs csvs <- list.files(dir_tmp, "^spp_iucn_.*", full.names = T) df <- map(csvs, function(x) read_csv(x)) %>% bind_rows() write_csv(df, csv) unlink(dir_tmp, recursive = T) } iucn <- read_csv(spp_iucn_csv) iucn_cols <- iucn %>% select(starts_with("iucn_")) %>% names() spp_cols <- tbl(con, "spp") %>% head(1) %>% collect() %>% names() if (!all(iucn_cols %in% spp_cols)){ # write to db spp <- tbl(con, "spp") %>% collect() %>% left_join(df, by="SPECIESID") copy_to( con, spp, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = list("group", "genus_species", "iucn_category")) } spp_iucn_category <- tbl(con, "spp") %>% pull(iucn_category) table(spp_iucn_category, useNA="always")
Using AquaMaps probability ≥ 0.5 [@kleinShortfallsGlobalProtected2015; @oharaAligningMarineSpecies2017] to establish presence.
nspp
, all taxaspp_prob_threshold <- 0.5 spp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells") tif <- (glue("{dir_out}/nspp_all.tif")) if (!file.exists(tif)){ # get species richness, with default col_cellid="cellid" cells_nspp <- spp_cells %>% filter( probability >= spp_prob_threshold) %>% left_join(cells, by=c("CenterLat","CenterLong")) %>% group_by(cellid) %>% summarize( nspp = n()) %>% collect() r_nspp_all <- df_to_raster(cells_nspp, "nspp", tif) } if (!file.exists(tif)){ # OR get species richness, with cols_lonlat=c("CenterLat","CenterLong") cells_nspp <- spp_cells %>% filter( probability >= spp_prob_threshold) %>% group_by(CenterLat, CenterLong) %>% summarize( nspp = n()) %>% collect() r_nspp_all <- df_to_raster( cells_nspp, "nspp", tif, col_cellid=NULL, cols_lonlat=c("CenterLat","CenterLong")) } r_nspp_all <- raster(tif) plot(r_nspp_all, col = cols, main="Species Richness, All")
nspp
, by taxonomic groupredo <- F spp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells") spp_groups <- tbl(con, "spp") %>% arrange(group) %>% distinct(group) %>% pull(group) for (i in seq_along(spp_groups)){ group <- spp_groups[i] grp <- group_to_grp(group) tif <- (glue("{dir_out}/nspp_{grp}.tif")) if (file.exists(tif) & !redo) next() cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n")) # get species diversity, with default col_cellid="cellid" if (is.na(group)){ spp_grp <- filter(spp, is.na(group)) } else{ spp_grp <- filter(spp, group == !!group) } cells_nspp <- spp_grp %>% select(SPECIESID) %>% left_join( spp_cells %>% select( SpeciesID, probability, CenterLat, CenterLong) %>% filter( probability >= spp_prob_threshold), by=c("SPECIESID"="SpeciesID")) %>% left_join( cells, by=c("CenterLat","CenterLong")) %>% group_by(cellid) %>% summarize( nspp = n()) %>% collect() r_grp <- df_to_raster(cells_nspp, "nspp", tif) plot(r_grp, col = cols, main=glue("nspp {group}")) } # get all tifs, except first one "nspp_all.tif" tifs <- list.files(dir_out, "^nspp_.*.tif$", full.names = T)[-1] s_nspp <- stack(tifs) names(s_nspp) <- str_replace(names(s_nspp), "nspp_", "") plot(s_nspp, col = cols)
The Red List Index ([@butchartImprovementsRedList2007] et al. 2007) is summarized [@juslenApplicationRedList2016] as:
The RLI [Red List Index] value was calculated by multiplying the number of taxa in each red-list category by the category weight (0 for LC, 1 for NT, 2 for VU, 3 for EN, 4 for CR, 5 for RE/EX). These products were summed and then divided by the number of taxa multiplied by the maximum weight 5 (“maximum possible denominator”). To obtain the RLI value, this sum is subtracted from 1. The index value varies between 0 and 1 (Butchart et al. 2007). The lower the value, the closer the set of taxa is heading towards extinction. If the value is 0 all the taxa are (regionally) extinct. If the value is 1 all the taxa are assessed as Least Concern.
We will use simply the numerator, the Red List Sum (RLS), of the Red List Index (RLI) to quantify the "endangeredness" of a cell without dilution from being in a species-rich place as the RLI does when averaging the extinction risk for all assessed species.
$$RLS = \sum_{i=1}^{n_{spp}} w_i$$ $$RLI = 1 - \frac{RLS}{n_{spp} * max(w)}$$
Weights used for Red List Sum (RLS) indicator:
NA
- Data Deficient0
- Least Concern0
- Lower Risk, least concern (1994 category)1
- Near Threatened1
- Lower Risk, conservation dependent (1994 category)1
- Lower Risk, near threatened (1994 category)2
- VUlnerable3
- ENdangered4
- CRitically endangeredNA
(*not 5
) - Extinct in the Wild (n=0)NA
(*not 5
) - EXtinct (n=1)Note that only 1 species is listed as extinct (EX) or extinct in the wild (EW). Since the species is no longer considered present, protection is assumed to not afford any conservation value.
iucn_weight
to speciesiucn_category_weights = c( DD=NA, LC=0, `LR/lc`=0, NT=1, `LR/cd`=1, `LR/nt`=1, VU=2, EN=3, CR=4, EW=NA, EX=NA) spp_cols <- tbl(con, "spp") %>% head(1) %>% collect() %>% names() if (!"iucn_weight" %in% spp_cols){ spp <- tbl(con, "spp") %>% collect() %>% mutate( iucn_weight = recode(iucn_category, !!!iucn_category_weights)) copy_to( con, spp, "spp", temporary=F, overwrite=T, unique_indexes=list("SPECIESID"), indexes = list("group", "genus_species", "iucn_weight")) } # save spp_csv if (!file.exists(spp_csv)){ tbl(con, "spp") %>% collect() %>% write_csv(spp_csv) } spp_iucn_weight <- tbl(con, "spp") %>% pull(iucn_weight) table(spp_iucn_weight, useNA="always")
rls
, all taxaspp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells") tif <- (glue("{dir_out}/rls_all.tif")) if (!file.exists(tif)){ # calculate RedList sum of weights cells_rls <- spp %>% filter( !is.na(iucn_weight), iucn_weight != 0) %>% left_join( spp_cells %>% select( SpeciesID, probability, CenterLat, CenterLong) %>% filter( probability >= spp_prob_threshold), by=c("SPECIESID"="SpeciesID")) %>% left_join( cells, by=c("CenterLat","CenterLong")) %>% group_by(cellid) %>% summarize( rls = sum(iucn_weight)) %>% collect() r_rls_all <- df_to_raster(cells_rls, "rls", tif) } r_rls_all <- raster(tif) plot(r_rls_all, col = cols, main="Red List Sum, All")
rls
, by taxonomic groupredo <- F spp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells") spp_groups <- tbl(con, "spp") %>% arrange(group) %>% distinct(group) %>% pull(group) for (i in seq_along(spp_groups)){ group <- spp_groups[i] grp <- group_to_grp(group) tif <- (glue("{dir_out}/rls_{grp}.tif")) if (file.exists(tif) & !redo) next() cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n")) # filter by group if (is.na(group)){ spp_grp <- filter(spp, is.na(group)) } else{ spp_grp <- filter(spp, group == !!group) } spp_grp_w <- spp_grp %>% filter( !is.na(iucn_weight), iucn_weight != 0) %>% collect() if (nrow(spp_grp_w) == 0){ #cat(glue(" all iucn_weight == NA\n", .trim=F)) } else { # calculate RedList sum of weights cells_rls <- spp_grp %>% filter( !is.na(iucn_weight), iucn_weight != 0) %>% left_join( spp_cells %>% select( SpeciesID, probability, CenterLat, CenterLong) %>% filter( probability >= spp_prob_threshold), by=c("SPECIESID"="SpeciesID")) %>% left_join( cells, by=c("CenterLat","CenterLong")) %>% group_by(cellid) %>% summarize( rls = sum(iucn_weight)) %>% collect() r_grp <- df_to_raster(cells_rls, "rls", tif) plot(r_grp, col = cols, main=glue("Red List Sum, {group}")) } } # get all tifs, except first one "nspp_all.tif" tifs <- list.files(dir_out, "^rls_.*.tif$", full.names = T)[-1] s_rls <- stack(tifs) names(s_rls) <- str_replace(names(s_rls), "rls_", "") plot(s_rls, col = cols)
The data from forecasting climate change effects on marine species distributions in 2050 using the AquaMaps model [@coroAutomaticClassificationClimate2016] were made publicly available through the server http://thredds.d4science.org and linked from AquaMaps species profiles.
TODO: resolve MISMATCH: - [@coroAutomaticClassificationClimate2016]: 2050 - Aquamaps.org: 2100
To investigate accessing these data layers via iMarine, I logged into https://i-marine.d4science.org/group/biodiversitylab and see this entry:
Gianpaolo Coro Dear VRE Members,the AquaMaps Web site is now officially linked to D4Science! All automatically generated maps have links to download content in NetCDF format through D4Science and to interactively inspect the maps trough Web applications hosted by the e-Infrastructure. A total of 24,935 AquaMaps NetCDF files were produced for both today and 2100 through the DataMiner "Csv To NetCDF" conversion algorithms. Please, click on the link below to see an example for yellowback anthias and follow the "NetCDF" and the "view in Godiva" links on the page.
https://www.aquamaps.org/preMap2.php?cache=1&SpecID=Fis-10199
Following this link leads to:
http://thredds.d4science.org/thredds/fileServer/public/netcdf/AquaMaps_08_2016/Pseudanthias_evansi.nc
library(raster) library(glue) library(here) get_am2100nc <- function(sp, nc){ # sp <- "Acanthopagrus_berda" # nc <- file.path(dir_nc, basename(url)) # unlink(nc) # get netcdf file url <- glue("http://thredds.d4science.org/thredds/fileServer/public/netcdf/AquaMaps_08_2016/{sp}.nc") if (file.exists(nc)) return (glue("{sp}: already exists")) tryCatch( download.file(url, nc, mode = "wb", quiet = T), error=function(e) e) return (glue("{sp}: {ifelse(file.exists(nc), 'downloaded', 'missing')}")) #spp_2100 <- spp_2100 %>% # mutate( # downloaded = ifelse(sp_str == sp, file.exists(nc), downloaded)) # head(spp_2100) #sp_downloaded <- filter(spp_2100, sp_str == sp) %>% pull(downloaded) #return(glue("{sp}: {sp_downloaded}")) } if (!file.exists(spp_2100_csv)){ # get all species spp_2100 <- spp %>% select(SPECIESID, Genus, Species) %>% collect() %>% mutate( sp_str = glue("{Genus}_{Species}"), path_nc = glue("{dir_nc}/{sp_str}.nc"), downloaded = NA) %>% arrange(sp_str) # n= 24,894 plan(multiprocess) # iterate in chunks of 1000 n <- 1000 for ( i in seq(1, nrow(spp_2100), by=n) ){ # i = 1 i_beg <- i i_end <- i - 1 + n csv <- glue("{dirname(dir_nc)}/spp_2100_{str_pad(i_beg, 5, 'left', '0')}-{str_pad(i_end, 5, 'left', '0')}.csv") cat(glue("{basename(csv)} - {Sys.time()}\n\n")) # speed up fetching by running in parallel future_map2_chr( spp_2100$sp_str[i_beg:i_end], spp_2100$path_nc[i_beg:i_end], get_am2100nc) # record species searched and not found spp_i <- slice(spp_2100, i_beg:i_end) %>% mutate( downloaded = file.exists(path_nc)) # View(spp_i) write_csv(spp_i, csv) } # combine temporary csvs csvs <- list.files(dirname(dir_nc), "*\\.csv", full.names = T) spp_df <- map_df(csvs, read_csv) %>% dplyr::bind_rows() %>% filter(!duplicated(sp_str)) write_csv(spp_df, spp_2100_csv) unlink(csvs) } # quickfix # spp_df2 <- spp_df %>% # left_join( # spp_2100 %>% # select(sp_str, SPECIESID), # by="sp_str") %>% # select(SPECIESID, Genus, Species, sp_str, path_nc, downloaded) # write_csv(spp_df2, spp_2100_csv) if (!"spp_cells_2100" %in% dbListTables(con)){ dbExecute(con, 'CREATE TABLE spp_cells_2100 ( speciesid text, cellid int4, probability float8); CREATE UNIQUE INDEX idx_spp_cells_2100_unique ON spp_cells_2100 (speciesid, cellid);') r_na <- raster_na() spp_df <- read_csv(spp_2100_csv) %>% filter(downloaded) # 24894 - 24831 = 63 unavailable # of 24,894 spp: 63 unavailable, 20 mismatch and 5 unreadable #for (i in 1:nrow(spp_df)){ # i = 1087 for (i in 18009:nrow(spp_df)){ # i = 18009 if (i %% 1000 == 0) cat(glue("{sprintf('%06d',i)} - {Sys.time()}\n\n")) nc <- spp_df$path_nc[i] sp_id <- spp_df$SPECIESID[i] #r <- raster(nc, varname="probability") r <- tryCatch(raster(nc, varname="probability"), error = function(e) cat(glue( " can't read {sprintf('%06d',i)} {sp_id} {basename(nc)} - {Sys.time()}\n\n"))) if (is.null(r)) next tryCatch( compareRaster( r, r_na, extent=F, rowcol=F, crs=T, res=T, orig=T, rotation=T), error = function(e) cat(glue( " mismatch {sprintf('%06d',i)} {sp_id} {basename(nc)} - {Sys.time()}\n\n"))) # i = 1087 # 2019-06-10 00:03:23 # Error in compareRaster(): different resolution # extent : -81, -78, -37.5, -15 (xmin, xmax, ymin, ymax) # data source : /Users/bbest/docker_pg_data/aquamaps2100_nc/Amphichaetodon_melbae.nc # mismatch 001087 Fis-33857 Amphichaetodon_melbae.nc - 2019-06-10 00:15:56 # mismatch 001433 URM-2-1014 Anoplodactylus_laminifer.nc - 2019-06-10 00:17:01 # mismatch 003352 Fis-144824 Bovichtus_veneris.nc - 2019-06-10 00:23:45 # mismatch 004814 NZ-24280 Charitometra_incisa.nc - 2019-06-10 00:28:13 # mismatch 005270 Fis-60663 Chromis_pamae.nc - 2019-06-10 00:29:39 # mismatch 007885 Fis-147877 Ebinania_macquariensis.nc - 2019-06-10 00:37:37 # mismatch 009752 Fis-27991 Genicanthus_semicinctus.nc - 2019-06-10 00:42:44 # mismatch 009852 Fis-134861 Girella_albostriata.nc - 2019-06-10 00:43:03 # mismatch 011115 URM-2-3593 Heterostigma_singulare.nc - 2019-06-10 00:46:36 # mismatch 011989 ITS-552955 Jasus_paulensis.nc - 2019-06-10 00:49:06 # mismatch 012027 W-Iso-256075 Joeropsis_sanctipauli.nc - 2019-06-10 00:49:11 # mismatch 014326 SLB-142070 Microdictyon_okamurae.nc - 2019-06-10 00:55:57 # mismatch 015881 Fis-168180 Novaculops_koteamea.nc - 2019-06-10 06:00:43 # mismatch 016386 Fis-130905 Ophichthus_kunaloa.nc - 2019-06-10 06:02:24 # mismatch 016430 Fis-133519 Ophidion_metoecus.nc - 2019-06-10 06:02:30 # mismatch 017495 Fis-123220 Parapercis_dockinsi.nc - 2019-06-10 06:05:43 # mismatch 020940 Fis-153727 Scartella_poiti.nc - 2019-06-10 07:21:30 # mismatch 020942 Fis-61194 Scartichthys_variolatus.nc - 2019-06-10 07:21:30 # mismatch 022003 Fis-164212 Sparisoma_rocha.nc - 2019-06-10 07:24:20 # mismatch 023107 URM-2-336 Tanystylum_beuroisi.nc - 2019-06-10 07:27:33 # mismatch rasters: n=20 # i = 17793 Patagonotothen_sima.nc # Error in .local(x, ...) : min and max x are the same # 128-111-61-209:aquamaps2100_nc bbest$ gdalinfo Patagonotothen_sima.nc # Warning 1: No UNIDATA NC_GLOBAL:Conventions attribute # Driver: netCDF/Network Common Data Format # Files: Patagonotothen_sima.nc # Size is 512, 512 # Coordinate System is `' # Subdatasets: # SUBDATASET_1_NAME=NETCDF:"Patagonotothen_sima.nc":probability # SUBDATASET_1_DESC=[107x66] probability (64-bit floating-point) # SUBDATASET_2_NAME=NETCDF:"Patagonotothen_sima.nc":faoareayn # SUBDATASET_2_DESC=[107x66] faoareayn (32-bit integer) # SUBDATASET_3_NAME=NETCDF:"Patagonotothen_sima.nc":boundboxyn # SUBDATASET_3_DESC=[107x66] boundboxyn (32-bit integer) # Corner Coordinates: # Upper Left ( 0.0, 0.0) # Lower Left ( 0.0, 512.0) # Upper Right ( 512.0, 0.0) # Lower Right ( 512.0, 512.0) # Center ( 256.0, 256.0) # can't read 17793 ? Patagonotothen_sima.nc # can't read 018009 ITS-612391 Periclimenes_brevicarpalis.nc - 2019-06-10 07:12:45 # can't read 018176 W-Por-246569 Phakellia_columnata.nc - 2019-06-10 07:13:18 # can't read 018009 ITS-612391 Periclimenes_brevicarpalis.nc - 2019-06-10 07:12:45 # can't read 018176 W-Por-246569 Phakellia_columnata.nc - 2019-06-10 07:13:18 # can't read rasters: n=5 #r <- extend(r, r_na) r <- raster::projectRaster(r, r_na) d <- tibble( speciesid = sp_id, probability = raster::values(r)) %>% tibble::rowid_to_column(var = "cellid") %>% filter(!is.na(probability)) #dbExecute(con, glue("DELETE FROM spp_cells_2100;")) dbExecute(con, glue("DELETE FROM spp_cells_2100 WHERE SpeciesID='{sp_id}';")) dbWriteTable(con, "spp_cells_2100", d, append=T, row.names=F) } # i = 1 }
nspp
, all taxaspp_prob_threshold <- 0.5 spp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells_2100") tif <- (glue("{dir_out}/nspp_all_2100.tif")) if (!file.exists(tif)){ # get species richness, with default col_cellid="cellid" cells_nspp <- spp_cells %>% filter( probability >= spp_prob_threshold) %>% group_by(cellid) %>% summarize( nspp = n()) %>% collect() r_nspp_all_2100 <- df_to_raster(cells_nspp, "nspp", tif) } r_nspp_all_2100 <- raster(tif) plot(r_nspp_all_2100, col = cols, main="Species Richness, All 2100")
nspp
, by taxonomic groupredo <- F #options(warn=2) # error on warning spp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells_2100") spp_groups <- tbl(con, "spp") %>% arrange(group) %>% distinct(group) %>% pull(group) for (i in seq_along(spp_groups)){ # i = 1 group <- spp_groups[i] grp <- group_to_grp(group) tif <- (glue("{dir_out}/nspp_{grp}_2100.tif")) if (file.exists(tif) & !redo) next() cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n")) # get species diversity, with default col_cellid="cellid" if (is.na(group)){ spp_grp <- filter(spp, is.na(group)) } else{ spp_grp <- filter(spp, group == !!group) } cells_nspp_2100 <- spp_grp %>% select(SPECIESID) %>% left_join( spp_cells %>% select( speciesid, cellid, probability) %>% filter( probability >= spp_prob_threshold), by=c("SPECIESID"="speciesid")) %>% group_by(cellid) %>% summarize( nspp = n()) %>% collect() r_grp_2100 <- df_to_raster(cells_nspp_2100, "nspp", tif) plot(r_grp_2100, col = cols, main=glue("nspp {group} 2100")) } # get all tifs, except first one "nspp_all_2100.tif" tifs <- list.files(dir_out, "^nspp_.*_2100\\.tif$", full.names = T)[-1] s_nspp_2100 <- stack(tifs) names(s_nspp_2100) <- names(s_nspp_2100) %>% str_replace("nspp_", "") %>% str_replace("_2100", "") plot(s_nspp_2100, col = cols)
rls
, all taxaspp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells_2100") tif <- (glue("{dir_out}/rls_all_2100.tif")) if (!file.exists(tif)){ # calculate RedList sum of weights cells_rls_2100 <- spp %>% filter( !is.na(iucn_weight), iucn_weight != 0) %>% left_join( spp_cells %>% select( speciesid, cellid, probability) %>% filter( probability >= spp_prob_threshold), by=c("SPECIESID"="speciesid")) %>% group_by(cellid) %>% summarize( rls = sum(iucn_weight, na.rm=T)) %>% collect() r_rls_all_2100 <- df_to_raster(cells_rls_2100, "rls", tif) } r_rls_all_2100 <- raster(tif) plot(r_rls_all_2100, col = cols, main="Red List Sum, All 2100")
rls
, by taxonomic groupredo <- F spp <- tbl(con, "spp") cells <- tbl(con, "cells") spp_cells <- tbl(con, "spp_cells_2100") spp_groups <- tbl(con, "spp") %>% arrange(group) %>% distinct(group) %>% pull(group) for (i in seq_along(spp_groups)){ # i = 1 group <- spp_groups[i] grp <- group_to_grp(group) tif <- (glue("{dir_out}/rls_{grp}_2100.tif")) if (file.exists(tif) & !redo) next() cat(glue("{str_pad(i, 2, pad='0')} of {length(spp_groups)}: {grp} - {Sys.time()}\n\n")) # filter by group if (is.na(group)){ spp_grp <- filter(spp, is.na(group)) } else{ spp_grp <- filter(spp, group == !!group) } spp_grp_w <- spp_grp %>% filter( !is.na(iucn_weight), iucn_weight != 0) %>% collect() if (nrow(spp_grp_w) == 0){ cat(glue(" all iucn_weight == NA\n", .trim=F)) } else { # calculate RedList sum of weights cells_rls_2100 <- spp_grp %>% filter( !is.na(iucn_weight), iucn_weight != 0) %>% left_join( spp_cells %>% select( speciesid, cellid, probability) %>% filter( probability >= spp_prob_threshold), by=c("SPECIESID"="speciesid")) %>% group_by(cellid) %>% summarize( rls = sum(iucn_weight, na.rm = T)) %>% collect() r_grp_2100 <- df_to_raster(cells_rls_2100, "rls", tif) plot(r_grp_2100, col = cols, main=glue("Red List Sum, {group} 2100")) } } # get all tifs, except first one "rls_all_2100.tif" tifs <- list.files(dir_out, "^rls_.*_2100\\.tif$", full.names = T)[-1] s_rls_2100 <- stack(tifs) names(s_rls_2100) <- names(s_rls_2100) %>% str_replace("rls_", "") %>% str_replace("_2100", "") plot(s_rls_2100, col = cols)
nspp|rli_
all
, per mdl
(now|2100)spp_prob_threshold <- 0.5 lut_mdl <- tribble( ~tbl_spp_cells , ~mdl_sfx, "spp_cells" , "", "spp_cells_2100", "_2100") col_grp <- "groups00" dir_grp <- glue("{dir_out}/{col_grp}{sfx_mdl}") dir.create(dir_grp, showWarnings = F) if (!"groups00" %in% tbl_cols(con, "spp")){ dbSendStatement( con, "ALTER TABLE spp ADD COLUMN groups00 TEXT NULL; UPDATE spp SET groups00 = 'All';") # tbl(con, "spp") %>% select(SPECIESID, groups00) } for (i in seq_along(lut_mdl)){ # i=1 tbl_spp_cells <- lut_mdl$tbl_spp_cells[i] mdl_sfx <- lut_mdl$mdl_sfx[i] nspp_all_tif <- glue("{dir_grp}/nspp_all{sfx_mdl}.tif") cells_nspp <- tbl(con, tbl_spp_cells) %>% filter( probability >= spp_prob_threshold) %>% group_by(cellid) %>% summarize( nspp = n()) %>% collect() r_nspp_all <- df_to_raster(cells_nspp, "nspp", nspp_all_tif) } if (!file.exists(nspp_all_tif)){ # get species richness, with default col_cellid="cellid" } r_nspp_all_2100 <- raster(tif) plot(r_nspp_all_2100, col = cols, main="Species Richness, All 2100")
# con <- get_db_con(password=readLines("~/private/pg_pass")) # Error: in postgresqlCloseConnection(conn, ...) : # connection has pending rows (close open results set first) # Fix: dbClearResult(dbListResults(con)[[1]]) spp_prob_threshold <- 0.5 redo <- F # lookup table for modeling period (now, 2100) to get suffix and tables of species distribution tbls_mdls <- tribble( ~tbl_spp_cells , ~col_spp , ~mdl_sfx, "spp_cells" , "SpeciesID", "", "spp_cells_2100", "speciesid", "_2100") # columns in species table describing taxonomic groups #cols_grps <- str_subset(tbl_cols(con, "spp"), "groups[0-9]+$") %>% sort() #cols_grps <- c("groups05", "groups06") # cols_grps <- str_subset(tbl_cols(con, "spp"), "groups[0-9]+$") %>% # sort() %>% # setdiff(c("groups05", "groups06")) cols_grps <- str_subset(tbl_cols(con, "spp"), "groups0[1-3]{1}") %>% sort() products <- c("nspp", "rli") # iterate over modeling period (now/2100) given by tbl_spp_cells for (i in 1:nrow(tbls_mdls)){ # i=1 # i=2 tbl_spp_cells <- tbls_mdls$tbl_spp_cells[i] col_spp <- tbls_mdls$col_spp[i] mdl_sfx <- tbls_mdls$mdl_sfx[i] message(glue("mdl_sfx: {mdl_sfx} -- {Sys.time()}")) # iterate over taxonomic group (groups01, groups02,...) given by col_grps for (col_grps in cols_grps){ # col_grps = cols_grps[1] # col_grps <- "groups00" grpsmdl <- glue("{col_grps}{mdl_sfx}") dir_grpsmdl <- glue("{dir_out}/{grpsmdl}") dir.create(dir_grpsmdl, showWarnings = F) message(glue(" dir_grpsmdl: {basename(dir_grpsmdl)}/ -- {Sys.time()}")) groups <- dbGetQuery(con, glue("SELECT DISTINCT {col_grps} FROM SPP;")) %>% pull(!!col_grps) %>% na.omit() %>% sort() for (group in groups){ # group = groups[3] grp <- group_to_grp(group) message(glue(" grp: {grp} -- {Sys.time()}")) for (product in products){ # product = products[1] tif <- glue("{dir_grpsmdl}/{grpsmdl}_{product}_{grp}.tif") if (file.exists(tif) & !redo) next() if (product == "nspp"){ #devtools::load_all() message(glue(" tif: {basename(tif)} -- {Sys.time()}")) calc_nspp(con, tif, col_grps, group, tbl_spp_cells, col_spp, spp_prob_threshold) } if (product == "rli"){ rls_tif <- str_replace(tif, "_rli_", "_rls_") message(glue(" tif: {basename(tif)}; rls_tif: {basename(rls_tif)} -- {Sys.time()}")) calc_rli(con, tif, col_grps, group, tbl_spp_cells, col_spp, spp_prob_threshold, rls_tif=rls_tif) } } # end: for (product in products){ } # end: for (group in groups){ } # end: for (col_grps in cols_grps){ } # end: for (i in seq_along(tbls_mdls)){
## TODO: Archive db # [arkdb](https://ropensci.github.io/arkdb/): archive and unarchive databases using flat files
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.