library(data.table, exclude = c("between", "first", "last"))
library(magrittr, include.only = "%>%")
library(RPostgres)
library(fst)
library(dplyr, mask.ok = c("filter", "lag", "intersect",
"setdiff", "setequal", "union"))
library(tidyr)
library(stringr)
library(fortebaseline)
con <- dbConnect(
Postgres(),
user = "bety",
password = "bety",
host = "localhost",
port = 7990
)
fst_file <- "~/Projects/try-raw-data/4143.fst"
if (!file.exists(fst_file)) {
message("Creating new FST file.")
tryfile <- "~/Projects/try-raw-data/4143.txt"
trydata <- fread(tryfile)
fst::write_fst(trydata, fst_file)
} else {
message("Reading existing FST file")
trydata <- read_fst(fst_file, as.data.table = TRUE)
}
# Subset to selected species
species_df <- pfts_species()
species <- species_df %>% pull(scientificname)
setkey(trydata, "AccSpeciesName")
species_data <- trydata[
species,
.(AccSpeciesName, ObservationID,
TraitID, TraitName,
DataID, DataName,
ValueKindName, StdValue, UnitName,
Reference)
][!is.na(StdValue)][ValueKindName == "Single"]
bety2try <- tibble::tribble(
~bety_name, ~DataID,
"SLA", 6582,
## "c2n_leaf", 455,
## "c2n_fineroot", 489,
## "leaf_width", 447,
"Vcmax", 550,
"fineroot2leaf", 5383,
"leaf_respiration_rate_m2", 69,
"root_respiration_rate", 1189
) %>%
inner_join(
tbl(con, "variables") %>% select(id, name),
by = c("bety_name" = "name"),
copy = TRUE
)
setDT(bety2try)
species_data_sub <- species_data[bety2try, on = "DataID"]
if (FALSE) {
ggplot(species_data_sub) +
aes(x = AccSpeciesName, y = StdValue) +
geom_violin() +
geom_jitter(size = 0.5, alpha = 0.5) +
facet_wrap(vars(bety_name), scales = "free_y")
}
bety_species <- species_df %>%
select(AccSpeciesName = scientificname, specie_id)
setDT(bety_species)
species_data_final <- bety_species[species_data_sub, on = "AccSpeciesName"]
species_data_final[, notes := paste0("TRY.ObservationID = ", ObservationID, "|",
"TRY.DataID = ", DataID)]
# Exclude TRY data already in BETY
bety_try_data <- tbl(con, "traits") %>%
filter(notes %like% "TRY%") %>%
collect() %>%
separate(notes, c("ObservationID", "DataID"), sep = "\\|") %>%
mutate_at(vars(ObservationID, DataID), ~as.numeric(str_extract(., "[[:digit:]]+"))) %>%
select(ends_with("ID"))
setDT(bety_try_data)
# data.table equivalent of `anti_join(species_data_final, bety_try_data)`
species_data_insert <- species_data_final[!bety_try_data,
on = c("ObservationID", "DataID")]
ninsert <- dbExecute(con, paste(
"INSERT INTO traits (variable_id, specie_id, mean, n, notes)",
"VALUES ($1, $2, $3, 1, $4)"
), with(species_data_insert, list(id, specie_id, StdValue, notes)))
message("Inserted ", ninsert, " trait records.")
# Retrieve leaf temperature from TRY
## tempdat <- trydata[species, ][grepl("temperature", DataName)][grepl("leaf", DataName, ignore.case = TRUE)]
tempdat <- trydata[
species, ][
DataID == 1666][
, .(DatasetID,
AccSpeciesID,
AccSpeciesName,
ObservationID,
leaf_temperature = as.numeric(OrigValueStr))]
bety_try_data_raw <- tbl(con, "traits") %>%
filter(notes %like% "%TRY.ObservationID%") %>%
collect()
bety_try_data <- bety_try_data_raw %>%
mutate(
ObservationID = str_match(notes, "TRY.ObservationID = ([[:digit:]]+)")[, 2] %>%
as.numeric(),
DataID = str_match(notes, "TRY.DataID = ([[:digit:]]+)")[, 2] %>%
as.numeric()
)
temp_insert <- bety_try_data %>%
distinct(trait_id = id, ObservationID) %>%
inner_join(tempdat, by = "ObservationID") %>%
mutate(variable_id = bit64::as.integer64(81)) %>% # leaf temperature
select(trait_id, variable_id, stat = leaf_temperature)
temp_in_bety <- tbl(con, "covariates") %>%
filter(variable_id == 81,
trait_id %in% !!temp_insert[["trait_id"]]) %>%
collect()
ninsert <- anti_join(temp_insert, temp_in_bety, by = c("trait_id", "variable_id")) %>%
dbAppendTable(conn = con, name = "covariates")
message("Inserted ", ninsert, " new covariate records.")
## con %>%
## dplyr::tbl("traits") %>%
## dplyr::filter(notes %like% "__TRY-DB__") %>%
## dplyr::count(specie_id)
# The rest of these are not available in TRY, at least for the
# species we're using.
## "stomatal_slope"
## "f_labile", ,
## "seedling_mortality",
## "nonlocal_dispersal",
## "growth_resp_factor",
## "mort2",
## "Vm_low_temp",
## "leaf_turnover_rate",
## "root_turnover_rate",
## "quantum_efficiency",
## "water_conductance",
## "r_fract",
# Code for searching TRY data names
## species_data[grepl("stomat", DataName, ignore.case = TRUE),
## .N,
## by = .(DataID, DataName)][order(N, decreasing = TRUE)]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.