## Requirement: 'tibble' + 'stringr' + 'spocc'
# Info: Same than fishbase_ecology() but get the ecology of non-fish taxa by quering OBIS, GBIF and VertNet databases.
# Info: This function takes much more time to process since it query multiple online databases and does that independently for each species.
# Info: To assess the latitudinal repartition of each species, a random sample of defined length (default: 50 per database) need to be defined.
# Info: Extreme latitudinal values are detected by is_extreme() (i.e. > Q3 + 3xIQR) and removed before assessing the maximum latitude.
# Note: Takes around 1h per 1,000 species with a 50 random samples and the 3 databases.
# Note: For large datasets, it is strongly recommended to use it in the division_ecology() function for progressive saving of data.
multiple_databases_ecology = function(data, col_species = "SPECIES", col_id = "ID", random_samples = 50,
databases = c("obis", "gbif", "vertnet")){
print_time = function(start_time, end_time, message){
time = round(end_time - start_time, 2)
cat(paste0(message, " - Time taken : ", time, " seconds", "\n", ""))
cat("\n")
}
is_extreme = function(x){
res = x
Q1 = quantile(x, 0.25, na.rm = TRUE)
Q3 = quantile(x, 0.75, na.rm = TRUE)
.IQR = IQR(x, na.rm = TRUE)
upper.limit = Q3 + (10 * .IQR)
lower.limit = Q1 - (10 * .IQR)
outlier = ifelse(x < lower.limit | x > upper.limit, TRUE, FALSE)
outlier
}
species_spaced = data.frame(name = data[[col_species]], taxid = data[[col_id]])
number_genus = length(grep(" sp.", species_spaced$Spaced))
species_spaced[grep(" sp.", species_spaced$name), ] = suppressWarnings(word(species_spaced[grep(" sp.", species_spaced$name), ], 1,1, " "))
species_unspaced = gsub(" ", "_", species_spaced$name)
start_time_retrieving = Sys.time()
databases_info = suppressWarnings(occ(query = species_spaced$name, from = databases, limit = random_samples))
end_time_retrieving = Sys.time()
cat(paste0("Data retrieved in ", round((end_time_retrieving - start_time_retrieving) / 60, 2), " minutes\n"))
start_time_processing = Sys.time()
taxonomy_infos = function(data, species_list){
aphia_id = list()
taxon_status = list()
initial_name = list()
obis_name = list()
rm_NA = function(data){
data = data[!is.na(data)]
if(length(data) == 0) data = NA
data
}
max_name = function(databases, species, variable, providers){
data = list()
for(j in 1:length(providers)){
dat = databases[[providers[j]]][["data"]][[species]][[variable]]
data[j] = ifelse(length(table(rm_NA(dat))) == 0, NA,
names(which.max(table(rm_NA(dat)))))
}
ifelse(length(table(rm_NA(unlist(data)))) == 0, NA,
names(which.max(table(rm_NA(unlist(data))))))
}
for(i in 1:length(species_spaced$name)){
aphia_id[i] = max_name(data, species_unspaced[i], variable = "speciesid", providers = "obis")
taxon_status[i] = max_name(data, species_unspaced[i], variable = "taxonomicStatus", providers = "gbif")
}
data.frame(STATUS = unlist(taxon_status), APHIA_ID = unlist(aphia_id))
}
ecosystem_infos = function(data, species, variable){
info = list()
for(i in 1:length(species)){
if(variable == "latitude"){
information = as.numeric(c(data[["obis"]][["data"]][[species_unspaced[i]]][[variable]],
data[["gbif"]][["data"]][[species_unspaced[i]]][[variable]],
data[["vertnet"]][["data"]][[species_unspaced[i]]][[variable]]))
information = information[!is.na(information)]
information = information[which(!is_extreme(information))]
if(length(information) == 0) info[i] = NA
else if(all(information > 40) || all(information < -40)) info[i] = "> 40"
else if(all(information > 40) && all(information < -40)) info[i] = "> 40"
else if(all(information < 40) && all(information > -40)) info[i] = "< 40"
else info[i] = "both"
}
else if(variable == "zone"){
terrestrial = data[["obis"]][["data"]][[species_unspaced[i]]][["terrestrial"]]
terrestrial = terrestrial[!is.na(terrestrial)]
marine = data[["obis"]][["data"]][[species_unspaced[i]]][["marine"]]
marine = marine[!is.na(marine)]
if(length(terrestrial) == 0 && length(marine) == 0) info[i] = NA
else if(any(terrestrial)) info[i] = "terrestrial"
else if(all(marine)) info[i] = "marine"
else if(!all(marine)) info[i] = "freshwater"
else info[i] = "both"
}
}
if(variable == "latitude") data.frame(CLIMATE = unlist(info))
else data.frame(ENVIRONMENT = unlist(info))
}
end_time_processing = Sys.time()
cat(paste0("Data processed in ", round(end_time_processing - start_time_processing, 2), " seconds\n"))
tibble(cbind(ID = species_spaced$taxid, SPECIES = species_spaced$name,
taxonomy_infos(databases_info),
ecosystem_infos(databases_info, species_unspaced, "zone"),
ecosystem_infos(databases_info, species_unspaced, "latitude")))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.