## Requirement: 'worrms' + 'tibble'
# Info: Assessment of the missing environmental information for all species with an Aphia ID number (retrieved from OBIS)
# Note: For tables with more than 1500 species names, this function must be called through the division_ecology() function because too many requests at once are not supported.
worms_ecology = function(data, name_aphia_col = "APHIA_ID", name_status_col = "STATUS", name_species_col = "SPECIES",
name_envir_col = "ENVIRONMENT", add_aphia_id = F){
zone = list()
species = list()
aphia_id_missing = list()
aphia_id = list()
status = list()
rm_NA = function(col){
col = col[!is.na(col)]
if(length(col) == 0) col = NA
col
}
if(add_aphia_id){
missing_aphia_data_pos = which(apply(data.frame(is.na(data[, name_aphia_col]), is.na(data[, name_envir_col])), 1, all))
not_missing_aphia_data_pos = which(!apply(data.frame(is.na(data[, name_aphia_col]), is.na(data[, name_envir_col])), 1, all))
missing_aphia_data = data[missing_aphia_data_pos, ]
not_missing_aphia_data = data[not_missing_aphia_data_pos, ]
if(anyNA(data[[name_aphia_col]])){
cat("Retrieving missing Aphia IDs\n")
cat("-----------------------------------------------------\n")
start = Sys.time()
for(j in 1:nrow(missing_aphia_data)){
cat(paste0(missing_aphia_data[[name_species_col]][j], " (", j, "/", length(missing_aphia_data[[name_species_col]]), ")\n"))
aphia_id_missing[j] = tryCatch(wm_name2id(missing_aphia_data[[name_species_col]][j]), error = function(e) { NA })
}
end = Sys.time()
duration = difftime(end, start)
cat("-----------------------------------------------------\n")
cat(paste("Aphia ID retrieved in:", round(duration[[1]], 2), units(duration), "\n"))
missing_aphia_data[[name_aphia_col]] = unlist(aphia_id_missing)
}
completed_data = rbind(missing_aphia_data, not_missing_aphia_data)
return(completed_data)
}
else {
completed_data = data
start_time_retrieving = Sys.time()
id = as.numeric(rm_NA(completed_data[[name_aphia_col]]))
if(!all(is.na(id))){
infos = wm_record(id)
end_time_retrieving = Sys.time()
cat(paste0("Data retrieved in ", round(end_time_retrieving - start_time_retrieving, 2), " seconds\n"))
start_time_processing = Sys.time()
for(i in 1:nrow(infos)){
species[i] = infos$scientificname[i]
aphia_id[i] = infos$AphiaID[i]
status[i] = infos$status[i]
if(is.na(infos$isMarine[i]) && is.na(infos$isFreshwater[i]) && is.na(infos$isTerrestrial[i])) zone[i] = NA
else if(!is.na(infos$isMarine[i]) && is.na(infos$isFreshwater[i]) && is.na(infos$isTerrestrial[i])){
if(infos$isMarine[i] == 1) zone[i] = "marine"
else zone[i] = NA
}
else if(!is.na(infos$isFreshwater[i]) && is.na(infos$isMarine[i]) && is.na(infos$isTerrestrial[i])){
if(infos$isFreshwater[i] == 1) zone[i] = "freshwater"
else zone[i] = NA
}
else if(!is.na(infos$isTerrestrial[i]) && is.na(infos$isMarine[i]) && is.na(infos$isFreshwater[i])){
if(infos$isTerrestrial[i] == 1) zone[i] = "terrestrial"
else zone[i] = NA
}
else if(!is.na(infos$isMarine[i]) && !is.na(infos$isFreshwater[i]) && is.na(infos$isTerrestrial[i])){
if(infos$isMarine[i] == 1 && infos$isFreshwater[i] == 1) zone[i] = "both"
else if(infos$isMarine[i] == 1 && infos$isFreshwater[i] == 0) zone[i] = "marine"
else if(infos$isMarine[i] == 0 && infos$isFreshwater[i] == 1) zone[i] = "freshwater"
else zone[i] = NA
}
else if(!is.na(infos$isMarine[i]) && is.na(infos$isFreshwater[i]) && !is.na(infos$isTerrestrial[i])){
if(infos$isMarine[i] == 0 && infos$isTerrestrial[i] == 1) zone[i] = "terrestrial"
else if(infos$isMarine[i] == 1) zone[i] = "marine"
else zone[i] = NA
}
else if(is.na(infos$isMarine[i]) && !is.na(infos$isFreshwater[i]) && !is.na(infos$isTerrestrial[i])){
if(infos$isFreshwater[i] == 0 && infos$isTerrestrial[i] == 1) zone[i] = "terrestrial"
else if(infos$isFreshwater[i] == 1) zone[i] = "freshwater"
else zone[i] = NA
}
else if(!is.na(infos$isMarine[i]) && !is.na(infos$isFreshwater[i]) && !is.na(infos$isTerrestrial[i])){
if(infos$isMarine[i] == 1 && infos$isFreshwater[i] == 1 && infos$isTerrestrial[i] == 1) zone[i] = "both"
else if(infos$isMarine[i] == 1 && infos$isFreshwater[i] == 1 && infos$isTerrestrial[i] == 0) zone[i] = "both"
else if(infos$isMarine[i] == 1 && infos$isFreshwater[i] == 0 && infos$isTerrestrial[i] == 1) zone[i] = "marine"
else if(infos$isMarine[i] == 0 && infos$isFreshwater[i] == 1 && infos$isTerrestrial[i] == 1) zone[i] = "freshwater"
else if(infos$isMarine[i] == 1 && infos$isFreshwater[i] == 0 && infos$isTerrestrial[i] == 0) zone[i] = "marine"
else if(infos$isMarine[i] == 0 && infos$isFreshwater[i] == 1 && infos$isTerrestrial[i] == 0) zone[i] = "freshwater"
else if(infos$isMarine[i] == 0 && infos$isFreshwater[i] == 0 && infos$isTerrestrial[i] == 1) zone[i] = "terrestrial"
else if(infos$isMarine[i] == 0 && infos$isFreshwater[i] == 0 && infos$isTerrestrial[i] == 0) zone[i] = NA
}
else stop("Case not planned")
}
end_time_processing = Sys.time()
cat(paste0("Data processed in ", round(end_time_processing - start_time_processing, 2), " seconds\n"))
cat("-----------------------------------------------------\n")
cat("\n")
new_infos = data.frame(APHIA_ID = unlist(aphia_id), STATUS = unlist(status), ENVIRONMENT = unlist(zone), WORMS_VALID_SPECIES = unlist(species))
completed_data = cbind(completed_data, WORMS_VALID_SPECIES = rep(NA, nrow(completed_data)), INITIAL_ENVIRONMENT = rep(NA, nrow(completed_data)),
INITIAL_STATUS = rep(NA, nrow(completed_data)))
for(j in 1:nrow(new_infos)){
position = which(new_infos[j, ]$APHIA_ID == completed_data[[name_aphia_col]])
completed_data$INITIAL_STATUS = replace(completed_data$INITIAL_STATUS, position, completed_data[[name_status_col]][position])
completed_data$INITIAL_ENVIRONMENT = replace(completed_data$INITIAL_ENVIRONMENT, position, completed_data[[name_envir_col]][position])
if(!is.na(new_infos[j, ]$STATUS)) completed_data[[name_status_col]] =
replace(completed_data[[name_status_col]], position, toupper(new_infos[j, ]$STATUS))
if(!is.na(new_infos[j, ]$ENVIRONMENT))
completed_data[[name_envir_col]] = replace(completed_data[[name_envir_col]], position, new_infos[j, ]$ENVIRONMENT)
if(any(new_infos[j, ]$WORMS_VALID_SPECIES != data[position, ]$SPECIES))
completed_data$WORMS_VALID_SPECIES = replace(completed_data$WORMS_VALID_SPECIES, position, new_infos[j, ]$WORMS_VALID_SPECIES)
}
tibble(completed_data)
}
else{
tibble(completed_data)
}
}
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.