## Requirement: 'tibble' + 'algaeClassify' + 'stringr'
# Info: Automatically detects genera with missing taxonomic information, launches an AlgaeBase search for each
# Info: of them, and returns the initial data frame completed with the AlgaeBase information.
# Info: Relies on the helpers consensus_deduplification() and add_infos(), which are not defined in this function.
#
# Arguments:
#   data       : data frame containing at least the columns named by genus_col and family_col.
#   genus_col  : name of the genus column (default "GENUS").
#   family_col : name of the family column (default "FAMILY").
# Value:
#   A tibble built from add_infos(data, <AlgaeBase table>, genus_col). When no genus needs a lookup, or when no
#   lookup returns usable information, a tibble built from the unchanged data is returned instead.
# Errors:
#   Stops when a required column is absent, or when the genus/family consistency check fails.
algaebase_taxonomy = function(data, genus_col = "GENUS", family_col = "FAMILY"){
  if(!all(c(genus_col, family_col) %in% colnames(data)))
    stop('The data must contain columns provided in the genus_col and family_col argument (default: "GENUS" & "FAMILY").')

  # Genera found on at least one row holding a missing value (in any column of data).
  genus_list = unique(data[[genus_col]][rowSums(is.na(data)) > 0])
  # A missing genus name cannot be searched for.
  genus_list = genus_list[!is.na(genus_list)]
  # Nothing to look up: avoid iterating over an empty list and return the data as is.
  if(length(genus_list) == 0)
    return(tibble::tibble(data))

  # Consistency check delegated to consensus_deduplification() (defined elsewhere).
  # NOTE(review): presumably it collapses rows per family and joins the genus names with " or " -- confirm.
  grouped_genus = consensus_deduplification(data[data[[genus_col]] %in% genus_list, ], family_col)
  grouped_genus = grouped_genus[!is.na(grouped_genus[[family_col]]), ]
  listed_genus = strsplit(grouped_genus[[genus_col]], " or ")
  if(any(duplicated(listed_genus)))
    stop('Some genus with missing taxonomic infos belong to multiple families!')

  # One slot per genus; slots of failed searches stay NULL and are dropped afterwards.
  n_genus = length(genus_list)
  results = vector("list", n_genus)
  for(i in seq_along(genus_list)){
    start_time = Sys.time()
    cat("\n")
    cat(paste0('Searching info for "', genus_list[i],'":\n'))
    cat("\n")
    ecology = tryCatch(
      algaeClassify::algae_search(genus = genus_list[i], long = TRUE),
      error = function(e) {
        # Best effort: report the failure and carry on with the next genus.
        message('Search failed for "', genus_list[i], '": ', conditionMessage(e))
        NULL
      }
    )
    if(!is.null(ecology)) results[[i]] = ecology

    if(n_genus != 1) {
      # Force seconds so that the thresholds below (60 / 3600) are compared in the right unit.
      duration = difftime(Sys.time(), start_time, units = "secs")
      elapsed = as.numeric(duration)
      cat(paste("Time taken:", round(elapsed, 2), units(duration), "\n"))
      # Estimate: duration of the last search times the number of genera still to process.
      time_left = round(elapsed * (n_genus - i), 2)
      if(time_left < 60) {
        cat("Time left : ", time_left, " seconds", "\n", "")
      } else if(time_left < 3600) {
        cat("Time left : ", round(time_left / 60, 2), " minutes", "\n", "")
      } else {
        cat("Time left : ", round(time_left / 3600, 2), " hours", "\n", "")
      }
      cat("\n")
    }
  }

  results = Filter(Negate(is.null), results)
  if(length(results) == 0) {
    warning('No AlgaeBase information could be retrieved; returning the data unchanged.')
    return(tibble::tibble(data))
  }
  algaebase = do.call(rbind, results)

  algaebase_table = data.frame(algaebase$genus, algaebase$Family, algaebase$Order, algaebase$Class,
                               algaebase$Phylum, algaebase$Kingdom, algaebase$Empire)
  # Drop rows where every taxonomic field is missing.
  algaebase_table = algaebase_table[rowSums(!is.na(algaebase_table)) > 0, , drop = FALSE]
  if(nrow(algaebase_table) == 0) {
    warning('No AlgaeBase information could be retrieved; returning the data unchanged.')
    return(tibble::tibble(data))
  }
  # Keep only the first word of every field.
  algaebase_table[] = lapply(algaebase_table, function(column) stringr::word(as.character(column), 1))
  # NOTE(review): the first two columns follow genus_col / family_col so the table lines up with `data` when
  # non-default column names are used (identical to the previous names under the defaults) -- confirm this
  # matches what add_infos() expects.
  colnames(algaebase_table) = c(genus_col, family_col, "ORDER", "CLASS", "PHYLUM", "KINGDOM", "SUPERKINGDOM")

  output = add_infos(data, algaebase_table, genus_col)
  tibble::tibble(output)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.