## Requirement: 'tibble' + 'stringr'
# Info: Since the multiple_databases_ecology and worms_ecology functions take very long to compute, this function splits the request into smaller divisions.
# Info: After each division of X rows (X being provided by the user), it saves the data as a CSV file in the directory (when "write_csv" is TRUE).
# Info: At the end, it unites all the CSV files and moves the intermediate files to the temporary directory (deleted at the end of the session).
# Info: It also returns the full dataframe (as a tibble); the split results are not returned separately.
# Note: If the "write_csv" option is set to TRUE, do not open the temporary csv files during the process.
# Runs a slow ecology query chunk by chunk and binds the chunk results together.
#
# Args:
#   data:            data frame to query. In "ogv" mode it must contain `species_col`;
#                    in "worms" mode it must contain `aphia_col`.
#   mode:            "ogv" (calls multiple_databases_ecology) or "worms" (calls worms_ecology).
#                    "algaebase" is accepted by the validation but is not handled here.
#   division_number: number of rows per chunk (>= 1 and <= nrow(data)); fractions are floored.
#   get_aphia_id:    if TRUE, worms_ecology(..., add_aphia_id = TRUE) is first run on the whole
#                    data and its result is what gets chunked in "worms" mode.
#   random_samples, databases: forwarded to multiple_databases_ecology ("ogv" mode only).
#   write_csv:       if TRUE, each chunk is written to "<file_name><i>.csv"; at the end the chunk
#                    files are merged into "<file_name>.csv" and moved to tempdir().
#   file_name:       file name prefix (without extension); required when write_csv is TRUE.
#   aphia_col, species_col, id_col: column names forwarded to the query functions.
#   genus_col, climate_col: currently unused; kept for interface compatibility.
#   print_division:  if TRUE, prints the "start:end" row range of every chunk.
#
# Returns: tibble(<row-bound results of all chunks>).
division_ecology <- function(data, mode, division_number, get_aphia_id = FALSE, random_samples = 50,
                             write_csv = FALSE, file_name = NULL,
                             databases = c("obis", "gbif", "vertnet"), aphia_col = "APHIA_ID",
                             genus_col = "GENUS", climate_col = "CLIMATE", species_col = "SPECIES",
                             id_col = "ID", print_division = TRUE) {
  # ---- Argument validation ----
  if (write_csv && is.null(file_name)) {
    stop('You must provide a file name if the "write_csv" option is set to TRUE.')
  }
  if (!is.character(mode) || length(mode) != 1 || !(mode %in% c("ogv", "worms", "algaebase"))) {
    stop('Mode must be set to "ogv" for querying OBIS, GBIF and Vertnet databases, "worms" to query WORMS, or "algaebase" to query ALGAEBASE.')
  }
  # "algaebase" has no query branch below; fail with an explicit message instead of
  # an "object not found" error further down.
  if (mode == "algaebase") {
    stop('The "algaebase" mode is not handled by division_ecology; use "ogv" or "worms".')
  }
  if (mode == "ogv" && !is.data.frame(data)) {
    stop('The data must be a dataframe with a column containing the species names and their IDs.')
  }
  if (mode == "worms" && !is.data.frame(data)) {
    stop('The data must be a dataframe with a column containing APHIA IDs.')
  }
  if (mode == "worms" && !any(colnames(data) == aphia_col)) {
    stop('The data must contain a the column provided in the aphia_col argument (default: "APHIA_ID").')
  }
  if (mode == "ogv" && !(species_col %in% colnames(data))) {
    stop('The data must contain the column provided in the species_col argument (default: "SPECIES").')
  }
  if (!is.numeric(division_number) || length(division_number) != 1 ||
      is.na(division_number) || division_number < 1) {
    stop("Division number must be a single number greater than or equal to 1.")
  }
  # Row indices must be whole numbers.
  division_number <- floor(division_number)

  n_rows <- nrow(data)
  if (division_number > n_rows) {
    stop("Division number must be inferior to the number of species names.")
  }

  # ---- Chunk boundaries ----
  # Consecutive chunks of `division_number` rows; the last one holds the remainder.
  start <- seq(1, n_rows, by = division_number)
  end <- pmin(start + division_number - 1, n_rows)
  n_chunks <- length(start)
  if (print_division) print(paste(start, end, sep = ":"))

  # NOTE(review): the completed data is only consumed in "worms" mode; in "ogv" mode the
  # chunks are taken from `data`, so the result of this call is unused there - confirm intent.
  if (get_aphia_id) {
    completed_data <- worms_ecology(data, name_aphia_col = aphia_col, add_aphia_id = TRUE)
  } else {
    completed_data <- data
  }

  # ---- Query every chunk ----
  results <- vector("list", n_chunks)
  file_names <- character(n_chunks)
  for (i in seq_len(n_chunks)) {
    start_time <- Sys.time()
    cat(paste0(i, "/", n_chunks, " - State: "))
    rows <- start[i]:end[i]
    # drop = FALSE keeps a data frame even when the data has a single column.
    if (mode == "ogv") {
      ecology <- multiple_databases_ecology(data[rows, , drop = FALSE], col_species = species_col,
                                            col_id = id_col, random_samples = random_samples,
                                            databases = databases)
    } else {
      ecology <- worms_ecology(completed_data[rows, , drop = FALSE], name_aphia_col = aphia_col,
                               add_aphia_id = FALSE)
    }
    results[i] <- list(ecology)

    if (write_csv) {
      file_names[i] <- paste0(file_name, i, ".csv")
      write.csv(ecology, file = file_names[i], row.names = FALSE)
      cat("\n")
      cat(paste("DONE:", file_names[i], "\n"))
      cat("\n")
    }

    if (n_chunks != 1) {
      # Force seconds: a difftime picks its own unit (secs/mins/hours), which would make
      # the thresholds below meaningless. The estimate extrapolates the last chunk's duration.
      elapsed <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
      time_left <- round(elapsed * (n_chunks - i), 2)
      if (time_left < 60) {
        cat("Time left : ", time_left, " seconds", "\n", "")
      } else if (time_left < 3600) {
        cat("Time left : ", round(time_left / 60, 2), " minutes", "\n", "")
      } else {
        cat("Time left : ", round(time_left / 3600, 2), " hours", "\n", "")
      }
      cat("\n")
    }
  }
  ecology_final <- do.call(rbind, results)

  # ---- Merge the chunk files ----
  if (write_csv) {
    final_file <- paste0(file_name, ".csv")
    if (file.exists(final_file)) {
      file.rename(from = final_file, to = paste0(file_name, "- previous version.csv"))
      warning("A previous version with the same file name was renamed to avoid overwritting it.")
    }
    unique_file <- do.call(rbind, lapply(file_names, read.csv, header = TRUE, sep = ","))
    for (chunk_file in file_names) {
      # Copy + delete instead of file.rename(): renaming fails when tempdir() is on another
      # file system; basename() keeps the target valid when file_name contains a directory.
      target <- file.path(tempdir(), basename(chunk_file))
      if (file.copy(chunk_file, target, overwrite = TRUE)) unlink(chunk_file)
    }
    write.csv(unique_file, file = final_file, row.names = FALSE)
  }

  tibble(ecology_final)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.