#' @title Count germplasm and herbarium occurrence records per species
#' @name generate_Counts
#' @description Builds a summary table with one row per requested species
#'  giving the total number of occurrence records, how many of them carry a
#'  latitude, a longitude, or both, and the same totals split by record type
#'  (\code{"G"} for germplasm and \code{"H"} for herbarium records).
#'
#'  A coordinate is treated as present when it is not \code{NA} and is not one
#'  of the placeholder strings \code{"\\N"}, \code{""} or \code{"NULL"}.
#'  The function is implemented with base R only, so calling it does not
#'  attach any package.
#'
#' @param species_list A vector (or list) of taxon names to summarise. Each
#'  name is matched exactly against the \code{taxon} column of
#'  \code{occurrenceData}. May be empty.
#' @param occurrenceData A data frame of occurrence records with the columns
#'  \code{taxon}, \code{type}, \code{latitude} and \code{longitude}.
#'
#' @return A data frame with one row per element of \code{species_list} and
#'  the columns \code{species}, \code{totalRecords}, \code{hasLat},
#'  \code{hasLong}, \code{totalUseful} (records with both coordinates),
#'  \code{totalGRecords}, \code{totalGUseful}, \code{totalHRecords} and
#'  \code{totalHUseful}. Species without any record get zero counts; an empty
#'  \code{species_list} yields a zero-row data frame with the same columns.
#'
#' @examples
#' occ <- data.frame(
#'   taxon = c("Cucurbita_digitata", "Cucurbita_digitata", "Cucurbita_palmata"),
#'   type = c("G", "H", "H"),
#'   latitude = c("33.1", "", "32.7"),
#'   longitude = c("-114.5", "-115.0", "\\N"),
#'   stringsAsFactors = FALSE
#' )
#' generate_Counts(c("Cucurbita_digitata", "Cucurbita_palmata"), occ)
#'
#'@references
#'
#' Khoury, C. K., Amariles, D., Soto, J. S., Diaz, M. V., Sotelo, S., Sosa, C. C., … Jarvis, A. (2019).
#' Comprehensiveness of conservation of useful wild plants: An operational indicator for biodiversity
#' and sustainable development targets. Ecological Indicators. https://doi.org/10.1016/j.ecolind.2018.11.016
#'
#' @export
generate_Counts <- function(species_list, occurrenceData) {
  if (!is.data.frame(occurrenceData)) {
    stop("occurrenceData must be a data frame", call. = FALSE)
  }
  required_cols <- c("taxon", "type", "latitude", "longitude")
  missing_cols <- setdiff(required_cols, names(occurrenceData))
  if (length(missing_cols) > 0) {
    stop(
      "occurrenceData is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # A coordinate counts as present unless it is NA or a known placeholder.
  # The comparison is done on the character form so numeric, character and
  # factor columns are all handled the same way.
  has_coordinate <- function(x) {
    !is.na(x) & !(as.character(x) %in% c("\\N", "", "NULL"))
  }

  species <- as.character(unlist(species_list, use.names = FALSE))
  taxon <- as.character(occurrenceData[["taxon"]])
  type <- as.character(occurrenceData[["type"]])

  has_lat <- has_coordinate(occurrenceData[["latitude"]])
  has_long <- has_coordinate(occurrenceData[["longitude"]])
  is_useful <- has_lat & has_long
  # Records with a missing type belong to neither the G nor the H totals.
  is_g <- !is.na(type) & type == "G"
  is_h <- !is.na(type) & type == "H"

  # Row indices of each species; which() drops NA comparisons, so records
  # with a missing taxon (or an NA species name) never match.
  rows_by_species <- lapply(species, function(sp) which(taxon == sp))

  # Number of records per species for which `flag` is TRUE.
  count_flagged <- function(flag) {
    vapply(rows_by_species, function(idx) sum(flag[idx]), integer(1))
  }

  data.frame(
    species = species,
    totalRecords = lengths(rows_by_species),
    hasLat = count_flagged(has_lat),
    hasLong = count_flagged(has_long),
    totalUseful = count_flagged(is_useful),
    totalGRecords = count_flagged(is_g),
    totalGUseful = count_flagged(is_g & is_useful),
    totalHRecords = count_flagged(is_h),
    totalHUseful = count_flagged(is_h & is_useful),
    stringsAsFactors = FALSE
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.