R/generateCounts.R

Defines functions generate_Counts

#' @title Summarise germplasm and herbarium record counts per species (pre-analysis function)
#' @name generate_Counts
#' @description Builds a per-species summary of occurrence records: the total
#'  number of records, how many carry a usable latitude and/or longitude, and
#'  the same totals split by record type (\code{"G"} and \code{"H"}, i.e. the
#'  germplasm and herbarium records referred to by the GapAnalysisR indicators).
#'
#'  A coordinate value is considered usable when it is not \code{NA} and is not
#'  one of the placeholder strings \code{"\\N"}, \code{""} or \code{"NULL"}.
#'  A record is "useful" when both its latitude and its longitude are usable.
#'
#' @param species_list A vector (or list of single names) of species to be
#'  summarised. Each entry is compared against the \code{taxon} column.
#' @param occurrenceData A data frame of occurrence records with at least the
#'  columns \code{taxon}, \code{type}, \code{latitude} and \code{longitude}.
#'
#' @return A data frame with one row per entry of \code{species_list} (in the
#'  same order) and the columns \code{species}, \code{totalRecords},
#'  \code{hasLat}, \code{hasLong}, \code{totalUseful}, \code{totalGRecords},
#'  \code{totalGUseful}, \code{totalHRecords} and \code{totalHUseful}.
#'  Species without any record get a row of zeros; an empty
#'  \code{species_list} gives a zero-row data frame.
#'
#' @examples
#'  occData <- data.frame(
#'    taxon = c("Cucurbita pepo", "Cucurbita pepo", "Cucurbita maxima"),
#'    type = c("G", "H", "H"),
#'    latitude = c("4.5", "", "10.1"),
#'    longitude = c("-74.1", "-75.0", "NULL"),
#'    stringsAsFactors = FALSE
#'  )
#'  speciesList <- c("Cucurbita pepo", "Cucurbita maxima")
#'
#'  x <- generate_Counts(speciesList, occData)
#'
#'@references
#'
#' Khoury, C. K., Amariles, D., Soto, J. S., Diaz, M. V., Sotelo, S., Sosa, C. C., … Jarvis, A. (2019).
#' Comprehensiveness of conservation of useful wild plants: An operational indicator for biodiversity
#' and sustainable development targets. Ecological Indicators. https://doi.org/10.1016/j.ecolind.2018.11.016
#'
#' @export

#species_list <- speciesList
#occurrenceData <- occData

generate_Counts <- function(species_list, occurrenceData) {
  # Fail early with a clear message instead of an obscure evaluation error
  # (or a silent lookup of a same-named global) when a column is absent.
  required_cols <- c("taxon", "type", "latitude", "longitude")
  missing_cols <- setdiff(required_cols, names(occurrenceData))
  if (length(missing_cols) > 0) {
    stop(
      "occurrenceData is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # TRUE where a coordinate holds a real value rather than NA or one of the
  # textual placeholders used for missing data.
  is_usable <- function(x) {
    !is.na(x) & !(as.character(x) %in% c("\\N", "", "NULL"))
  }

  species <- as.character(species_list)
  taxon <- as.character(occurrenceData[["taxon"]])
  record_type <- as.character(occurrenceData[["type"]])

  # Row-level flags do not depend on the species, so compute them only once.
  has_lat <- is_usable(occurrenceData[["latitude"]])
  has_long <- is_usable(occurrenceData[["longitude"]])
  has_both <- has_lat & has_long
  # %in% never yields NA, so records with a missing type count for neither.
  is_g <- record_type %in% "G"
  is_h <- record_type %in% "H"

  # Row indices of each requested species; which() drops NA comparisons, so
  # records with a missing taxon are never attributed to any species.
  rows_by_species <- lapply(species, function(sp) which(taxon == sp))

  # Number of flagged rows for every species (0 when a species has no rows).
  count_rows <- function(flag) {
    vapply(rows_by_species, function(rows) sum(flag[rows]), integer(1))
  }

  # Building the result from vectors also handles an empty species_list,
  # which simply produces a zero-row data frame.
  data.frame(
    species = species,
    totalRecords = lengths(rows_by_species),
    hasLat = count_rows(has_lat),
    hasLong = count_rows(has_long),
    totalUseful = count_rows(has_both),
    totalGRecords = count_rows(is_g),
    totalGUseful = count_rows(is_g & has_both),
    totalHRecords = count_rows(is_h),
    totalHUseful = count_rows(is_h & has_both),
    stringsAsFactors = FALSE
  )
}
dcarver1/gapAnalysisR documentation built on Feb. 29, 2020, 12:13 p.m.