# R/select_reefish_species.R

library(tidyverse)

#' Select BOLD sequences of surveyed reef fish species with enough sequences
#'
#' Keeps the rows of a BOLD sequence table whose species also appears in a
#' reef survey species table, then drops every species that is represented by
#' too few rows (each row is counted as one sequence).
#'
#' @param prparedResBold.fishbaseValid Data frame of BOLD sequences. Must
#'   contain the columns `species_name` and `fishbase_species_name`.
#' @param reefishSurveySpecies Data frame of surveyed species. Must contain
#'   the column `TAXONOMIC_NAME`.
#' @param countSequencesbySpeciesThreshold Single non-missing number. A
#'   species is kept only when its row count is strictly greater than this
#'   value (with the default `2`, a species needs 3 or more rows).
#' @return The subset of rows of `prparedResBold.fishbaseValid` that belong
#'   to the retained species, with all columns unchanged. Zero rows are
#'   returned when no species qualifies.
#' @export
select_reefish_species <- function(prparedResBold.fishbaseValid,
                                   reefishSurveySpecies,
                                   countSequencesbySpeciesThreshold = 2) {
  ## fail early with a clear message instead of silently returning nothing
  missing_cols <- setdiff(
    c("species_name", "fishbase_species_name"),
    names(prparedResBold.fishbaseValid)
  )
  if (length(missing_cols) > 0) {
    stop(
      "`prparedResBold.fishbaseValid` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (!"TAXONOMIC_NAME" %in% names(reefishSurveySpecies)) {
    stop(
      "`reefishSurveySpecies` must contain a `TAXONOMIC_NAME` column",
      call. = FALSE
    )
  }
  if (!is.numeric(countSequencesbySpeciesThreshold) ||
      length(countSequencesbySpeciesThreshold) != 1L ||
      is.na(countSequencesbySpeciesThreshold)) {
    stop(
      "`countSequencesbySpeciesThreshold` must be a single non-missing number",
      call. = FALSE
    )
  }

  ## species names present in both BOLD (fishbase-validated name) and the
  ## survey; missing names never count as a match
  survey_species <- as.character(reefishSurveySpecies[["TAXONOMIC_NAME"]])
  bold_species <- as.character(
    prparedResBold.fishbaseValid[["fishbase_species_name"]]
  )
  shared_species <- unique(bold_species[bold_species %in% survey_species])
  shared_species <- shared_species[!is.na(shared_species)]

  ## NOTE(review): the shared set above is built from `fishbase_species_name`,
  ## but rows are matched on `species_name` (as in the original code), so a
  ## row is kept based on its `species_name`, not its own
  ## `fishbase_species_name` -- confirm this is intended.
  seq_species <- as.character(prparedResBold.fishbaseValid[["species_name"]])
  in_survey <- seq_species %in% shared_species

  ## how many sequences (rows) by species, among the shared species only
  seq_counts <- table(seq_species[in_survey])
  ## strict comparison: the count must exceed the threshold
  kept_species <- names(seq_counts)[
    as.vector(seq_counts) > countSequencesbySpeciesThreshold
  ]

  ## kept_species is a subset of shared_species, so one filter is enough
  prparedResBold.fishbaseValid[seq_species %in% kept_species, ]
}

 
# Grelot/geogendivr documentation built on Sept. 3, 2020, 6:25 p.m.