Nothing
#' Identify records outside natural ranges according to Flora e Funga do Brasil
#'
#' @description This function removes or flags records outside of the species'
#' natural ranges according to information provided by the Flora e Funga do
#' Brasil database.
#'
#' @param data (data.frame) the data.frame imported with the
#' \code{\link{load_florabr}} function.
#' @param occ (data.frame) a data.frame with the records of the species.
#' @param species (character) column name in occ with species names.
#' Default = "species"
#' @param long (character) column name in occ with longitude data. Default = "x"
#' @param lat (character) column name in occ with latitude data. Default = "y"
#' @param by_state (logical) filter records by state? Default = TRUE
#' @param buffer_state (numeric) buffer (in km) around the polygons of the
#' states of occurrence of the specie. Default = 20.
#' @param by_biome (logical) filter records by biome? Default = TRUE
#' @param buffer_biome (numeric) buffer (in km) around the polygons of the
#' biomes of occurrence of the specie. Default = 20.
#' @param by_endemism (logical) filter records by endemism? Default = TRUE
#' @param buffer_brazil (numeric) buffer (in km) around the polygons of the
#' brazil. Default = 20.
#' @param state_vect (SpatVector) a SpatVector of the Brazilian states. By
#' default, it uses the SpatVector provided by geobr::read_state(). It can be
#' another Spatvector, but the structure must be identical to
#' geobr::read_state().
#' @param state_column (character) name of the column in state_vect containing
#' state abbreviations. Only use if biome_vect is not null.
#' @param biome_vect (SpatVector) a SpatVector of the Brazilian biomes. By
#' default, it uses the SpatVector provided by geobr::read_biomes(). It can be
#' another SpatVector, but the structure must be identical to
#' geobr::read_biomes() with biome names in English.
#' @param biome_column (character) name of the column in biome_vect containing
#' names of brazilian biomes (in English: "Amazon", "Atlantic_Forest",
#' "Caatinga", "Cerrado", "Pampa" and "Pantanal". Only use if biome_vect is not
#' null.
#' @param br_vect (SpatVector) a SpatVector of brazil. By default, it uses the
#' SpatVector provided by geobr::read_state() after being aggregated/dissolved,
#' @param value (character) Defines output values. See Value section.
#' Default = "flag&clean".
#' @param keep_columns (logical) if TRUE, keep all the original columns of the
#' input occ. If False, keep only the columns species, long and lat.
#' Default = TRUE
#' @param verbose (logical) Whether to display species being filtered during
#' function execution. Set to TRUE to enable display, or FALSE to run silently.
#' Default = TRUE.
#' @details
#' If by_state = TRUE and/or by_biome = TRUE, the function takes polygons
#' representing the states and/or biomes with confirmed occurrences of the
#' specie, draws a buffer around the polygons, and tests if the records of the
#' species fall inside it.
#' If by_endemism = TRUE, the function checks if the species is endemic to
#' brazil. If it is endemic, the function tests if the records of the specie
#' fall inside a polygon representing the boundaries of brazil (with a buffer).
#'
#'
#' @return Depending on the 'value' argument. If value = "flag", it returns the
#' same data.frame provided in data with additional columns indicating if the
#' record falls inside the natural range of the specie (TRUE) or outside
#' (FALSE).
#' If value = "clean", it returns a data.frame with only the records that passes
#' all the tests (TRUE for all the filters). If value = "flag&clean" (Default),
#' it returns a list with two data.frames: one with the flagged records and one
#' with the cleaned records.
#' @usage filter_florabr(data, occ, species = "species", long = "x", lat = "y",
#' by_state = TRUE, buffer_state = 20, by_biome = TRUE,
#' buffer_biome = 20, by_endemism = TRUE,
#' buffer_brazil = 20, state_vect = NULL,
#' state_column = NULL, biome_vect = NULL,
#' biome_column = NULL, br_vect = NULL,
#' value = "flag&clean", keep_columns = TRUE,
#' verbose = TRUE)
#' @export
#'
#' @importFrom terra aggregate subset buffer unwrap mask as.data.frame vect
#' @importFrom data.table rbindlist
#' @importFrom stats na.omit
#' @importFrom stats na.omit
#'
#' @references
#' Flora e Funga do Brasil. Jardim Botânico do Rio de Janeiro. Available at:
#' http://floradobrasil.jbrj.gov.br/
#' @examples
#' data("bf_data") #Load Flora e Funga do Brasil data
#' data("occurrences") #Load occurrences
#' pts <- subset(occurrences, species == "Myrcia hatschbachii")
#' fd <- filter_florabr(data = bf_data, occ = pts,
#' by_state = TRUE, buffer_state = 20,
#' by_biome = TRUE, buffer_biome = 20,
#' by_endemism = TRUE, buffer_brazil = 20,
#' state_vect = NULL,
#' biome_vect = NULL, br_vect = NULL,
#' value = "flag&clean", keep_columns = TRUE,
#' verbose = FALSE)
filter_florabr <- function(data,
occ,
species = "species", long = "x", lat = "y",
by_state = TRUE, buffer_state = 20,
by_biome = TRUE, buffer_biome = 20,
by_endemism = TRUE, buffer_brazil = 20,
state_vect = NULL, state_column = NULL,
biome_vect = NULL, biome_column = NULL,
br_vect = NULL,
value = "flag&clean", keep_columns = TRUE,
verbose = TRUE) {
if (missing(data)) {
stop("Argument data is not defined")
}
if (missing(occ)) {
stop("Argument occ is not defined")
}
if (!inherits(data, "data.frame")) {
stop(paste0("Argument data must be a data.frame, not ",
class(data)))
}
if (!inherits(occ, "data.frame")) {
stop(paste0("Argument occ must be a data.frame, not ",
class(occ)))
}
if (!is.character(species)) {
stop(paste0("Argument species must be a character, not ",
class(species)))
}
if (!is.character(long)) {
stop(paste0("Argument long must be a character, not ",
class(long)))
}
if (!is.character(lat)) {
stop(paste0("Argument lat must be a character, not ",
class(lat)))
}
if (!is.logical(by_state)) {
stop(paste0("Argument by_state must be logical, not ",
class(by_state)))
}
if (!is.logical(by_biome)) {
stop(paste0("Argument by_biome must be logical, not ",
class(by_biome)))
}
if (!is.logical(by_endemism)) {
stop(paste0("Argument by_endemism must be logical, not ",
class(by_endemism)))
}
if (!is.numeric(buffer_state)) {
stop(paste0("Argument buffer_state must be numeric, not ",
class(buffer_state)))
}
if (!is.numeric(buffer_biome)) {
stop(paste0("Argument buffer_biome must be numeric, not ",
class(buffer_biome)))
}
if (!is.null(state_vect) && !inherits(state_vect, "SpatVector")) {
stop(paste0("Argument state_vect must be a SpatVector, not ",
class(state_vect)))
}
if (!is.null(biome_vect) && !inherits(biome_vect, "SpatVector")) {
stop(paste0("Argument biome_vect must be a SpatVector, not ",
class(biome_vect)))
}
if (!is.null(br_vect) && !inherits(br_vect, "SpatVector")) {
stop(paste0("Argument br_vect must be a SpatVector, not ",
class(br_vect)))
}
if (!is.null(state_vect) && !is.character(state_column)) {
stop(paste0("Argument state_column must be a character, not ",
class(state_column)))
}
if (!is.null(biome_vect) && !is.character(biome_column)) {
stop(paste0("Argument biome_column must be a character, not ",
class(biome_column)))
}
allowed_values <- c("flag&clean", "flag", "clean")
if (!(value %in% allowed_values)) {
stop("Argument value must be 'flag', 'clean' or 'flag&clean'")
}
if (!is.logical(keep_columns)) {
stop(paste0("Argument keep_columns must be logical, not ",
class(keep_columns)))
}
if (!is.logical(verbose)) {
stop(paste0("Argument verbose must be logical, not ",
class(verbose)))
}
#Convert colnames to lower case
original_colnames <- colnames(data)
colnames(data) <- tolower(colnames(data))
if (!all(c("species", "states", "biome", "endemism") %in%
colnames(data))) {
stop("Important columns are missing in data. Check if data is an object\n created by 'load_florabr()")
}
if (!all(c(species, long, lat) %in% colnames(occ))) {
stop("Important columns are missing in occurrence data. Check if correct\n column names were set in species, long and lat")
}
d <- data[, c("species", "states", "biome", "endemism")]
occ$id_f <- seq_len(nrow(occ))
occ_info <- occ[, c(species, long, lat, "id_f")]
colnames(occ_info) <- c("species", "x", "y", "id_f")
spp <- unique(occ_info$species)
spp_out <- setdiff(spp, unique(data$species))
if (length(spp_out) > 0) {
stop(paste(length(spp_out), "species are not in the data. Check the species\n names using the check_names() function or remove the species from\n data.frame"))
}
d_info <- subset(d, d$species %in% unique(occ_info$species))
d_info[d_info == ""] <- NA
sp_info <- lapply(seq_along(spp), function(i) {
sp <- subset(d_info, d_info$species == spp[i])
sp$states <- paste0(na.omit(unique(sp$states)), collapse = ";")
sp$biome <- paste0(na.omit(unique(sp$biome)), collapse = ";")
sp$endemism <- paste0(na.omit(unique(sp$endemism)),
collapse = ";")
return(sp)
})
sp_info <- unique(data.table::rbindlist(sp_info))
occ_info <- merge(occ_info, sp_info, by = "species")
occ_info <- terra::vect(occ_info, geom = c("x", "y"),
crs = "+init=epsg:4326")
#Get vector of Brazil
if(is.null(br_vect)){
brazil <- terra::unwrap(florabr::brazil) } else {
brazil <- br_vect
}
#Buffer around Brazil
br_v <- terra::buffer(brazil, width = buffer_brazil *
1000)
#Check records inside Brazil
occ_info$inside_br <- terra::is.related(occ_info, br_v, "within")
if (by_state == TRUE) {
states <- terra::unwrap(florabr::states)
}
if (by_biome == TRUE) {
biomes <- terra::unwrap(florabr::biomes)
}
if (!is.null(state_vect)) {
names(state_vect)[which(names(state_vect) == state_column)] <- "abbrev_state"
check_matches <- setdiff(states$abbrev_state, state_vect$abbrev_state)
if (length(check_matches) > 0) {
stop(paste0("Invalid states in ", state_column,
"\nCheck the structure of the Spatvector provided in state_vect"))
}
else {
states <- state_vect
}
}
if (!is.null(biome_vect)) {
names(biome_vect)[which(names(biome_vect) == biome_column)] <- "name_biome"
check_matches <- setdiff(biomes$name_biome, biome_vect$name_biome)
if (length(check_matches) > 0) {
stop(paste0("Invalid biomes in ", biome_column,
"\nCheck the structure of the Spatvector provided in\n biome_vect"))
}
else {
biomes <- biome_vect
}
}
if (by_state == FALSE) {
occ_info$inside_state <- NA
}
if (by_state == TRUE) {
l_state <- lapply(seq_along(spp), function(i) {
if (verbose) {
message("Filtering", spp[i], "by state\n")
}
occ_i <- occ_info[occ_info$species == spp[i]]
sp_i_state <- unique(gsub(";", "|", occ_i$states[1]))
if (sp_i_state == "" | is.na(sp_i_state)) {
if (verbose) {
message(spp[i], "lacks info about state - Filter not applicable\n")
}
states_final <- occ_i
states_final$inside_state <- "No info"
}
else {
states_v <- terra::aggregate(terra::subset(states,
grepl(sp_i_state, states$abbrev_state)))
states_v <- terra::buffer(states_v, width = buffer_state *
1000)
#Create columns
occ_i$inside_state <- NA
#Fill inside_state of records inside br
occ_i$inside_state[occ_i$inside_br] <- terra::is.related(occ_i[occ_i$inside_br],
states_v,
"within")
}
return(occ_i)
})
occ_info <- do.call("rbind", l_state)
}
if (by_biome == FALSE) {
occ_info$inside_biome <- NA
}
if (by_biome == TRUE) {
l_biome <- lapply(seq_along(spp), function(i) {
if (verbose) {
message("Filtering", spp[i], "by biome\n")
}
occ_i <- terra::subset(occ_info, occ_info$species ==
spp[i])
sp_i_biome <- unique(gsub(";", "|", occ_i$biome[1]))
if (sp_i_biome == "" | is.na(sp_i_biome)) {
if (verbose) {
message(spp[i], "lacks info about biome - Filter not applicable\n")
}
biomes_final <- occ_i
biomes_final$inside_biome <- "No info"
}
else {
biomes_v <- terra::aggregate(terra::subset(biomes,
grepl(sp_i_biome, biomes$name_biome)))
biomes_v <- terra::buffer(biomes_v, width = buffer_biome *
1000)
#Create columns
occ_i$inside_biome <- NA
#Fill inside_state of records inside br
occ_i$inside_biome[occ_i$inside_br] <- terra::is.related(occ_i[occ_i$inside_br],
biomes_v,
"within")
}
return(occ_i)
})
occ_info <- do.call("rbind", l_biome)
}
#Fix flags by endemism: if is non-endemic, is NA
if (by_endemism) {
occ_info$inside_br[occ_info$endemism != "Endemic"] <- NA
}
occ_flag <- as.data.frame(occ_info)
if (keep_columns) {
occ_flag <- merge(occ_flag, occ, by = c("species", "id_f"))
occ_flag$id_f <- NULL
occ_flag <- occ_flag[, c(species, long, lat, colnames(occ_flag)[!(colnames(occ_flag) %in%
c(species, long, lat))])]
colnames(occ_flag)[colnames(occ_flag) %in% c(species,
long, lat)] <- c(species, long, lat)
}
if (!keep_columns) {
occ_flag <- merge(occ_flag, occ[, c(species, lat, long,
"id_f")], by = c("species", "id_f"))
occ_flag$id_f <- NULL
occ_flag <- occ_flag[, c(species, long, lat, names(occ_flag)[!(names(occ_flag) %in%
c(species, long, lat))])]
colnames(occ_flag)[colnames(occ_flag) %in% c(species,
long, lat)] <- c(species, long, lat)
}
if (!by_state) {
occ_flag$inside_state <- NULL
}
if (!by_biome) {
occ_flag$inside_biome <- NULL
}
if (!by_endemism) {
occ_flag$inside_br <- NULL
}
col_check <- intersect(c("inside_state", "inside_biome",
"inside_br"), names(occ_flag))
if (length(col_check) == 1) {
occ_flag$filters_ok <- ifelse(occ_flag[, col_check] ==
TRUE | is.na(occ_flag[, col_check]), TRUE, FALSE)
} else {
occ_flag$filters_ok <- ifelse(rowSums(!is.na(occ_flag[,
col_check]) & occ_flag[, col_check] == FALSE) >
0, FALSE, TRUE)
}
occ_clean <- subset(occ_flag, occ_flag$filters_ok == TRUE)
occ_clean <- subset(occ_clean, select = setdiff(colnames(occ_clean),
c(col_check, "filters_ok")))
if (value == "flag&clean") {
res_final <- list(occ_flag, occ_clean)
names(res_final) <- c("flagged", "cleaned")
if (verbose) {
message("Returning list with flagged and cleaned occurrences\n")
}
}
if (value == "flag") {
res_final <- occ_flag
if (verbose) {
message("Returning dataframe with flagged occurrences\n")
}
}
if (value == "clean") {
res_final <- occ_clean
if (verbose) {
message("Returning dataframe with cleaned occurrences")
}
}
return(res_final)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.