Nothing
#' Identify records outside natural ranges according to Fauna do Brasil
#'
#' @description This function removes or flags records outside of the species'
#' natural ranges according to information provided by the Fauna do Brasil database
#'
#' @param data (data.frame) the data.frame imported with the
#' \code{\link{load_faunabr}} function.
#' @param occ (data.frame) a data.frame with the records of the species.
#' @param species (character) column name in occ with species names.
#' Default = "species"
#' @param long (character) column name in occ with longitude data. Default = "x"
#' @param lat (character) column name in occ with latitude data. Default = "y"
#' @param by_state (logical) filter records by state? Default = TRUE
#' @param buffer_state (numeric) buffer (in km) around the polygons of the
#' states of occurrence of the specie. Default = 20.
#' @param by_country (logical) filter records by country? Default = TRUE
#' @param buffer_country (numeric) buffer (in km) around the polygons of the
#' countries of occurrence of the specie. Default = 20.
#' @param value (character) Defines output values. See Value section.
#' Default = "flag&clean".
#' @param keep_columns (logical) if TRUE, keep all the original columns of the
#' input occ. If False, keep only the columns species, long and lat.
#' Default = TRUE
#' @param spat_state (SpatVector) a SpatVector of the Brazilian states. By
#' default, it uses the SpatVector provided by geobr::read_state(). It can be
#' another Spatvector, but the structure must be identical to
#' 'faunabr::states', with a column called "abbrev_state" identifying the states
#' codes.
#' @param spat_country (SpatVector) a SpatVector of the world countries. By
#' default, it uses the SpatVector provided by rnaturalearth::ne_countries. It
#' can be another Spatvector, but the structure must be identical to
#' 'faunabr::world_fauna', with a column called "country_code" identifying the
#' country codes.
#' @param verbose (logical) Whether to display species being filtered during
#' function execution. Set to TRUE to enable display, or FALSE to run silently.
#' Default = TRUE.
#' @details
#' If by_state = TRUE and/or by_country = TRUE, the function takes polygons
#' representing the states and/or countrys with confirmed occurrences of the
#' specie, draws a buffer around the polygons, and tests if the records of the
#' species fall inside it.
#'
#'
#' @return Depending on the 'value' argument. If value = "flag", it returns the
#' same data.frame provided in data with additional columns indicating if the
#' record falls inside the natural range of the specie (TRUE) or outside
#' (FALSE).
#' If value = "clean", it returns a data.frame with only the records that passes
#' all the tests (TRUE for all the filters). If value = "flag&clean" (Default),
#' it returns a list with two data.frames: one with the flagged records and one
#' with the cleaned records.
#' @usage filter_faunabr(data, occ, species = "species", long = "x", lat = "y",
#' by_state = TRUE, buffer_state = 20, by_country = TRUE,
#' buffer_country = 20, value = "flag&clean",
#' keep_columns = TRUE, spat_state = NULL,
#' spat_country = NULL, verbose = TRUE)
#' @export
#'
#' @importFrom terra aggregate subset buffer unwrap mask as.data.frame vect
#' @importFrom data.table rbindlist
#' @importFrom stats na.omit
#' @examples
#' data("fauna_data") #Load fauna e Funga do Brasil data
#' data("occurrences") #Load occurrences
#' pts <- subset(occurrences, species == "Panthera onca")
#' fd <- filter_faunabr(data = fauna_data,
#' occ = pts, long = "x", lat = "y", species = "species",
#' by_state = TRUE, buffer_state = 20,
#' by_country = TRUE, buffer_country = 20,
#' value = "flag&clean", keep_columns = TRUE,
#' verbose = FALSE)
filter_faunabr <- function(data,
occ,
species = "species", long = "x", lat = "y",
by_state = TRUE, buffer_state = 20,
by_country = TRUE, buffer_country = 20,
value = "flag&clean", keep_columns = TRUE,
spat_state = NULL,
spat_country = NULL,
verbose = TRUE) {
if (missing(data)) {
stop("Argument data is not defined")
}
if (missing(occ)) {
stop("Argument occ is not defined")
}
if (!inherits(data, "data.frame")) {
stop(paste0("Argument data must be a data.frame, not ",
class(data)))
}
if (!inherits(occ, "data.frame")) {
stop(paste0("Argument occ must be a data.frame, not ",
class(occ)))
}
if (!is.character(species)) {
stop(paste0("Argument species must be a character, not ",
class(species)))
}
if (!is.character(long)) {
stop(paste0("Argument long must be a character, not ",
class(long)))
}
if (!is.character(lat)) {
stop(paste0("Argument lat must be a character, not ",
class(lat)))
}
if (!is.logical(by_state)) {
stop(paste0("Argument by_state must be logical, not ",
class(by_state)))
}
if (!is.logical(by_country)) {
stop(paste0("Argument by_country must be logical, not ",
class(by_country)))
}
if (!is.numeric(buffer_state)) {
stop(paste0("Argument buffer_state must be numeric, not ",
class(buffer_state)))
}
if (!is.numeric(buffer_country)) {
stop(paste0("Argument buffer_country must be numeric, not ",
class(buffer_country)))
}
allowed_values <- c("flag&clean", "flag", "clean")
if (!(value %in% allowed_values)) {
stop("Argument value must be 'flag', 'clean' or 'flag&clean'")
}
if (!is.logical(keep_columns)) {
stop(paste0("Argument keep_columns must be logical, not ",
class(keep_columns)))
}
if (!is.logical(verbose)) {
stop(paste0("Argument verbose must be logical, not ",
class(verbose)))
}
#Convert colnames to lower case
original_colnames <- colnames(data)
colnames(data) <- tolower(colnames(data))
if (!all(c("species", "states", "countrycode") %in%
colnames(data))) {
stop("Important columns are missing in data. Check if data is an object\n created by 'load_faunabr()")
}
if (!all(c(species, long, lat) %in% colnames(occ))) {
stop("Important columns are missing in occurrence data. Check if correct\n column names were set in species, long and lat")
}
d <- data[, c("species", "states", "countrycode")]
occ$id_f <- seq_len(nrow(occ))
occ_info <- occ[, c(species, long, lat, "id_f")]
colnames(occ_info) <- c("species", "x", "y", "id_f")
spp <- unique(occ_info$species)
spp_out <- setdiff(spp, unique(data$species))
if (length(spp_out) > 0) {
stop(paste(length(spp_out), "species are not in the data. Check the species
names using the check_names() function or remove the species from
data.frame"))
}
d_info <- subset(d, d$species %in% unique(occ_info$species))
d_info[d_info == ""] <- NA
sp_info <- lapply(seq_along(spp), function(i) {
sp <- subset(d_info, d_info$species == spp[i])
sp$states <- paste0(na.omit(unique(sp$states)), collapse = ";")
sp$countrycode <- paste0(na.omit(unique(sp$countrycode)), collapse = ";")
return(sp)
})
sp_info <- unique(data.table::rbindlist(sp_info))
occ_info <- merge(occ_info, sp_info, by = "species")
occ_info <- terra::vect(occ_info, geom = c("x", "y"),
crs = "+init=epsg:4326")
#Load data
if(is.null(spat_state)){
states <- terra::unwrap(faunabr::states)} else {
states <- spat_state
}
if(is.null(spat_country)){
countrys <- terra::unwrap(faunabr::world_fauna) } else {
countrys <- spat_country
}
#Buffer around Brazil (to select records inside Brazil)
if(by_state){
br_v <- terra::buffer(countrys[countrys$bf_name == "Brazil"],
width = buffer_state * 1000)
#Check records inside Brazil
occ_info$inside_br <- terra::is.related(occ_info, states, "intersects")
} else {
occ_info$inside_br <- NA
}
if (by_state) {
l_state <- lapply(seq_along(spp), function(i) {
if (verbose) {
message("Filtering ", spp[i], " by state\n")
}
occ_i <- occ_info[occ_info$species == spp[i]]
sp_i_state <- unique(gsub(";", "|", occ_i$states[1]))
if (sp_i_state == "" | is.na(sp_i_state)) {
if (verbose) {
message(spp[i], "lacks info about state - Filter not applicable\n")
}
states_final <- occ_i
states_final$inside_state <- "No info"
} else {
states_v <- terra::aggregate(terra::subset(states, grepl(sp_i_state,states$abbrev_state)))
#Get distance
distance_i <- terra::distance(occ_i[occ_i$inside_br], states_v,
unit = "km")[,1]
distance_i <- distance_i <= buffer_state
#Create columns
occ_i$inside_state <- NA
#Fill inside_state of records inside br
occ_i$inside_state[occ_i$inside_br] <- distance_i
}
return(occ_i)
})
occ_info <- do.call("rbind", l_state)
}
if (!by_country) {
occ_info$inside_country <- NA
} else {
l_country <- lapply(seq_along(spp), function(i) {
if (verbose) {
message("Filtering ", spp[i], " by country\n")
}
occ_i <- terra::subset(occ_info, occ_info$species ==
spp[i])
sp_i_country <- unique(gsub(";", "|", occ_i$countrycode[1]))
if (sp_i_country == "" | is.na(sp_i_country)) {
if (verbose) {
message(spp[i], "lacks info about country - Filter not applicable\n")
}
countrys_final <- occ_i
countrys_final$inside_country <- "No info"
} else {
countrys_v <- terra::aggregate(terra::subset(countrys,
grepl(sp_i_country, countrys$country_code)))
#Get distance
distance_i <- terra::distance(occ_i, countrys_v,
unit = "km")[,1]
distance_i <- distance_i <= buffer_country
#Create columns
occ_i$inside_country <- NA
#Fill inside country
occ_i$inside_country <- distance_i
}
return(occ_i)
})
occ_info <- do.call("rbind", l_country)
}
occ_flag <- as.data.frame(occ_info)
colnames(occ_flag)[which(colnames(occ_flag) == "species")] <- species
if (keep_columns) {
occ_flag <- merge(occ_flag, occ, by = c(species, "id_f"))
occ_flag$id_f <- NULL
occ_flag <- occ_flag[, c(species, long, lat, colnames(occ_flag)[!(colnames(occ_flag) %in%
c(species, long, lat))])]
colnames(occ_flag)[colnames(occ_flag) %in% c(species,
long, lat)] <- c(species, long, lat)
}
if (!keep_columns) {
occ_flag <- merge(occ_flag, occ[, c(species, lat, long,
"id_f")], by = c(species, "id_f"))
occ_flag$id_f <- NULL
occ_flag <- occ_flag[, c(species, long, lat, names(occ_flag)[!(names(occ_flag) %in%
c(species, long, lat))])]
colnames(occ_flag)[colnames(occ_flag) %in% c(species,
long, lat)] <- c(species, long, lat)
}
#Remove columns
occ_flag$inside_br <- NULL
if (!by_state) {
occ_flag$inside_state <- NULL
}
if (!by_country) {
occ_flag$inside_country <- NULL
}
col_check <- intersect(c("inside_state", "inside_country",
"inside_br"), names(occ_flag))
if (length(col_check) == 1) {
occ_flag$filters_ok <- ifelse(occ_flag[, col_check] ==
TRUE | is.na(occ_flag[, col_check]), TRUE, FALSE)
} else {
occ_flag$filters_ok <- ifelse(rowSums(!is.na(occ_flag[,
col_check]) & occ_flag[, col_check] == FALSE) >
0, FALSE, TRUE)
}
occ_clean <- subset(occ_flag, occ_flag$filters_ok == TRUE)
occ_clean <- subset(occ_clean, select = setdiff(colnames(occ_clean),
c(col_check, "filters_ok")))
if (value == "flag&clean") {
res_final <- list(occ_flag, occ_clean)
names(res_final) <- c("flagged", "cleaned")
if (verbose) {
message("Returning list with flagged and cleaned occurrences\n")
}
}
if (value == "flag") {
res_final <- occ_flag
if (verbose) {
message("Returning dataframe with flagged occurrences\n")
}
}
if (value == "clean") {
res_final <- occ_clean
if (verbose) {
message("Returning dataframe with cleaned occurrences")
}
}
return(res_final)
}
# #Test function
# data("fauna_data") #Load fauna e Funga do Brasil data
# data("occurrences") #Load occurrences
# pts <- subset(occurrences, species == "Panthera onca")
# fd <- filter_faunabr(data = fauna_data,
# occ = pts, long = "x", lat = "y", species = "species",
# by_state = TRUE, buffer_state = 20,
# by_country = TRUE, buffer_country = 20,
# value = "flag&clean", keep_columns = TRUE,
# verbose = FALSE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.