# Global knitr chunk options for this document: silence warnings and
# messages, collapse source and output into one block, and prefix printed
# output with "#>".
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE,
  collapse = TRUE,
  comment = "#>"
)
Clean Biological Occurrence Records
Clean using the following use cases (checkmarks indicate that functions exist, though they are not necessarily complete):
taxize
(one method so far)

A note about examples: We think that using a piping workflow with `%>%`
makes code easier to build up and easier to understand. However, some
examples are shown without the pipe to demonstrate traditional usage.
Stable CRAN version
# Install scrubr with install.packages() (uses the configured repositories).
install.packages(pkgs = "scrubr")
Development version
# Install scrubr from the ropensci/scrubr GitHub repository via remotes.
remotes::install_github(repo = "ropensci/scrubr")
# Attach scrubr so its cleaning functions are available below.
library("scrubr")
# Load the bundled sample dataset. The object is named `sample_data_1`
# (with underscores), which is the name used by the examples that follow.
data("sample_data_1")
Remove impossible coordinates (using sample data included in the pkg)
# Traditional form, without the pipe:
# coord_impossible(dframe(sample_data_1))

# Piped form: wrap the sample data with dframe(), then clean it.
dframe(sample_data_1) %>%
  coord_impossible()
Remove incomplete coordinates
# Traditional form, without the pipe:
# coord_incomplete(dframe(sample_data_1))

# Piped form: wrap the sample data with dframe(), then clean it.
dframe(sample_data_1) %>%
  coord_incomplete()
Remove unlikely coordinates (e.g., those at 0,0)
# Traditional form, without the pipe:
# coord_unlikely(dframe(sample_data_1))

# Piped form: wrap the sample data with dframe(), then clean it.
dframe(sample_data_1) %>%
  coord_unlikely()
Do all three
# Chain the three coordinate cleaners, one pipe step per line.
dframe(sample_data_1) %>%
  coord_impossible() %>%
  coord_incomplete() %>%
  coord_unlikely()
Don't drop bad data
# Row count when coord_incomplete() is called with drop = TRUE.
dframe(sample_data_1) %>%
  coord_incomplete(drop = TRUE) %>%
  NROW()

# Row count when coord_incomplete() is called with drop = FALSE, for
# comparison with the count above.
dframe(sample_data_1) %>%
  coord_incomplete(drop = FALSE) %>%
  NROW()
# Deduplicate records: build a small data frame containing a near-duplicate
# row, then run dedup() on it.
smalldf <- sample_data_1[1:20, ]
# create a duplicate record
smalldf <- rbind(smalldf, smalldf[10, ])
row.names(smalldf) <- NULL
# make it slightly different
smalldf[21, "key"] <- 1088954555
NROW(smalldf)
dp <- dframe(smalldf) %>%
  dedup()
NROW(dp)
# Inspect the "dups" attribute attached to the dedup() result.
attr(dp, "dups")
Standardize/convert dates
df <- sample_data_1

# Traditional form, without the pipe:
# date_standardize(dframe(df), "%d%b%Y")

# Piped form, passing the format string "%d%b%Y" to date_standardize().
dframe(df) %>%
  date_standardize("%d%b%Y")
Drop records without dates
# Row count before date_missing() is applied.
NROW(df)
# Row count after date_missing() is applied.
NROW(dframe(df) %>% date_missing())
Create date field from other fields
# Pass the year, month and day columns to date_create().
dframe(sample_data_2) %>%
  date_create(year, month, day)
Filter by FAO areas
# WKT polygon passed as the `geometry` filter for the occurrence query.
wkt <- 'POLYGON((72.2 38.5,-173.6 38.5,-173.6 -41.5,72.2 -41.5,72.2 38.5))'

# Look up the GBIF backbone usage key for the species name, then fetch up
# to 300 occurrence records that have coordinates (requires network access).
manta_ray <- rgbif::name_backbone("Mobula alfredi")$usageKey
res <- rgbif::occ_data(
  manta_ray,
  geometry = wkt,
  limit = 300,
  hasCoordinate = TRUE
)

# Map the unfiltered records: convert to an sf object, set CRS 4326, plot.
dat <- sf::st_as_sf(
  res$data,
  coords = c("decimalLongitude", "decimalLatitude")
)
dat <- sf::st_set_crs(dat, 4326)
mapview::mapview(dat)

# Filter with eco_region() using the "fao" dataset and the "OCEAN:Indian"
# region, then drop rows whose longitude is NA before converting to sf.
tmp <- eco_region(dframe(res$data), dataset = "fao", region = "OCEAN:Indian")
tmp <- tmp[!is.na(tmp$decimalLongitude), ]
tmp2 <- sf::st_as_sf(
  tmp,
  coords = c("decimalLongitude", "decimalLatitude")
)
tmp2 <- sf::st_set_crs(tmp2, 4326)
mapview::mapview(tmp2)
To cite scrubr, get the citation information in R by running `citation(package = 'scrubr')`.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.