Nothing
## ----options, echo = FALSE----------------------------------------------------
knitr::opts_chunk$set(eval = FALSE)
## ----intall_github, eval=FALSE------------------------------------------------
# install.packages("devtools")
# library(devtools)
# install_github("ropensci/CoordinateCleaner")
## ----libraries----------------------------------------------------------------
# library(countrycode)
# library(CoordinateCleaner)
# library(dplyr)
# library(ggplot2)
# library(rgbif)
# library(sf)
## ----obtain data--------------------------------------------------------------
# #obtain data from GBIF via rgbif
# dat <- occ_search(scientificName = "Panthera leo",
# limit = 5000,
# hasCoordinate = TRUE)
#
# dat <- dat$data
#
# # names(dat) # a lot of columns
#
# # select columns of interest
# dat <- dat %>%
# dplyr::select(species, decimalLongitude,
# decimalLatitude, countryCode, individualCount,
# gbifID, family, taxonRank, coordinateUncertaintyInMeters,
# year, basisOfRecord, institutionCode, datasetName)
#
# # remove records without coordinates
# dat <- dat %>%
# filter(!is.na(decimalLongitude)) %>%
# filter(!is.na(decimalLatitude))
## ----map1---------------------------------------------------------------------
# #plot data to get an overview
# wm <- borders("world", colour = "gray50", fill = "gray50")
# ggplot() +
# coord_fixed() +
# wm +
# geom_point(data = dat,
# aes(x = decimalLongitude, y = decimalLatitude),
# colour = "darkred",
# size = 0.5) +
# theme_bw()
## ---- echo=FALSE, eval = TRUE, out.width="100%", fig.cap="Occurrence records for Panthera leo obtained from GBIF."----
knitr::include_graphics("gbif-clgbif5-1.png")
## -----------------------------------------------------------------------------
# #convert country code from ISO2c to ISO3c
# dat$countryCode <- countrycode(dat$countryCode,
# origin = 'iso2c',
# destination = 'iso3c')
#
# #flag problems
# dat <- data.frame(dat)
# flags <- clean_coordinates(x = dat,
# lon = "decimalLongitude",
# lat = "decimalLatitude",
# countries = "countryCode",
# species = "species",
# tests = c("capitals", "centroids",
# "equal", "zeros", "countries")) # most test are on by default
## -----------------------------------------------------------------------------
# summary(flags)
# plot(flags, lon = "decimalLongitude", lat = "decimalLatitude")
## -----------------------------------------------------------------------------
# #Exclude problematic records
# dat_cl <- dat[flags$.summary,]
#
# #The flagged records
# dat_fl <- dat[!flags$.summary,]
## -----------------------------------------------------------------------------
# # To avoid specifying it in each function
# names(dat)[2:3] <- c("decimalLongitude", "decimalLatitude")
#
# clean <- dat %>%
# cc_val() %>%
# cc_equ() %>%
# cc_cap() %>%
# cc_cen() %>%
# cc_coun(iso3 = "countryCode") %>%
# cc_sea() %>%
# cc_zero() %>%
# cc_outl() %>%
# cc_dupl()
## -----------------------------------------------------------------------------
# dat %>%
# as_tibble() %>%
# mutate(val = cc_val(., value = "flagged"),
# sea = cc_sea(., value = "flagged"))
## -----------------------------------------------------------------------------
# flags <- cf_age(x = dat_cl,
# lon = "decimalLongitude",
# lat = "decimalLatitude",
# taxon = "species",
# min_age = "year",
# max_age = "year",
# value = "flagged")
# # Testing temporal outliers on taxon level
# # Flagged 0 records.
#
# dat_cl <- dat_cl[flags, ]
## -----------------------------------------------------------------------------
# #Remove records with low coordinate precision
# dat_cl %>%
# mutate(Uncertainty = coordinateUncertaintyInMeters / 1000) %>%
# ggplot(aes(x = Uncertainty)) +
# geom_histogram() +
# xlab("Coordinate uncertainty in meters") +
# theme_bw()
#
## -----------------------------------------------------------------------------
# dat_cl <- dat_cl %>%
# filter(coordinateUncertaintyInMeters / 1000 <= 100 | is.na(coordinateUncertaintyInMeters))
#
# # Remove unsuitable data sources, especially fossils
# # which are responsible for the majority of problems in this case
# table(dat$basisOfRecord)
#
# ## HUMAN_OBSERVATION MATERIAL_SAMPLE PRESERVED_SPECIMEN
# ## 4979 2 19
#
# dat_cl <- filter(dat_cl, basisOfRecord == "HUMAN_OBSERVATION" |
# basisOfRecord == "OBSERVATION" |
# basisOfRecord == "PRESERVED_SPECIMEN")
## -----------------------------------------------------------------------------
# #Individual count
# table(dat_cl$individualCount)
## -----------------------------------------------------------------------------
# dat_cl <- dat_cl %>%
# filter(individualCount > 0 | is.na(individualCount)) %>%
# filter(individualCount < 99 | is.na(individualCount)) # high counts are not a problem
## -----------------------------------------------------------------------------
# #Age of records
# table(dat_cl$year)
## -----------------------------------------------------------------------------
# dat_cl <- dat_cl %>%
# filter(year > 1945) # remove records from before second world war
## -----------------------------------------------------------------------------
# table(dat_cl$family) #that looks good
# ##
# ## Felidae
# ## 4505
# dat_cl <- dat_cl %>%
# filter(family == 'Felidae')
#
# table(dat_cl$taxonRank) # this is also good
# ##
# ## SPECIES SUBSPECIES
# ## 520 3985
## -----------------------------------------------------------------------------
# #exclude based on study area
# dat_fin <- filter(dat_cl, decimalLatitude < 40)
## -----------------------------------------------------------------------------
# #create simple natural range for lions
# coords_range <- cbind(cbind(c(-23, -7, 31, 71, 83, 42, 41, 24, -23), c(14, 37, 32, 27, 18, 0, -16, -38, 14)))
# wgs84 <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#
# nat_range <- terra::vect(coords_range, "polygons",
# crs = wgs84)
# nat_range$species <- "Panthera leo"
#
# # Visualize range
# plo <- sf::st_as_sf(nat_range)
#
# ## Regions defined for each Polygons
# ggplot() +
# borders("world", colour = "gray50", fill = "gray50") +
# geom_sf(data = plo, aes(fill = species), alpha = 0.5) +
# theme_bw() +
# theme(legend.position = "none",
# axis.title = element_blank())
## -----------------------------------------------------------------------------
#
# # run cc_iucn()
# range_flags <- cc_iucn(x = dat_cl,
# range = nat_range,
# lon = "decimalLongitude",
# lat = "decimalLatitude",
# value = "flagged")
## -----------------------------------------------------------------------------
# dat_fin <- dat_cl[range_flags, ]
## ---- echo = FALSE------------------------------------------------------------
# dat <- dat %>%
# as_tibble() %>%
# mutate(phase = "Raw data")
# dat_cl <- dat_cl %>%
# mutate(phase = "Automatic cleaning")
# dat_fin <- dat_fin %>%
# mutate(phase = "Manual polishing")
#
# dat %>%
# bind_rows(dat_cl, dat_fin) %>%
# mutate(phase = factor(phase, c("Raw data", "Automatic cleaning", "Manual polishing"))) %>%
# ggplot(aes(x = decimalLongitude, y = decimalLatitude, color = phase)) +
# borders("world", colour = "gray50", fill = "gray50") +
# geom_point() +
# theme_bw() +
# theme(legend.position = "none",
# axis.title = element_blank()) +
# facet_wrap(. ~ phase, ncol = 1)
#
## ---- echo=FALSE, eval = TRUE, out.width="100%", fig.cap="\\label{fig:final}The dataset of occurrence of lions after different cleaning phases."----
knitr::include_graphics("gbif-clgbif17-1.png")
## -----------------------------------------------------------------------------
# out.ddmm <- cd_ddmm(dat_cl, lon = "decimalLongitude", lat = "decimalLatitude",
# ds = "species", diagnostic = T, diff = 1,
# value = "dataset")
## -----------------------------------------------------------------------------
# par(mfrow = c(2,2), mar = rep(2, 4))
# out.round <- cd_round(dat_fin, lon = "decimalLongitude",
# lat = "decimalLatitude",
# ds = "species",
# value = "dataset",
# T1 = 7,
# graphs = T)
# ## Testing for rasterized collection
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.