#' Load presence records from SA database
#'
#' @param species Character vector of species scientific name
#' @param region
#' @param save.map Logical value to indicate saving the map to file or not
#' @param map.directory File path to output folder for saving map
#'
#' @return
#' @export
#'
#' @examples
#'
#'
load_pres_bg_data_SA_2 <- function(dir_path,
species,
region = "all",
save.map = TRUE,
map.directory = "."){
#TODO Check coordinate projections
#######################
### Name Processing ###
#######################
## Check if name is properly formed
if(any(grepl("sp. ",species, fixed = TRUE),
grepl("'",species, fixed = TRUE),
grepl("\"",species, fixed = TRUE)
#add any other exceptions here
)){
stop("Not run: scientific name not properly formed!")
}
## Clean name of bracket suffix
### - assuming the brackets denote subpopulation, and not taxonomic notations
species <- stringr::str_remove(species,
"\\(.*\\)")
## Remove trailing whitespace
species <- stringr::str_squish(species)
#################
### Load Data ###
#################
## Load in QLD data from file
SA_data <- read.csv(sprintf("%s/SA_%s.csv",
dir_path,
gsub(" ",
"_",
tolower(species))),
stringsAsFactors = FALSE)
if(nrow(SA_data) == 0){
stop("Not run: no records found")
}
## Format data
df <- data.frame("ID" = seq_len(nrow(SA_data)),
"Origin" = "SA",
"Species" = species,
"Longitude" = SA_data$LONGITUDE,
"Latitude" = SA_data$LATITUDE,
#add date for duplicate processing
"Date" = lubridate::as_date(SA_data$SIGHTINGDATE,
origin = "1900-01-01"),
"Basis.of.Record" = SA_data$METHODDESC,
"Locality" = SA_data$LOCATIONCOMM,
"Institute" = SA_data$SOURCE,
"Collection" = SA_data$SURVEYNAME,
"Coordinate.Uncertainty.in.Metres" = SA_data$RELIABDESC,
stringsAsFactors = FALSE)
#####################
### Data Cleaning ###
#####################
## Coordinate uncertainty
df$Coordinate.Uncertainty.in.Metres <- gsub("0-5m", "5", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("0-10m", "10", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("5-50m", "50", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("51-100m", "100", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("101-250m", "250", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("251-500m", "500", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("501-1000m", "1000", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("1-10km", "10000", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("11-25km", "25000", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("31-125km", "125000", df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- gsub("No entered", NA, df$Coordinate.Uncertainty.in.Metres)
df$Coordinate.Uncertainty.in.Metres <- as.numeric(df$Coordinate.Uncertainty.in.Metres)
## Remove spatial duplicates (other duplicate types may matter, think later)
df$Longitude <- as.numeric(df$Longitude)
df$Latitude <- as.numeric(df$Latitude)
df <- df[!duplicated(df[ , c("Longitude", "Latitude")]), ]
## Get rid of missing or incomplete long and lats
df <- df[!is.na(df$Longitude) | !is.na(df$Latitude), ]
## Get rid of unusable long lat vals
### (Roozbeh says can save some data here will look into it later)
df <- df[df$Longitude > -180 &
df$Longitude < 180 &
df$Latitude > -90 &
df$Latitude < 90, ]
## Check if any record left
if(nrow(df) == 0){
stop("Not run: no data with legitimate coordinates found")
}
# ## Clean records using coord cleaner
#
# df <- CoordinateCleaner::clean_coordinates(df,
# lon = "Longitude",
# lat = "Latitude",
# species = "Species",
# tests = c("capitals",
# "centroids",
# "equal",
# "gbif",
# "institutions",
# "seas",
# "zeros"),
# #skip urban test - keeps giving proj4string errors, will look into later
# # urban_ref = as_Spatial(read_sf("Data/GIS/ne_50m_urban_areas/ne_50m_urban_areas.shp")),
# seas_ref = NULL, #as_Spatial(read_sf("Data/GIS/ne_50m_land/ne_50m_land.shp")),
#
# #ignore outliers for now
# # outliers_method = "distance",
# # outliers_td = 1500, #outlier bit probably needs tweaking, its curently set to be very conservative
# value = "clean")
## Check if duplicate long or lat - could be signal of rounding
suspect.rounding <- ifelse(any(anyDuplicated(df$df),
anyDuplicated(df$Latitude)),
"duplicate long/lat found - suspect rounding",
NA)
##reproject data from GDA94 to WGS84
sp.sf <- sf::st_as_sf(df,
coords = (4:5),
crs = 4283)
sp.sf <- sf::st_transform(sp.sf, 4326)
df[,4:5] <- sf::st_coordinates(sp.sf)
# ####################
# ### Plot Records ###
# ####################
#
# ## Visualise those with fewer than 1k records
# ### (can tweak this - I just think there isn't much point to manual input
# ### when looking at more than 1k data)
#
# if(nrow(df) <= 1000){
#
#
# sp.map <- mapview::mapview(sp.sf,
# layer.name = species,
# homebutton = FALSE)
#
# if(save.map == TRUE){
#
# map_filename <- sprintf("%s/%s.html",
# map.directory,
# gsub(" ",
# "_",
# species))
#
# htmlwidgets::saveWidget(sp.map@map,
# file = map_filename)
#
# cat(paste0("Map is saved to ", map_filename), "\n")
#
# }
#
# } else {
#
# sp.map <- "more than 1k records, not mapped"
#
# }
return(list(processed.data = df,
raw.SA.data = SA_data,
rounding.comment = suspect.rounding))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.