##' @title Extract and append remote sensing environmental data to detection data
##'
##' @description Accesses and download environmental data from the IMOS THREDDS
##' server and append variables to detection data based on date of detection
##'
##' @param df detection data source in data frame with at the minimum a X, Y and
##' date time field
##' @param X name of column with X coordinate or longitude (EPSG 4326)
##' @param Y name of column with Y coordinate or latitude (EPSG 4326)
##' @param datetime name of column with date time stamp (Coordinated Universal
##' Time; UTC)
##' @param env_var variable needed options include ('rs_sst', 'rs_sst_interpolated',
##' 'rs_salinity', 'rs_chl', 'rs_turbidity', 'rs_npp', 'bathy', 'dist_to_land',
##' 'rs_current')
##' @param folder_name name of folder within 'imos.cache' where downloaded rasters
##' should be saved. default NULL produces automatic folder names based on study extent
##' @param verbose should function provide details of what operation is being conducted.
##' Set to `FALSE` to keep it quiet
##' @param cache_layers should the extracted environmental data be cached within
##' the working directory? if FALSE stored in temporary folder and discarded
##' after environmental extraction
##' @param crop_layers should the extracted environmental data be cropped to
##' within the study site
##' @param full_timeperiod should environmental variables extracted for each day
##' across full monitoring period, time and memory consuming for long projects
##' @param fill_gaps should the function use a spatial buffer to estimate
##' environmental variables for detections where there is missing data. Default
##' is `FALSE` to save computational time.
##' @param buffer radius of buffer (in m) around each detection from which
##' environmental variables should be extracted from. A median value of pixels
##' that fall within the buffer will be used if `fill_gaps = TRUE`. If `NULL` a
##' buffer will be chosen based on the resolution of environmental layer. A
##' numeric value (in m) can be used here to customise buffer radius.
##' @param nrt should Near Real-Time current data be used if Delayed-Mode current
##' data is missing. Default is `FALSE`, in which case NA's are appended to current
##' variables for years (currently, all years after 2020) when current data are
##' missing. Note that Near Real-Time data are subject to less quality control
##' than Delayed-Mode data.
##' @param output_format File format for cached environmental layers. You can use
##' \code{gdal(drivers=TRUE)} to see what drivers are available in your installation.
##' The default format is '.grd'.
##' @param .parallel should the function be run in parallel
##' @param .ncores number of cores to use if set to parallel. If none provided,
##' uses \code{\link[parallel]{detectCores}} to determine number.
##'
##' @details The `extractEnv` function allows the user to access, download and
##' append a range of environmental variables to each detection within a telemetry
##' data set. We advocate for users to first undertake a quality control step using
##' the \code{\link{runQC}} function before further analysis, however the
##' functionality to append environmental data will work on any dataset that has
##' at the minimum spatial coordinates (i.e., latitude, longitude; in EPSG 4326)
##' and a timestamp (in UTC) for each detection event. Quality controlled
##' environmental variables housed in the IMOS Thredds server will be extracted
##' for each specific coordinate at the specific timestamp where available. A
##' summary table of the full range of environmental variables currently
##' available can be accessed using the \code{\link{imos_variables}} function.
##'
##'
##' @return a dataframe with the environmental variable appended as an extra
##' column based on date of each detection
##'
##' @examples
##' ## Input example detection dataset that have run through the quality control
##' ## workflow (see 'runQC' function)
##'
##' library(tidyverse)
##' data("TownsvilleReefQC")
##'
##' ## simplify & subset data for example speed-up
##' qc_data <-
##' TownsvilleReefQC %>%
##' unnest(cols = c(QC)) %>%
##' ungroup() %>%
##' filter(Detection_QC %in% c(1,2)) %>%
##' filter(filename == unique(filename)[1]) %>%
##' slice(5:8)
##'
##' ## Extract daily interpolated sea surface temperature
##' ## cache_layers & fill_gaps args set to FALSE for speed
##' data_with_sst <-
##' extractEnv(df = qc_data,
##' X = "receiver_deployment_longitude",
##' Y = "receiver_deployment_latitude",
##' datetime = "detection_datetime",
##' env_var = "rs_sst_interpolated",
##' cache_layers = FALSE,
##' crop_layers = TRUE,
##' full_timeperiod = FALSE,
##' fill_gaps = TRUE,
##' folder_name = "test",
##' .parallel = FALSE)
##'
##' @importFrom dplyr %>% mutate distinct pull left_join select
##' @importFrom terra ext
##' @importFrom lubridate date
##'
##' @export
##'
extractEnv <-
function(df,
X = "longitude",
Y = "latitude",
datetime = "detection_timestamp",
env_var,
folder_name = NULL,
verbose = TRUE,
cache_layers = TRUE,
crop_layers = TRUE,
full_timeperiod = FALSE,
fill_gaps = FALSE,
buffer = NULL,
nrt = FALSE,
output_format = ".grd",
.parallel = TRUE,
.ncores = NULL) {
## Initial checks of parameters
if(!X %in% colnames(df)){stop("Cannot find X coordinate in dataset, provide column name where variable can be found")}
if(!Y %in% colnames(df)){stop("Cannot find Y coordinate in dataset, provide column name where variable can be found")}
if(!datetime %in% colnames(df)){stop("Cannot find date timestamp column in dataset, provide column name where variable can be found")}
if(!env_var %in% c('rs_sst', 'rs_sst_interpolated', 'rs_salinity', 'rs_chl', 'rs_turbidity', 'rs_npp', 'rs_current', 'bathy', 'dist_to_land')){
stop("Environmental variable not recognised, options include:\n'rs_sst', 'rs_sst_interpolated', 'rs_salinity', 'rs_chl', 'rs_turbidity', 'rs_npp', 'rs_current', 'bathy', 'dist_to_land'")}
if(length(env_var) > 1){stop("This function currently only supports extracting a single variable at a time. Please only select one of:\n'rs_sst', 'rs_sst_interpolated', 'rs_salinity', 'rs_chl', 'rs_turbidity', 'rs_npp', 'rs_current', 'bathy', 'dist_to_land'")}
## Turn of un-needed parallelising or gap filling if extracting 'bathy', 'dist_to_land'
if(env_var %in% c("bathy", "dist_to_land")){
.parallel = FALSE
fill_gaps = FALSE}
# if(env_var %in% "rs_current"){.parallel = FALSE}
## define date range
unique_dates <-
df %>%
mutate(date = date(!!as.name(datetime))) %>%
distinct(date) %>%
pull(date)
date_range <- range(unique_dates)
## define spatial extent and extend by 40%
study_extent <- ext(c(min(df[[X]]), max(df[[X]]), min(df[[Y]]), max(df[[Y]]))) * 1.4
## define unique positions (for quicker environmental variable extraction)
unique_positions <-
ungroup(df) %>%
mutate(date = date(!!as.name(datetime))) %>%
distinct(!!as.name(X), !!as.name(Y), date) %>%
dplyr::select(!!as.name(X), !!as.name(Y), date)
## define dates of detection and date range and catalog all dates between start and end if .full_timeperiod = TRUE
if(full_timeperiod){
if(verbose){
message("Extracting environmental data for each day between ",
date_range[1], " and ", date_range[2], " (",
difftime(date_range[2], date_range[1], units = "days"), " days)",
"\nThis may take a little while...")
}
dates <- seq(date_range[1], date_range[2], by = 1)
} else {
if(verbose){
message("Extracting environmental data only on days detections were present; between ",
date_range[1], " and ", date_range[2], " (",
length(unique_dates), " days)",
"\nThis may take a little while...")
}
dates <- unique_dates
}
# Pull environmental netcdf from THREDDS server
if(verbose){
message("Accessing and downloading IMOS environmental variable: ", env_var)
}
suppressWarnings(
env_stack <- .pull_env(
dates = dates,
study_extent = study_extent,
var_name = env_var,
.cache = cache_layers,
folder_name = folder_name,
.crop = crop_layers,
.nrt = nrt,
.output_format = output_format,
verbose = verbose,
.parallel = .parallel,
.ncores = .ncores
)
)
if (cache_layers & verbose) {
message("\nDownloaded layers are cached in the `imos.cache` folder in your working directory")
}
if(!is.null(env_stack)) {
## Extract environmental variable from env_stack
if(verbose){
message("Extracting and appending environmental data")
}
env_data <- .extract_var(unique_positions, env_stack, env_var, .fill_gaps = fill_gaps, .buffer = buffer, verbose = verbose)
## Combine environmental data with input detection data
output <- df %>%
mutate(date = date(!!as.name(datetime))) %>%
left_join(env_data, by = c(X, Y, "date"))
## Calculate additional variables for current data (current direction and velocity)
if(env_var %in% "rs_current"){
windDir <- function(u, v) {(180 / pi) * atan(u/v) + ifelse(v>0,180,ifelse(u>0,360,0))}
output <- output %>%
mutate(rs_current_velocity = sqrt(rs_vcur^2 + rs_ucur^2),
rs_current_bearing = windDir(u = rs_ucur, v = rs_vcur))
# ## Adjust bearing to 0 - 360 degrees clockwise
# output <-
# output %>%
# mutate(rs_current_bearing =
# case_when(rs_current_bearing < 0 ~ rs_current_bearing + 360,
# TRUE ~ rs_current_bearing))
}
# output$date <- NULL
if(max(date_range) >= as.Date("2021-01-01") & nrt) {
message("Near Real-Time Ocean Current data appended in place of unavailable Delayed-Mode data")
}
return(output)
} else {
## if no viable urls found then return input data
message("Returning original input data")
return(df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.