if(getRversion() >= '2.5.1') {
globalVariables(c('data_dir', 'read.csv'))
}
#'
#'Generates basic metadata about a directory of animal data files and stores the files as data frames as a list with the meta
#'
#'@param data_dir location of animal data files, in list format
#'@param max_rate maximum rate of travel (meters/minute) between consecutive points
#'@param max_course maximum distance (meters) between consecutive points
#'@param max_dist maximum geographic distance (meters) between consecutive points
#'@param max_clean_time maximum time (minutes) between consecutive points
#'@return a list of animal data frames with information about the data
#'@noRd
#'
store_batch_list <- function(data_dir, max_rate = 84, max_dist = 840, max_course = 100, max_clean_time = 3600) {
# unpack documents in the .zip file to a temp folder
dir_name <- gsub(".zip", "", data_dir$name)
unlink(file.path("temp"), recursive=TRUE)
data_files <- utils::unzip(data_dir$datapath, exdir="temp")
data_files <- list.files("temp", pattern = "*.(csv|txt|TXT)", recursive = TRUE, full.names = TRUE)
rds_name <- paste0(dir_name, ".rds")
data_sets <- lapply(data_files, read_gps)
# remove "temp" from file name
file_names <- gsub("(temp)(\\/)", "", data_files)
gps_units <- gsub("(.*)(20)([0-9]{2}\\_)(.*)(\\_{1}.*)(\\.(csv|txt|TXT))","\\4", file_names)
ani_ids <- gsub("(.*)(20)([0-9]{2}\\_)(.*\\_)(.*)(\\.(csv|txt|TXT))","\\5", file_names)
site_names <- c()
# function to compute max/min lat/long from a dirty dataset
maxminlatlong <- function(data, dtype){
if(dtype == "igotu") {
suppressWarnings( df <- data[!is.na(as.numeric(data$Index)), ] ) # discard any rows with text in the first column duplicate header rows
}
else {
df <- data
}
df <- utils::type.convert(df) %>%
dplyr::select( Latitude, Longitude) %>%
dplyr::filter(!is.na(Latitude), Latitude !=0, !is.na(Longitude), Longitude !=0)
return( c( max(df$Latitude),
min(df$Latitude),
max(df$Longitude),
min(df$Longitude)
)
)
}
# function to update a global max/min lat/long with a new dataset
update_maxminlatlong <- function(mmll, newdata, newdtype){
new_mmll <- maxminlatlong(newdata, newdtype)
c(max(mmll[1], new_mmll[1]),
min(mmll[2], new_mmll[2]),
max(mmll[3], new_mmll[3]),
min(mmll[4], new_mmll[4]))
}
maxminsll <- maxminlatlong(data_sets[[1]]$df, data_sets[[1]]$dtype)
meta_df <- data.frame()
for(i in 1:length(file_names)) {
site_names[i] <- ifelse( grepl("\\_", file_names[i]), tolower(sub("\\_.*","", file_names[i])), paste0("Unknown_", gsub("(.*).(csv|txt|TXT)", "\\1", file_names[i])))
ani_ids[i] <- ifelse(ani_ids[i] == file_names[i], paste0("Unknown_", gsub("(.*).(csv|txt|TXT)", "\\1", file_names[i])), ani_ids[i])
gps_units[i] <- ifelse(gps_units[i] == file_names[i], paste0("Unknown_", gsub("(.*).(csv|txt|TXT)", "\\1", file_names[i])), gps_units[i])
df <- data_sets[[i]]$df
dtype <- data_sets[[i]]$dtype
if(i > 1 ){
maxminsll <- update_maxminlatlong(maxminsll, df, dtype)
}
df <- clean_location_data(df, dtype, filters = FALSE, aniid = ani_ids[i], gpsid = gps_units[i],
maxrate = max_rate, maxdist = max_dist, maxcourse = max_course,
maxtime = max_clean_time)
df_clean <- clean_location_data(df, dtype, filters = TRUE, aniid = ani_ids[i], gpsid = gps_units[i],
maxrate = max_rate, maxdist = max_dist, maxcourse = max_course,
maxtime = max_clean_time)
current_meta <- get_meta(df_clean, i, dtype, file_names[i], site_names[i], ani_ids[i], "temp.rds")
meta_df <- save_meta(meta_df, current_meta)
data_sets[[i]] <- df
}
ani_ids <- make.unique(ani_ids, sep="_")
unlink("temp", recursive = TRUE)
saveRDS(data_sets, "temp.rds")
return(list(data = data_sets, file = file_names,
ani = ani_ids, gps = gps_units,
site = site_names, rds_name = rds_name,
min_lat = maxminsll[2], max_lat = maxminsll[1],
min_long = maxminsll[4], max_long = maxminsll[3],
meta = meta_df))
}
#'
#'Cleans a directory of animal data files
#'
#'@param data_info list of animal data frames with information about the data, generated by store_batch
#'@param filters filter bad data points, defaults to true
#'@param tz_in input time zone, defaults to UTC
#'@param tz_out output time zone, defaults to UTC
#'@param maxrate maximum rate of travel (meters/minute) between consecutive points
#'@param maxcourse maximum distance (meters) between consecutive points
#'@param maxdist maximum geographic distance (meters) between consecutive points
#'@param maxtime maximum time (minutes) between consecutive points
#'@return clean df with all animal data files from the directory
#'@noRd
#'
clean_batch_df <- function(data_info, filters = TRUE, tz_in = "UTC", tz_out = "UTC", max_rate, max_course,
max_dist, max_clean_time) {
data_sets <- list()
withProgress(message = paste0("Preparing raw data", ifelse(filters, " (filtered)", " (unfiltered)")), detail = paste0("0/",length(data_info$data), " files prepped"), value = 0, {
for(i in 1:length(data_info$data)) {
df <- data_info$data[[i]]
dtype <- data_info$meta$dtype[i]
aniid <- data_info$ani[i]
gpsid <- data_info$gps[i]
# clean df
if(filters) {
df_out <- clean_location_data(df, dtype, filters,
aniid = aniid,
gpsid = gpsid,
maxrate = max_rate, max_course = max_course,
max_dist = max_dist, max_time=max_time, tz_in = tz_in, tz_out = tz_out)
}
else {
df_out <- df
}
# add cleaned df to the list of data
data_sets[[paste0("ani",aniid)]] <- df_out
incProgress(1/(length(data_info$data)), detail = paste0(i,"/",length(data_info$data), " files prepped"))
} #cleaning for loop
}) #progress bar
return(do.call(rbind, data_sets))
}
#'
#'Cleans a directory of animal data files and stores them locally in rds format
#'
#'@param data_info list of animal data frames with information about the data, generated by store_batch
#'@param filters filter bad data points, defaults to true
#'@param zoom level of zoom, defaults to 11
#'@param get_elev logical, whether to append elevation data
#'@param get_slope logical, whether to compute slope (in degrees), defaults to true
#'@param get_aspect logical, whether to compute aspect (in degrees), defaults to true
#'@param weather_vars weather variables to append
#'@param selected_station weather station for data lookup, can be NULL if weather_vars is NULL
#'@param min_lat minimum latitude for filtering, defaults to min in data_info
#'@param max_lat maximum latitude for filtering, defaults to max in data_info
#'@param min_long minimum longitude for filtering, defaults to min in data_info
#'@param max_long maximum longitude for filtering, defaults to max in data_info
#'@param tz_in input time zone, defaults to UTC
#'@param tz_out output time zone, defaults to UTC
#'@param maxrate maximum rate of travel (meters/minute) between consecutive points
#'@param maxcourse maximum distance (meters) between consecutive points
#'@param maxdist maximum geographic distance (meters) between consecutive points
#'@param maxtime maximum time (minutes) between consecutive points
#'@return df of metadata for animal file directory
#'@noRd
#'
clean_store_batch <- function(data_info, filters = TRUE, zoom = 11, get_elev = FALSE, get_slope = FALSE, get_aspect = FALSE,
weather_vars = NULL, selected_station = NULL,
min_lat = data_info$min_lat, max_lat = data_info$max_lat, min_long = data_info$min_long, max_long = data_info$max_long,
tz_in = "UTC", tz_out = "UTC", dbscan_enable=FALSE, kalman = FALSE, kalman_max_timestep=300, max_rate, max_course, max_dist, max_clean_time,
dbscan_knn_eps = 0.001, dbscan_knn_k = 5) {
#initialize empty meta
meta_df <- data.frame(matrix(ncol = 9, nrow = 0))
meta_cols <- c("file_id", "file_name", "site", "ani_id", "min_date", "max_date", "min_lat", "max_lat", "storage")
colnames(meta_df) <- meta_cols
num_saved_rds <- 0
withProgress(message = "Processing data", detail = paste0("0/",length(data_info$data), " files processed"), value = 0, {
data_sets <- list()
all_data_sets <- data.frame()
for(i in 1:length(data_info$data)) {
df <- data_info$data[[i]]
dtype <- data_info$meta$dtype[i]
if(dtype == "igotu" & data_info$meta$storage[1] != "demo_nov19.rds") {
df <- df[!duplicated(as.list(df))] # discard any columns that are duplicates of index
colnames(df)[1] <- "Index"
suppressWarnings( df <- df[!is.na(as.numeric(df$Index)), ] ) # discard any rows with text in the first column duplicate header rows
}
df <- utils::type.convert(df)
aniid <- data_info$ani[i]
gpsid <- data_info$gps[i]
# clean df
df_out<- clean_location_data(df, dtype, filters,
aniid = aniid,
gpsid = gpsid,
maxrate = max_rate, maxcourse = max_course, maxdist = max_dist, maxtime = max_clean_time,
tz_in = tz_in, tz_out = tz_out,
dbscan_enable=dbscan_enable, kalman=kalman, kalman_min_lat=min_lat, kalman_max_lat=max_lat, kalman_min_lon=min_long, kalman_max_lon=max_long, kalman_max_timestep=kalman_max_timestep)
# add cleaned df to the list of data
data_sets[[paste0("ani",aniid)]] <- df_out
all_data_sets <- all_data_sets %>% rbind(df_out)
} #cleaning for loop
df_processed <- all_data_sets
# Elevation lookup
if(get_elev) {
df_processed <- all_data_sets %>% dplyr::filter(Latitude <= max_lat,
Latitude >= min_lat,
Longitude <= max_long,
Longitude >= min_long)
status_message <- modalDialog(
pre(id = "console"),
title = "Please Wait...",
easyClose = TRUE,
footer = NULL
)
showModal(status_message)
if(nrow(df_processed) == 0) {
incProgress(0, detail = paste0("Appending elevation at zoom = ", zoom, " for invalid bounds. Defaulting to all data."))
withCallingHandlers({
shinyjs::html("console", "")
df_processed <- lookup_elevation_aws(all_data_sets, zoom = zoom, get_slope = get_slope, get_aspect = get_aspect)
},
message = function(m) {
shinyjs::html(id = "console", html = m$message)
})
}
else {
incProgress(0, detail = paste0("Appending elevation for lat. bounds (", min_lat, ",", max_lat,
") and long. bounds (", min_long, ",", max_long, ") at zoom = ", zoom, "..." ))
withCallingHandlers({
shinyjs::html("console", "")
df_processed <- lookup_elevation_aws(df_processed, zoom = zoom, get_slope = get_slope, get_aspect = get_aspect)
},
message = function(m) {
shinyjs::html(id = "console", html = m$message)
})
}
}
# Weather lookup
if(length(weather_vars) != 0) {
incProgress(0, detail = paste0("Appending weather data..."))
selected_vars <- c()
if("wind direction" %in% weather_vars) {
selected_vars <- c(selected_vars, "wind_direction")
}
if("wind speed" %in% weather_vars) {
selected_vars <- c(selected_vars, "wind_speed")
}
if("ceiling height" %in% weather_vars) {
selected_vars <- c(selected_vars, "ceiling_height")
}
if("visibility distance" %in% weather_vars) {
selected_vars <- c(selected_vars, "visibility_distance")
}
if("temperature" %in% weather_vars) {
selected_vars <- c(selected_vars, "temperature")
}
if("dewpoint temperature" %in% weather_vars) {
selected_vars <- c(selected_vars, "temperature_dewpoint")
}
if("air pressure" %in% weather_vars) {
selected_vars <- c(selected_vars, "air_pressure")
}
if("precipitation depth" %in% weather_vars) {
selected_vars <- c(selected_vars, "AA1_depth")
}
withCallingHandlers({
shinyjs::html("console", "")
df_processed <- df_processed %>%
lookup_weather(selected_vars, search = FALSE, station = selected_station, is_shiny = TRUE)
},
message = function(m) {
shinyjs::html(id = "console", html = m$message)
})
}
removeModal()
for(i in 1:length(data_info$data)) {
aniid <- data_info$ani[i]
#on every 50th data file, increment file name counter and wipe data_sets
if(i %% 50 == 0 & i > 49) {
saveRDS(data_sets, data_info$rds_name)
num_saved_rds <- num_saved_rds + 1
data_info$rds_name <- paste0(data_dir, num_saved_rds, ".rds")
data_sets <- list()
}
df_out <- df_processed %>% dplyr::filter(Animal == aniid)
# get meta from df
file_meta <- get_meta(df_out, i, data_info$meta$dtype[i], data_info$file[i], data_info$site[i], aniid, data_info$rds_name)
# save meta to the designated meta df
meta_df <- save_meta(meta_df, file_meta)
# replace df with elevation df
data_sets[[paste0("ani",aniid)]] <- df_out
}
}) #progress bar
#save remaining data files
saveRDS(data_sets, data_info$rds_name)
return(meta_df)
}
#'
#'Generate metadata for an animal data frame -
#'filename, site, date min/max, animals, min/max lat/longitude, storage location
#'
#'@param df clean animal data frame
#'@param file_id ID number of source of animal data frame
#'@param dtype igotu or columbus
#'@param file_name .csv source of animal data frame
#'@param site physical source of animal data
#'@param ani_id ID of animal found in data frame
#'@param storage_loc .rds storage location of animal data frame
#'@return df of metadata for animal data frame
#'@noRd
get_meta <- function(df, file_id, dtype, file_name, site, ani_id, storage_loc) {
return(data.frame(file_id = file_id,
dtype = dtype,
file_name = file_name,
site = site,
ani_id = ani_id,
min_date = min(df$DateTime, na.rm = TRUE),
max_date = max(df$DateTime, na.rm = TRUE),
min_lat = min(df$Latitude),
max_lat = max(df$Latitude),
min_long = min(df$Longitude),
max_long = max(df$Longitude),
storage = storage_loc))
}
#'
#'Save metadata to a data frame and return it
#'
#'@param meta_df the data frame to store metadata in
#'@param file_meta meta for a .csv file generated by get_meta
#'@return df of metadata
#'@noRd
#'
save_meta <- function(meta_df, file_meta) {
meta_df <- rbind(meta_df, file_meta)
return(meta_df)
}
#'
#'Get animal data set from specified meta.
#'If date range is invalid, automatically returns all animal data specified by meta_df.
#'
#'@param meta_df data frame of specified meta
#'@param min_date minimum date specified by user
#'@param max_date maximum date specified by user
#'@return df of animal data from specified meta
#'@noRd
#'
get_data_from_meta <- function(meta_df, min_date = min(meta_df$min_date), max_date = max(meta_df$max_date)) {
meta_df$storage <- as.character(meta_df$storage)
rds_files <- list(unique(meta_df$storage))
current_df <- data.frame()
for(file_name in rds_files) {
current_rds <- readRDS(file_name)
for(df in current_rds) {
current_df <- rbind(current_df, df)
}
}
current_df <- current_df %>%
dplyr::filter(Animal %in% meta_df$ani_id,
DateTime <= as.Date(max_date),
DateTime >= as.Date(min_date))
# check if current_df is empty
if(nrow(current_df) == 0) {
current_df <- current_df %>%
dplyr::filter(Animal %in% meta_df$ani_id)
}
return(current_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.