#' MABM shapefile creation, preparation for database import, and audio file scrubbing.
#'
#' This function associates bat calls classified with Bat Call Identification (BCID;
#' \url{http::/www.batcallid.com}) software with simultaneously collected geographic
#' location information. For this application, bat calls were collected with (usually) an
#' Anabat SD2 or the older Anabat SD1 (\url{https://www.titley-scientific.com}) and
#' georeference by integrating the Anabat data logger, PDA and an external GPS. From this
#' association, it also creates multiple shapefiles and a *.csv for incorporation into the
#' MABM Access database. If requested, suspected noise files can be scrubbed (moved) to a
#' new subdirectory.
#'
#' If \code{for_import = TRUE}, the function will rearrange the columns to match the expected
#' import format of the MABM database and replace \code{NA}s with blanks to play nicely with
#' expected variable types. This necessitates conversion of most (all?) columns to text
#' strings (which is not very handy if you want to work with the output in R). This argument
#' is forced to \code{for_import = FALSE} if the user specifies \code{keep_output = TRUE}.
#'
#' If \code{keep_output = TRUE}, the function creates a list containing (1) a \code{data.frame}
#' ('final_calls') containing information on the bat detections, (2) a
#' \code{\link[sf]{sf}} ('route_pt') of all GPS fixes along the route, (3) a
#' \code{\link[sf]{sf}} of these GPS fixes, and (4) a
#' \code{\link[sf]{sf}} ('call_pt') of all bat detections. This is handy if the
#' user wants to conduct further investigations in R. To access this output, remember to
#' assign the function output to an R object.
#'
#' @param route_name character string indicating the name used to file the created output. Default (NULL)
#' prompts the user to select from a list of all MABM routes. This argument is useful primarily
#' when processing data external to the MABM program (e.g., data collected along a non-MABM route)
#' @param scrub logical indicating whether Anabat files (ending with '#'), if present, identified
#' as noise (i.e., not assigned an ID in BCID) should be scrubbed (moved) to a newly created
#' 'scrubbed' subdirectory (default = TRUE); non-scrubbed files are not moved
#' @param gps logical (default = TRUE) indicating whether the a GPS file is available to georeference
#' the BCID classification file
#' @param for_import logical indicating whether the output *.csv file should be formatted for import
#' into the MABM Access database (default = TRUE). See details.
#' @param keep_output logical (default = FALSE) that creates a list containing potentially
#' useful outputs.
#' @param overwrite logical (default = FALSE) indicating whether to overwrite the output
#' directory and files, if they exist.
#' @param plot logical (default = FALSE) indicating whether to plot the processed route using
#' \code{\link{plot_MABM_route}}
#' @export
MABM_route <- function(route_name = NULL, scrub = TRUE, gps = TRUE,
for_import = TRUE, keep_output = FALSE, overwrite = FALSE,
plot = FALSE) {
lat = lon = time = dt = filename = "." = call_id = Site = Site_abbr = NULL # Variable "declaration" for R CMD check
# Ask to set MABM root directory
if (is.null(getOption("MABM_home"))) {
ans <- yesno()
if (ans %in% c("Y", "y")) set_MABM_root()
}
# If keeping output (keep_output = TRUE), override for_import
if (keep_output) for_import <- FALSE
## Prompt user to specify route name (site_Name in Access)
# Load sites
sites <- utils::read.csv(system.file("extdata", "site_list.csv", package = "MABM"), header = TRUE,
stringsAsFactors = FALSE) %>%
arrange(Site, Site_abbr)
colons <- ifelse(grepl("\\d", substr(sites$Site_abbr, nchar(sites$Site_abbr), nchar(sites$Site_abbr))),
": ", "")
menu_items <- paste0(sites$Site, colons, sites$Site_desc, " (", sites$Site_abbr, ")")
if (is.null(route_name)) {
route_name <- utils::select.list(menu_items, title="Choose the MABM route", multiple = FALSE, graphics = TRUE)
# Drop the full location
route_name <- gsub(".*\\(|\\)", "", route_name)
}
if (route_name == "") stop("You must select or provide a route name.")
## Retrieve GPS and BCID files
## Assumes BCID file is in same directory
# Call file first
if (!is.null(getOption("MABM_home"))) {
base_dir <- grep(paste0(route_name, "$"), two_deep(getOption("MABM_home")), value = TRUE)
if (length(base_dir) == 1) {
base_dir <- paste0(normalizePath(base_dir), "\\*.xls")
} else base_dir <- "C:\\*.xls"
} else base_dir = "C:\\*.xls"
calls <- utils::choose.files(default = base_dir,
caption = "Select BCID output .xls file with bat call information.",
multi = FALSE)
if (length(calls) == 0) stop("Function cancelled. No BCID output file selected.")
route_date <- substr(basename(calls), 1, 8)
# Extract file input directory
trunc <- sapply(gregexpr("\\\\", calls), utils::tail, 1)
in_dir <- substr(calls, 1, trunc)
# Assume GPS text file in same directory and called `gps.txt`
# Notify if this is so; prompt user to specify if `gps.txt` not found
if (!gps) {
GPS <- structure(list(lat = NA_real_, lon = NA_real_, alt_m = NA_real_,
date = structure(NA_real_, class = "Date"), time = NA_character_,
dt = structure(NA_real_, tzone = "UTC", class = c("POSIXct", "POSIXt")),
call_id = NA_integer_, order = NA_integer_),
.Names = c("lat","lon", "alt_m", "date", "time", "dt", "call_id", "order"),
row.names = 1L, class = "data.frame")
} else if (file.exists(paste0(in_dir, "gps.txt"))) {
message("\nUsing 'gps.txt' found with ", basename(calls), " as GPS source.\n")
GPS <- paste0(in_dir, "gps.txt")
} else {
GPS <- utils::choose.files(default = paste0(in_dir, "*.txt"),
caption = "File 'gps.txt' not found. Please select GPS text file.",
multi = FALSE)
}
if (length(GPS) == 0) stop("Function cancelled. No GPS text file selected.",
"You can specify `gps = FALSE` if none exists.")
if (gps) {
## Initial GPS text file read and division into columns
# Get number of lines to skip at beginning of GPS files
# Looks for line that contains location headers
n_skip <- grep("Latitude", readr::read_lines(GPS))
# Assign variable types and names; drop first and last columns
if (length(n_skip) == 1) {
GPS <- readr::read_fwf(GPS, readr::fwf_empty(GPS, skip = n_skip),
col_types = "_ccncc_", skip = n_skip)
} else {
GPS_string <- readr::read_lines(GPS)
GPS_start <- utils::head(n_skip, -1) + 1 # Drop last entry
GPS_end <- (grep("H R", GPS_string) - 2)[-1] # Two rows before start of datum info
# Drop empty segments
keep <- which(GPS_end >= GPS_start)
GPS_start <- GPS_start[keep]
GPS_end <- GPS_end[keep]
if (length(GPS_start) == 1 & length(GPS_end) == 1) {
keep <- seq(GPS_start, GPS_end)
} else {
keep <- do.call(c, mapply(seq, GPS_start, GPS_end))
}
GPS <- GPS_string[keep] %>% paste(., collapse = "\n")
GPS <- readr::read_fwf(GPS, readr::fwf_empty(GPS),
col_types = "_ccncc_")
}
names(GPS) <- c("lat", "lon", "alt_m", "date", "time")
# GPS quality control
# Sometimes GPS logs an incorrect date, although the time is correct
GPS <- gps_QC(GPS)
# Use until new dplyr is released
GPS <- GPS %>%
mutate(lat = as.numeric(substring(lat, 2)),
lon = ifelse(substring(lon, 1, 1) == "W",
as.numeric(substring(lon, 2)) * -1,
as.numeric(substring(lon, 2))),
date = lubridate::ymd(date),
dt = lubridate::ymd_hms(paste(date, time)),
call_id = gsub(":", "", time),
# call_id = as.integer(gsub(":", "", time)),
dec_min = dec_min(call_id)) %>%
arrange(dt) %>% as.data.frame()
GPS$order = 1:nrow(GPS)
}
## Call .xls file read and formatting
# Get start/end rows of relevant call information in BCID .xls file
# Looks at first sheet in .xls file
call_string <- do.call(paste0, readxl::read_excel(calls, sheet = 1, col_names = FALSE)) #concatenates all columns to simplify search
call_start <- grep("FILENAME", call_string) + 1 # Row after column headers; we have to make our own
# This may need some modification in future if format changes...
call_end <- grep("IDENTIFICATION", call_string) - 2 # Two rows before identification summary
# Read file
# This is all very hack-ish until readxl can incorporate the cellranger package
calls <- readxl::read_excel(calls, sheet = 1, col_names = FALSE,
range = readxl::cell_limits(c(call_start, 1), c(call_end, 7)))
names(calls) <- c("filename", "spp", "spp_perc", "group", "group_perc", "tot_pulses", "disc_prob")
# Get rid of some random tabs retained in the BCID .xls output
calls <- mutate_all(calls, clean_tabs)
# Replace blanks (i.e., "") with NAs
calls[calls == ""] <- NA
calls <- set_col_types(calls, c(rep("character", 2), "numeric", "character", "numeric",
"integer", "numeric"))
# Restructure data
# transform filename to call ID
calls <- mutate(calls,
call_id = gsub("[.]", "", substr(filename, 5, 11)),
# call_id = as.integer(gsub("[.]", "", substr(filename, 5, 11))),
dec_min = dec_min(call_id))
if (gps) {
## Associate (join) location data from GPS with bat calls
# However, rather than joining/merging, which require identical matches,
# we associate every call with the closest (absolute) GPS fix and record
# the difference in time (most will be 0; i.e., exact matches to the second)
# First, calculate difference in time (sec) between call and all GPS fixes
time_diffs <- outer(calls$dec_min, GPS$dec_min, "-")
# Get index of the closest GPS fix
nearest_fix <- apply(time_diffs, 1, function(x) which.min(abs(x)))
GPS_diff <- diag(time_diffs[, nearest_fix, drop = FALSE])
calls <- cbind(select(calls, -dec_min), select(GPS, -call_id)[nearest_fix, ],
GPS_diff = GPS_diff, row.names = NULL) %>%
arrange(order) # Ensure ordered chronologically
} else {
calls <- cbind(select(calls, -dec_min), select(GPS, -call_id), GPS_diff = NA,
row.names = NULL) %>%
# Add/extract call date/time
mutate(time = parse_time(call_id),
date = as.Date(route_date, format = "%Y%m%d") +
as.difftime(ifelse(lubridate::hour(lubridate::hms(time)) < 12, 1, 0), units = "days"))
}
## Rearranges columns for importGPS into MABM MS Access database (column order
## specified in import specification file 'calls1'
if (for_import) {
import <- with(calls,
data.frame(lat, lon, alt_m, date, time, call_id, filename,
spp, spp_perc, group, group_perc, tot_pulses, disc_prob,
call_id, order, GPS_diff, site_name = route_name))
# Replace NAs with blanks to play nicely with conversion in Access
# To do so requires the conversion of all fields to text
# NOTE: `time` is the time of the GPS fix, not the detection
import <- mutate_all(import, as.character)
import[is.na(import)] <- ""
}
## Create output directory
out_name <- paste(route_name, route_date, sep = "_")
out_dir <- paste0(in_dir, out_name)
if (!dir.exists(out_dir)) {
dir.create(out_dir)
} else if (!overwrite) {
stop("The output directory exists and you elected not to overwrite it.")
}
# Write final bat call data file with associated GPS information
csv_name <- paste0("Calls_", out_name, "_final.csv")
if (for_import) utils::write.csv(import, file = file.path(dirname(in_dir), csv_name), quote = FALSE)
if (gps) {
## Create point shapefile of all GPS locations (SavedRoute)
GPS_sf <- mutate(GPS, dt = as.character(dt)) %>%
st_as_sf(coords = c("lon", "lat"), crs = "+proj=longlat +datum=WGS84")
name <- paste0("SavedRoute_", out_name)
st_write(GPS_sf, file.path(out_dir, paste0(name, ".shp")), quiet = TRUE)
## Create line shapefile of the route (RouteLine)
GPS_lsf <- GPS_sf %>% st_coordinates() %>% st_linestring() %>%
st_sfc() %>% st_sf(id = "route") %>% st_set_crs(st_crs(GPS_sf))
name <- paste0("RouteLine_", out_name)
st_write(GPS_lsf, file.path(out_dir, paste0(name, ".shp")), quiet = TRUE)
## Create point shapefile of locations with bat calls (Calls)
calls_sf <- mutate(calls, dt = as.character(dt)) %>%
st_as_sf(coords = c("lon", "lat"), crs = "+proj=longlat +datum=WGS84")
name <- paste0("Calls_", out_name)
st_write(calls_sf, file.path(out_dir, paste0(name, ".shp")), quiet = TRUE)
message("The folder '", out_name, "' has been created in\n'",
in_dir, "'\n", "and contains the following:\n\n",
"Shapefiles:\n",
paste(list.files(path = out_dir, pattern=".shp$"), collapse = "\n"), "\n")
if (for_import)
message(csv_name, " is ready for MABM database import in ", dirname(in_dir))
} else {
message("The folder '", out_name, "' has been created in\n'",
in_dir, "'\n", "and contains the following:\n\n",
"Shapefiles:\n",
"No GPS data identified. No shapefiles created.\n")
if (for_import)
message(csv_name, " is ready for MABM database import in ", dirname(in_dir))
}
if (scrub) {
# Get list of all call files in directory
all_calls <- grep("[0-9]{2}\\#", dir(in_dir), value = TRUE)
# Get good calls (from call ID file)
good_calls <- calls[, c("filename", "spp")]
# Filter good calls from all calls
bad_calls <- all_calls[!(all_calls %in% good_calls[, 1])]
# Checking necessity of scrubbing before doing it!
if (length(all_calls) == 0) {
message("\nScrubbing summary:\nNo Anabat files detected in directory. Scrubbing ignored.")
} else if (length(bad_calls) == 0) {
message("\nScrubbing summary:\nNo suspected noise files in directory. Scrubbing ignored.")
} else { # Yay, we get to scrub!!!
# Create scrubbed directory
scrub_dir <- paste0(in_dir, "scrubbed")
dir.create(scrub_dir)
# Move likely noise files
sapply(bad_calls, move, in_dir = in_dir, out_dir = scrub_dir)
message("\nScrubbing summary:\nMoved ", length(bad_calls),
" suspected noise files to:\n'", scrub_dir, "'")
}
}
if (plot) {
p <- plot_MABM_route(gps = list.files(path = out_dir,
pattern="SavedRoute.*\\.shp",
full.names = TRUE))
print(p)
}
if (keep_output) return(list(final_calls = calls, route_pt = GPS_sf,
route_line = GPS_lsf, call_pt = calls_sf))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.