#' read Batscope export
#'
#' reads the \code{xlsx} generated by the batscope export and brings it into
#' the right format.
#'
#' @param path filename
#' @param species_col_name what is the name of the column which contains the
#' species name
#' @param quality_col_name what is the name of the column which contains the
#' relevant quality scores
#' @param quality_threshold sequences with species assignment quality below this
#' threshold will be discarded.
#' @param time_zone time zone of timestamps
#' @param shiny_progress display more progress info for shiny
#' @param shiny_progress_n fraction of progres bar for multiple files
#' @family data functions
#' @export
readBatscopeXLSX <- function(path = file.choose(),
batscope_version = "BatScope4",
species_col_name = "Auto Class 1",
quality_col_name = "Auto Class 1 Conf",
quality_threshold = 0.8,
time_zone = "UTC",
shiny_progress = FALSE,
shiny_progress_n = 1) {
message("\n", path, "\nwird eingelesen, kann eine Weile dauern...\n")
if (shiny_progress == TRUE) {
shiny::incProgress(0.1 / shiny_progress_n, detail = ".xlsx lesen..")
}
rawdata <- read_excel(path)
if (shiny_progress == TRUE) {
shiny::incProgress(0.6 / shiny_progress_n, detail = "Qualitätsprüfung...")
}
# MODIFY DATA for use in R
if (batscope_version == "BatScope4") {
data_r <- data_frame(
project = rawdata$Project,
timestamp = ymd_hms(rawdata$Timestamp, tz = time_zone),
survey_date =
if_else(
hour(timestamp) > 12,
ymd(lubridate::date(timestamp), tz = time_zone),
ymd(lubridate::date(timestamp) - 1, tz = time_zone)
),
latitude = rawdata$Latitude,
longitude = rawdata$Longitude,
temperature = rawdata$Temperature,
species = rawdata[[species_col_name]],
species_conf = rawdata[[quality_col_name]],
n_calls = rawdata[[str_c(species_col_name, " Calls")]]
)
} else {
data_r <- data_frame(
project = rawdata$ProjectName,
timestamp = update(rawdata$recTime,
year = year(rawdata$recDate),
month = month(rawdata$recDate), mday = day(rawdata$recDate), tzs = time_zone
),
survey_date =
if_else(
hour(timestamp) > 12,
ymd(lubridate::date(timestamp), tz = time_zone),
ymd(lubridate::date(timestamp) - 1, tz = time_zone)
),
latitude = rawdata$GPSLatitude,
longitude = rawdata$GPSLongitude,
temperature = rawdata$temperature,
species = rawdata[[species_col_name]],
species_conf = rawdata[[quality_col_name]],
n_calls = rawdata$numCallsEstimated
)
}
# discard sequences with low quality
dim_qual_before <- dim(rawdata)
data_r <- dplyr::filter(data_r, species_conf > quality_threshold)
dim_qual_after <- dim(data_r)
dim_qual_diff <- dim_qual_before[1] - dim_qual_after[1]
message("Summary of ", quality_col_name, "\n\n", sep = "")
message("\nDiscarded ", dim_qual_diff, " of ",
dim_qual_before[1], " sequences (",
(dim_qual_diff / dim_qual_before[1]) * 100, "%); ", dim_qual_after[1],
" remaining\n",
sep = ""
)
if (shiny_progress) {
incProgress(0.1 / shiny_progress_n, detail = "Daten formatieren...")
}
return(data_r)
}
#' Summarize Batscope data_r
#'
#' summarizes the Batscope Data by nights and bins
#'
#' @param data_r data.frame generated with \code{\link{readBatscopeXLSX}}
#' @param bin_width length of bins in min
#' @param lat vector of GPS latitudes for the stations
#' (recycled if needed). If NULL (default) the logged GPS data will be used
#' (averaged for each station)
#' @param long vector of GPS longitudes for the stations
#' (recycled if needed). If NULL (default) the logged GPS data will be used
#' (averaged for each station)
#' @param progress name of the progress bar to use, see
#' \code{\link[plyr]{create_progress_bar}}
#' @param shiny_progress display more progress info for shiny
#' @family data functions
#' @export
sumBatscopeData <- function(data_r,
bin_length = 5,
lat = NULL,
long = NULL,
progress = "text",
shiny_progress = FALSE) {
# wann startet die nacht und wann endet sie. Wird nur fuer binning
# verwendet.
nacht_start <- 13
nacht_ende <- 12
if (shiny_progress) {
incProgress(0.1, detail = "Binning...")
}
# binning der Daten (in bin_length min Intervalle)
n_cuts <- (24 + nacht_ende - nacht_start) * (60 / bin_length) + 1
cuts_list <- list()
for (i in seq_len(length(unique(data_r$survey_date)))) {
cuts_list[[i]] <- seq(as_datetime(unique(data_r$survey_date)[i]) + nacht_start * 60 * 60,
by = paste0(bin_length, " min"), length = n_cuts
)
}
cuts <- as.POSIXct(unlist(cuts_list),
origin = "1970-01-01 00:00",
tz = tz(unique(data_r$timestamp)[1])
)
data_r$bins_factor <- cut(data_r$timestamp,
breaks = unique(cuts),
include.lowest = TRUE, right = FALSE
)
# Zahlen der Events pro Tag, Mikrophon, species und bins
if (shiny_progress) {
incProgress(0.2,
detail = "Zusammenfassung nach Tag, Project, Species und Bins..."
)
} else {
cat("Zusammenfassung nach Tag, Project, Spezies und Bins...\n")
}
data_binned_by_species <- plyr::ddply(data_r,
.(survey_date, project, species, bins_factor),
summarize,
n_events = length(n_calls),
sum_nCalls = sum(n_calls),
meanT_BL = mean(temperature),
.progress = progress
)
# Zahlen der Events pro Tag, Mikrophon, und bins (alle species)
if (shiny_progress) {
incProgress(0.2, detail = "Zusammenfassung Total aller species...")
} else {
cat("Zusammenfassung Total aller species...\n")
}
data_binned_all_species <- plyr::ddply(data_r,
.(survey_date, project, bins_factor),
summarize,
n_events = length(n_calls),
sum_nCalls = sum(n_calls),
meanT_BL = mean(temperature),
.progress = progress
)
data_binned_all_species$species <- factor("all")
data_binned <- rbind(data_binned_by_species, data_binned_all_species)
data_binned$bins <- as.POSIXct(data_binned$bins_factor, tz = tz(unique(data_r$timestamp)[1]))
# GPS Koordinaten
if (shiny_progress) {
incProgress(0.2, detail = "GPS Koordinaten bearbeiten...")
} else {
cat("GPS Koordinaten bearbeiten...\n")
}
if (is.null(lat) | is.null(long)) {
gps_coords <- ddply(data_r, .(project), summarize,
lat = mean(latitude, na.rm = TRUE),
long = mean(longitude, na.rm = TRUE)
)
if (any(is.na(gps_coords))) {
stop("GPS Koordinaten nicht für alle Stationen vorhanden.")
stop("Bitte manuell eingeben.")
} else {
message("Koordinaten von Batlogger verwendet.")
print(gps_coords)
}
} else {
gps_coords <- data.frame(
project = unique(data_r$project),
lat,
long
)
message("Manuelle Koordinaten verwendet.")
print(gps_coords)
}
data_binned <- merge(data_binned, gps_coords)
# Sonnenauf und -untergang
if (shiny_progress) {
incProgress(0.2, detail = "Berechne Sonnenauf und -untergangszeiten...")
} else {
cat("Berechne Sonnenauf und -untergangszeiten...\n")
}
gps_matrix <- matrix(c(data_binned$long, data_binned$lat), ncol = 2)
data_binned$sunset <- sunriset(
gps_matrix, as_datetime(data_binned$survey_date),
direction = "sunset", POSIXct.out = TRUE
)[, 2]
data_binned$sunrise <- sunriset(
gps_matrix, as_datetime(data_binned$survey_date) + 24 * 60 * 60,
direction = "sunrise", POSIXct.out = TRUE
)[, 2]
data_binned$project <- factor(data_binned$project)
data_binned$species <- factor(data_binned$species)
data_binned$bin_length <- bin_length
return(data_binned)
}
#' Read all BatScope Export Files in Folder
#'
#' Wrapper for readBatscopeXLSX to read multiple \code{xlsx} files or all files
#' within a folder
#'
#' @param path path to one file within folder or a vector of path, defaults to choose.file()
#' @param read_folder logical, should all .xlsx files in the folder be read?
#' @param ... additional arguments passed to \code{\link{readBatscopeXLSX}}
#' @family data functions
#' @export
readBatscopeXLSXmultiple <- function(path = file.choose(),
read_folder = FALSE,
...) {
if (read_folder) {
folder <- dirname(path)
files <- list.files(folder, pattern = ".xlsx", full.names = TRUE)
} else {
files <- path
}
message("Reading following files")
message(files)
data <- list()
pb <- txtProgressBar(
min = 0, max = 1, initial = 0, char = "=",
width = NA, style = 3
)
for (i in seq_along(files)) {
data[[i]] <- suppressMessages(
readBatscopeXLSX(files[i], shiny_progress_n = length(files), ...)
)
setTxtProgressBar(pb, i / length(files))
}
close(pb)
data <- plyr::ldply(data, rbind)
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.