Nothing
#' @title Evaluate deployment of recording files for potential problems
#'
#' @description Runs a number of quality assurance / quality control (QAQC)
#' checks on a folder of recording files to identify potential problems.
#' These include checking the start and end times of files for consistency
#' to identify potential data gaps, measuring sound levels in each file
#' to identify potential recorder issues, and (if applicable) checking
#' battery and temperature data to identify potential instrument failure.
#' Can also create spectrogram images throughout the deployment to aid in
#' visually checking for problems or noise.
#'
#' @param dir folder or folders containing recordings and optionally Soundtrap
#' .log.xml files. All .wav and .log.xml files within \code{dir} will be
#' analysed, as well as all files in each subfolder of \code{dir} (only
#' going down one level).
#' @param excludeDirs the names of any subfolders within \code{dir} that
#' should be excluded (e.g. if \code{dir} contains folders "Recordings" and
#' "Clips" then \code{excludeDirs="Clips"} would result in only the "Recordings"
#' folder being analysed
#' @param sampleWindow start and end (in seconds) of the time window to use
#' for analysis, e.g. \code{c(40, 100)} will use a 60 second window starting
#' 40 seconds into the file
#' @param channel channel number of recording files to use for analysis
#' @param sensitivity the sensitivity of the recording device in dB, this
#' is typically a large negative number
#' @param calibration if not \code{NULL}, the frequency dependent calibration
#' to apply. Must have "frequency" and "gain" (in dB), can either be a .tf
#' file, a CSV file with columns for frequency and gain, or a dataframe with
#' columns frequency and gain
#' @param timeRange if not \code{NULL}, a vector of two POSIXct times identifying
#' the expected start and end times of the deployment. If the actual start and
#' end times of of the recording files are earlier or later than these, then
#' a warning will be issued and no calculations will be done, returning
#' \code{NULL}
#' @param name a name to assign for this deployment, used for plot labeling,
#' logging, and stored as \code{projectName} with the output dataframe. If
#' left as \code{NULL}, then the basename of \code{dir} will be used.
#' @param subDirPattern if not \code{NULL}, a pattern to use for selecting
#' which subfolders of \code{dir} to use for analysis. E.g. if folders
#' "Site1_Recordings" and "Site2_Recordings" both exist in \code{dir}, then
#' \code{subDirPattern="^Site1"} would result in analysing only the first
#' folder of recordings
#' @param outDir if not \code{NULL}, a directory to store outputs. Outputs include
#' plots, a CSV of the calculated metrics, and a .txt log file if \code{log=TRUE}
#' @param nSpectrograms number of spectrogram images to generate. Recording files
#' for creating the spectrograms will be approximately evenly spaced throughout
#' the deployment - if \code{nSpectrograms=5}, then the first recording will be
#' randomly chosen from the first 20% of files, the second from the second 20%,
#' etc.
#' @param specLength length of spectrograms to create, in seconds
#' @param panelLength length of each panel of the spectrogram plot, in seconds.
#' This must be less than \code{specLength}, and is used to avoid creating
#' awkwardly long plots. If \code{specLength=360} and \code{panelLength=60},
#' then the result will be a 6 panel plot where each section is 60 seconds long
#' @param log if \code{TRUE} and \code{outDir} is not \code{NULL}, then a text
#' file named "(name)_EvaluateRecorder_LogFile.txt" will be created in
#' \code{outDir} logging progress and warning messages
#' @param progress logical flag to show a progress bar
#' @param verbose logical flag to show some messages
#'
#' @returns a dataframe of the QAQC metric outputs for each recording file
#'
#' @importFrom PAMmisc fastReadWave processSoundtrapLogs
#' @importFrom scales label_log
#' @importFrom utils write.csv
#'
#' @export
#'
evaluateDeployment <- function(dir,
excludeDirs=c('Post_Retrieval_Data', 'Pre_Deployment_Data'),
sampleWindow=c(60, 120),
channel=1,
sensitivity=NA,
calibration=NULL,
timeRange=NULL,
name=NULL,
subDirPattern=NULL,
outDir=NULL,
nSpectrograms=0,
specLength=1800,
panelLength=300,
log=FALSE,
progress=TRUE,
verbose=TRUE) {
# subDirPattern is used when multiple device IDs were prsent in same main
# folder so that they can be processed separately
# add option that dir can be multiple if XML and wav not same main folder
# wav, log.xml, these are only file types we currently allow
exts <- '\\.wav|\\.log\\.xml'
if(!dir.exists(dir)) {
warning('Folder ', dir, ' does not exist')
return(NULL)
}
if(!is.null(outDir) && !dir.exists(outDir)) {
dir.create(outDir)
}
if(isTRUE(log) && is.null(outDir)) {
warning('Cannot create log file without specifying "outDir"')
return(NULL)
}
if(is.null(name)) {
name <- basename(dir[1])
}
procStart <- Sys.time()
on.exit({
procEnd <- Sys.time()
timeMin <- as.numeric(difftime(procEnd, procStart, units='mins'))
if(isTRUE(verbose)) {
cat('Project', name, 'processed in', round(timeMin, 1), 'minutes\n')
}
})
if(isTRUE(log)) {
logName <- paste0(name, '_EvaluateRecorder_LogFile.txt')
logCon <- file(file.path(outDir, logName), open='wt')
sink(file=logCon, type='message')
sink(file=logCon, type='output')
ow <- getOption('warn')
options(warn=1)
on.exit({
options(warn=ow)
sink()
sink(type='message')
close(logCon)
},
add=TRUE, after=TRUE)
cat('------------------------------------------\n')
cat('Starting deployment evaluation at',
format(Sys.time(), '%Y-%m-%d %H:%M:%S'), '\n')
cat('------------------------------------------\n')
cat('Analysis files saved to', outDir, '\n---\n')
cat('Analyzing files in', dir, '\n---\n')
cat('Analyzing project', name, '\n---\n')
}
dirName <- basename(dir)
# only going down one subfolder
# subDirs <- list.dirs(dir, full.names=TRUE, recursive=FALSE)
subDirs <- unlist(lapply(dir, list.dirs, full.names=TRUE, recursive=FALSE))
# sometimes we split into FPOD/ST directories first
hasST <- any(grepl('_ST$', subDirs))
hasFPOD <- any(grepl('_FPOD$', subDirs))
if(hasST && hasFPOD) {
dir <- grep('_ST$', subDirs, value=TRUE)
subDirs <- list.dirs(dir, full.names=TRUE, recursive=FALSE)
}
if(length(subDirs) == 1) {
oneMore <- list.dirs(subDirs, full.names=TRUE, recursive=FALSE)
if(length(oneMore) != 0) {
subDirs <- oneMore
}
}
keepDirs <- rep(TRUE, length(subDirs))
for(e in excludeDirs) {
keepDirs <- keepDirs & !grepl(e, basename(subDirs), ignore.case=TRUE)
}
subDirs <- subDirs[keepDirs]
if(!is.null(subDirPattern)) {
keepSub <- grepl(subDirPattern, basename(subDirs))
if(!any(keepSub)) {
warning('subDirPattern ', subDirPattern, ' did not match any subfolders',
' in folder ', dir, immediate.=TRUE)
keepSub <- rep(TRUE, length(keepSub))
}
subDirs <- subDirs[keepSub]
}
# subDirs <- subDirs[!basename(subDirs) %in% excludeDirs]
# list files from both base and sub dirs
allFiles <- unlist(lapply(c(dir, subDirs), function(x) {
list.files(x, pattern=exts, full.names=TRUE, recursive=FALSE)
}))
isWav <- grepl('\\.wav$', allFiles)
if(!any(isWav)) {
warning('No wav files found in folder ', dir, immediate. = TRUE)
return(NULL)
}
wavFiles <- allFiles[isWav]
if(!is.null(subDirPattern)) {
wavBase <- unique(dirname(wavFiles))
if(length(wavBase) > 1) {
warning('More than 1 folder of wav files was found within folder ',
dir, ' using subDirPattern ', subDirPattern, immediate.=TRUE)
}
}
isLog <- grepl('\\.log\\.xml', allFiles)
logFiles <- allFiles[isLog]
wavTimes <- fileToTime(wavFiles)
if(!is.null(timeRange)) {
startWav <- which.min(wavTimes)
diffStart <- as.numeric(difftime(wavTimes[startWav], timeRange[1], units='secs'))
if(diffStart < -1) {
warning('Appears Clipping has not happened - first wav file (',
basename(wavFiles[startWav]), ') is ', abs(diffStart),
' seconds before expected start time', immediate. = TRUE)
return(NULL)
}
endWav <- which.max(wavTimes)
endHeader <- fastReadWave(wavFiles[endWav], header=TRUE)
endTime <- wavTimes[endWav] + endHeader$samples / endHeader$sample.rate
diffEnd <- as.numeric(difftime(wavTimes[endWav], endTime, units='secs'))
if(diffEnd > 1) {
warning('Appears Clipping has not happened - last wav file (',
basename(wavFiles[endWav]), ') is ', abs(diffEnd),
' seconds after expected end time', immediate. = TRUE)
return(NULL)
}
}
if(!is.null(calibration) &&
is.character(calibration) &&
!file.exists(calibration)) {
isCal <- grepl(basename(calibration), allFiles)
if(!any(isCal)) {
warning('Calibration file ', calibration, ' could not be found')
return(NULL)
}
calibration <- allFiles[isCal][1]
}
if(isTRUE(log)) {
cat('Calculating TOLs from', sampleWindow[1], 'to', sampleWindow[2],
'for', length(wavFiles), 'files', '\n')
cat('---\n')
cat('Total calibration correction of recorder and hydrophone entered as',
sensitivity, 'dB', '\n---\n')
if(is.null(calibration) || is.na(calibration)) {
cat('No frequency-dependent calibration applied', '\n---\n')
} else if(is.character(calibration)) {
cat('Applying frequency-dependent calibration from file',
calibration, '\n')
} else {
cat('Applying frequency-dependent calibration\n')
}
}
wavQaqc <- evaluateRecordings(wavFiles,
sampleWindow=sampleWindow,
octave='tol',
channel=channel,
calibration=calibration,
sensitivity=sensitivity,
progress=progress
)
if(!is.null(name)) {
wavQaqc$projectName <- name
}
if(!is.null(outDir)) {
tolPlot <- plotQAQCLevel(wavQaqc, dbRange=c(50, 140), freqMin=30, title=name) +
theme(plot.title = element_text(hjust = 0.5))
ggsave(filename=file.path(outDir, paste0(name, '_TOLPlot.png')),
plot=tolPlot, width=4e3, height=3e3, units='px')
gapPlot <- plotQAQCGap(wavQaqc, title=name) +
theme(plot.title = element_text(hjust = 0.5))
ggsave(filename=file.path(outDir, paste0(name, '_GapPlot.png')),
plot=gapPlot, width=3e3, height=4e3, units='px')
if(!is.null(calibration)) {
calPoints <- checkCalibration(calibration)
freqs <- seq(from=min(calPoints$frequency), to=max(calPoints$frequency), by=1)
calSmooth <- data.frame(frequency=freqs,
gain=interp1(calPoints$frequency, calPoints$gain, xi=freqs, method='pchip'))
logFreq <- floor(range(log10(freqs)))
logBreaks <- 10^((logFreq[1]):(logFreq[2]))
calPlot <- ggplot() +
geom_point(data=calPoints, aes(x=.data$frequency, y=.data$gain)) +
geom_line(data=calSmooth, aes(x=.data$frequency, y=.data$gain)) +
scale_x_log10(breaks=logBreaks, labels=label_log()) +
ggtitle(name) +
labs(y='Transfer Function (dB)', x='Frequency (Hz)')
ggsave(filename=file.path(outDir, paste0(name, '_CalibrationPlot.png')),
plot=calPlot, width=3e3, height=2e3, units='px')
}
}
# IF st log files exist add the batt/temp data
if(any(isLog)) {
if(isTRUE(log)) {
cat('---', format(Sys.time(), '%Y-%m-%d %H:%M:%S'), '\n')
cat('Analyzing Soundtrap log files for Temperature and Voltage\n')
}
logQaqc <- processSoundtrapLogs(logFiles)
wavQaqc <- timeJoin(wavQaqc,
rename(logQaqc, 'UTC' = 'startUTC')[c('UTC', 'intBatt', 'extBatt', 'temp')],
interpolate=FALSE)
# logs may not match first and last wav files bc they can be chopped, correct
firstIn <- wavQaqc$UTC[1] >= logQaqc$startUTC & wavQaqc$UTC[1] <= logQaqc$endUTC
lastIn <- wavQaqc$UTC[nrow(wavQaqc)] >= logQaqc$startUTC & wavQaqc$UTC[nrow(wavQaqc)] <= logQaqc$endUTC
firstIn <- max(which(firstIn))
lastIn <- max(which(lastIn))
wavQaqc$intBatt[1] <- logQaqc$intBatt[firstIn]
wavQaqc$intBatt[nrow(wavQaqc)] <- logQaqc$intBatt[lastIn]
wavQaqc$extBatt[1] <- logQaqc$extBatt[firstIn]
wavQaqc$extBatt[nrow(wavQaqc)] <- logQaqc$extBatt[lastIn]
wavQaqc$temp[1] <- logQaqc$temp[firstIn]
wavQaqc$temp[nrow(wavQaqc)] <- logQaqc$temp[lastIn]
if(!is.null(outDir)) {
tvPlot <- plotQAQCTV(wavQaqc, title=name) +
theme(plot.title = element_text(hjust = 0.5))
ggsave(filename=file.path(outDir, paste0(name, '_TempVoltPlot.png')),
plot=tvPlot, width=3e3, height=2e3, units='px')
}
}
if(!is.null(outDir)) {
wavQaqc$UTC <- psxTo8601(wavQaqc$UTC)
write.csv(wavQaqc,
file=file.path(outDir, paste0(name, '_QAQCData.csv')),
row.names=FALSE)
wavQaqc$UTC <- as.POSIXct(wavQaqc$UTC, format='%Y-%m-%dT%H:%M:%SZ', tz='UTC')
}
if(isTRUE(log)) {
cat('---', format(Sys.time(), '%Y-%m-%d %H:%M:%S'), '\n')
cat('Generating', nSpectrograms, 'spectrograms\n')
}
if(!is.null(outDir) &&
nSpectrograms > 0) {
specDir <- file.path(outDir, paste0(name, '_Spectrograms'))
if(!dir.exists(specDir)) {
dir.create(specDir)
}
toSpec <- rep(NA, nSpectrograms)
nWavs <- length(wavFiles)
brks <- ceiling(seq(from=1, to=nWavs+1, length.out=nSpectrograms+1))
for(i in seq_along(toSpec)) {
thisSet <- seq(from=brks[i], to=brks[i+1]-1, by=1)
toSpec[i] <- sample(thisSet, size=1)
}
if(toSpec[nSpectrograms] > length(nWavs)) {
toSpec[nSpectrograms] <- nWavs
}
for(i in seq_along(toSpec)) {
thisWav <- fastReadWave(wavFiles[toSpec[i]], from=0, to=specLength)
baseWav <- gsub('\\.[A-z]{1,4}$', '', basename(wavFiles[toSpec[i]]))
thisFile <- paste0(name, '_',
'Spectrogram', specLength, 'sec_',
baseWav, '.png')
trySpec <- try(createSpecImage(thisWav, channel=1, wl=2048, hop=1,
title=paste0(name, '_', baseWav),
startTime=fileToTime(wavFiles[toSpec[i]]),
panelLength=panelLength, ratio=1,
file=file.path(specDir, thisFile)))
if(inherits(trySpec, 'try-error')) {
warning('Problem with spectrogram for ', baseWav)
}
}
}
if(isTRUE(log)) {
cat('------------------------------------------\n')
cat('Evaluation finished without issue on',
format(Sys.time(), '%Y-%m-%d %H:%M:%S'), '\n')
cat('------------------------------------------\n')
}
wavQaqc
}
psxTo8601 <- function(x) {
if(is.character(x)) {
return(x)
}
format(x, format='%Y-%m-%dT%H:%M:%SZ')
}
#' @importFrom tools file_path_sans_ext
#'
fileToTime <- function(x) {
if(length(x) > 1) {
result <- lapply(x, fileToTime)
result <- as.POSIXct(unlist(result), origin='1970-01-01 00:00:00', tz='UTC')
return(result)
}
x <- basename(x)
x <- file_path_sans_ext(x)
format <- c('pamguard', 'pampal', 'soundtrap', 'sm3', 'icListens1', 'icListens2', 'AMAR')
for(f in format) {
switch(
f,
'pamguard' = {
date <- gsub('.*([0-9]{8}_[0-9]{6}_[0-9]{3})$', '\\1', x)
posix <- as.POSIXct(substr(date, 1, 15), tz = 'UTC', format = '%Y%m%d_%H%M%S')
if(is.na(posix)) next
millis <- as.numeric(substr(date, 17, 19)) / 1e3
if(!is.na(posix)) {
break
}
},
'pampal' = {
date <- gsub('.*([0-9]{14}_[0-9]{3})$', '\\1', x)
posix <- as.POSIXct(substr(date, 1, 14), tz = 'UTC', format = '%Y%m%d%H%M%S')
if(is.na(posix)) next
millis <- as.numeric(substr(date, 16, 18)) / 1e3
if(!is.na(posix)) {
break
}
},
'soundtrap' = {
date <- gsub('.*\\.([0-9]{12})$', '\\1', x)
posix <- as.POSIXct(date, format = '%y%m%d%H%M%S', tz='UTC')
millis <- 0
if(!is.na(posix)) {
break
}
},
'sm3' = {
date <- gsub('.*\\_([0-9]{8}_[0-9]{6})Z?$', '\\1', x)
posix <- as.POSIXct(date, format = '%Y%m%d_%H%M%S', tz='UTC')
millis <- 0
if(!is.na(posix)) {
break
}
},
'icListens1' = {
date <- gsub('.*_([0-9]{8}-[0-9]{6})$', '\\1', x)
posix <- as.POSIXct(date, format = '%Y%m%d-%H%M%S', tz='UTC')
millis <- 0
if(!is.na(posix)) {
break
}
},
'icListens2' = {
date <- gsub('.*_([0-9]{6}-[0-9]{6})$', '\\1', x)
posix <- as.POSIXct(date, format = '%y%m%d-%H%M%S', tz='UTC')
millis <- 0
if(!is.na(posix)) {
break
}
},
'AMAR' = {
# 'AMAR668.9.20210823T231318Z.wav' example
date <- gsub('.*([0-9]{8}T[0-9]{6}Z)$', '\\1', x)
posix <- as.POSIXct(date, format='%Y%m%dT%H%M%SZ', tz='UTC')
millis <- 0
if(!is.na(posix)) {
break
}
}
)
}
posix + millis
}
timeJoin <- function(x, y, thresh=Inf, interpolate=TRUE, replace=FALSE, keepDiff=FALSE) {
if(is.null(x) ||
nrow(x) == 0) {
return(x)
}
x[['timeDiff']] <- NULL
# reservedCols <- c('eventId', 'db', 'UID', 'eventLabel', 'BinaryFile', 'species', 'Channel')
if(isFALSE(replace)) {
addCols <- colnames(y)[!colnames(y) %in% colnames(x)]
oldCols <- colnames(x)
} else if(isTRUE(replace)) {
addCols <- colnames(y)[colnames(y) != 'UTC']
oldCols <- colnames(x)[!colnames(x) %in% colnames(y)]
oldCols <- c('UTC', oldCols)
} else if(is.na(replace)) {
overlaps <- (colnames(y) %in% colnames(x)) &
colnames(y) != 'UTC'
if(any(overlaps)) {
overlaps <- colnames(y)[overlaps]
hasNa <- sapply(overlaps, function(c) {
anyNA(x[[c]])
})
hasNa <- overlaps[hasNa]
for(n in hasNa) {
isNa <- is.na(x[[n]])
newMatch <- timeJoin(x[c('UTC', n)], y[c('UTC', n)], thresh=thresh, interpolate=interpolate, replace=TRUE)
x[[n]][isNa] <- newMatch[[n]][isNa]
}
}
addCols <- colnames(y)[!colnames(y) %in% colnames(x)]
oldCols <- colnames(x)
}
# addCols <- addCols[!addCols %in% reservedCols]
if(length(addCols) == 0) {
return(x)
}
y <- y[c('UTC', addCols)]
x <- x[oldCols]
x$xTime <- x$UTC
y$yTime <- y$UTC
setDT(x)
setDT(y)
setkeyv(x, 'UTC')
setkeyv(y, 'UTC')
result <- y[x, roll='nearest']
result$timeDiff <- abs(as.numeric(result$xTime) - as.numeric(result$yTime))
tooFar <- result$timeDiff > thresh
result[tooFar, (addCols) := NA]
if(nrow(y) == 1) {
interpolate <- FALSE
}
if(interpolate) {
for(c in addCols) {
interpVal <- approx(x=y$UTC, y=y[[c]], xout=result$xTime[!tooFar], rule=2)$y
result[[c]][!tooFar] <- interpVal
}
}
result[, c('xTime', 'yTime') := NULL]
setDF(x)
setDF(y)
setDF(result)
outCols <- c(oldCols, addCols)
if(isTRUE(keepDiff)) {
outCols <- c(outCols, 'timeDiff')
}
result[outCols]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.