# nvspl_to_ai ==================================================================
#' @name nvspl_to_ai
#' @title Convert NVSPLs data into acoustic indices
#' @description Convert NVSPL table data into acoustic indices.
#' @param input.directory Top-level NVSPL directory from which to ingest data (often named "NVSPL").
#' @param output.directory Where to place output files (often named "ANALYSIS").
#' @param project File name for your project (e.g., 'GLBAPhenology2019').
#' @param instrum Audio recorder used. Default = 'SM4'.
#' @param start.at.beginning Logical flag for where computation should start. Default = TRUE. If TRUE, computation starts at the beginning of recordings on a given day; TRUE is preferred in most cases for NSNSD bioacoustic analyses. If FALSE, indices are calculated only for data that begin at the top of the chosen timestep: for example, with a 10-minute timestep, each AI calculation starts at minute 0, 10, 20, 30, 40, or 50 of each hour, and any timestep without a full ten-minute sample is skipped. start.at.beginning = FALSE may be desired if the user wishes to break results easily into hours.
#' @param fminimum Lower limit of the frequency band of interest (NOTE: HXXXX represents the center frequency of the 1/3 octave band, e.g., H1600 = 1413-1778 Hz). Default = 'H1600'.
#' @param fmaximum Upper limit of frequency band of interest. Default = 'H8000'.
#' @param fbinMax Sets the frequency bins to look across for cluster analysis. Default = 8.
#' @param BKfminimum Lower limit of frequency bands for background noise (NOTE: if NVSPL calculated with PAMGuide, no data below H25). Default = 'H31p5'.
#' @param BKfmaximum Upper limit of frequency bands for background noise. Default = 'H1250'.
#' @param timestep Timestep of computation, in minutes (NOTE: to get daily values, summarizing the timestep values is preferred over running the code for an entire day). Default = 10.
#' @param startday Optional start date (as 'YYYY_MM_DD') from which to resume computation, useful when there are many files or after an error. Default = 1 (start at the first recorded day).
#' @param plt Logical flag for whether to plot NVSPL files. NOTE: this will slow the function down considerably. Default = FALSE.
#' @return Returns a CSV file of the acoustic indices to the output directory. For posterity, also saves a "params" file to the output.directory.
#' @details
#'
#' The output CSV file contains the following columns:
#'
#' \itemize{
#' \item{\strong{Site}: Site name.}
#' \item{\strong{Date}: Date as YYYY_MM_DD.}
#' \item{\strong{Yr}: Year as YYYY.}
#' \item{\strong{Mo}: Month of year as integer.}
#' \item{\strong{Day}: Day of month.}
#' \item{\strong{Hr}: Hour of day.}
#' \item{\strong{Min}: Minute of hour}
#' \item{\strong{Sec}: Second of minute.}
#' \item{\strong{timestep}: In minutes, the time increment over which acoustic indices are calculated.}
#' \item{\strong{SampleLength_sec}: Sample length (i.e., timestep) in seconds.}
#' \item{\strong{ACIout}: Acoustic Complexity Index for each timestep, computed on dB SPL values: for each frequency band, the sum of absolute differences between successive seconds divided by the band total, summed across bands (see the sketch after this list).}
#' \item{\strong{ACIoutI}: ACI computed on intensities, i.e., after converting dB values back to pressure via 10^(dB/10).}
#' \item{\strong{ACIoutN}: ACI computed on normalized dB values.}
#' \item{\strong{BKdB_low}: Background level (dB, unweighted) for the timestep: mean pressure across the background band (BKfminimum to BKfmaximum), converted back to dB.}
#' \item{\strong{BKdBA_low}: As BKdB_low, but computed from A-weighted levels.}
#' \item{\strong{BKdB_bird}: As BKdB_low, but computed over the frequency band of interest (fminimum to fmaximum).}
#' \item{\strong{BKdBA_bird}: As BKdB_bird, but computed from A-weighted levels.}
#' \item{\strong{avgAMP}: Average signal amplitude (#1 in Towsey et al. 2013): mean broadband dBA for the timestep, normalized against fixed bounds of -10 and 80 dB.}
#' \item{\strong{L10AMP}: Normalized 10th percentile of broadband dBA for the timestep (note: despite the name, the code computes the 10th percentile, i.e., L90).}
#' \item{\strong{Hf}: Spectral entropy (#10 in Towsey): Shannon entropy of mean band pressures, normalized by log(number of bands).}
#' \item{\strong{Ht}: Temporal entropy (#7 in Towsey): Shannon entropy of per-second broadband pressure, normalized by log(number of seconds).}
#' \item{\strong{EI}: Entropy index: Ht * Hf.}
#' \item{\strong{BioPh}: Biophony: summed mean pressure across the band of interest.}
#' \item{\strong{AthPh}: Anthrophony: summed mean pressure across the background band.}
#' \item{\strong{SoundScapeI}: Normalized soundscape index: (BioPh - AthPh) / (BioPh + AthPh).}
#' \item{\strong{Bio_anth}: Ratio of biophony to anthrophony: BioPh / AthPh.}
#' \item{\strong{AA}: Acoustic activity: for each band, the proportion of seconds exceeding that band's background estimate by more than 3 dB, summed across bands.}
#' \item{\strong{AAc}: Count of acoustic events: total number of second-by-band samples exceeding background + 3 dB.}
#' \item{\strong{AAdur}: Duration of acoustic events: median across bands of the mean run length (in seconds) of consecutive above-threshold samples.}
#' \item{\strong{AAanth}: As AA, but computed over the background (anthropogenic) band with its own background estimate.}
#' \item{\strong{AAcanth}: As AAc, for the background band.}
#' \item{\strong{AAduranth}: As AAdur, for the background band.}
#' \item{\strong{Rough}: Roughness: change in acoustic energy from one second to the next (sum of squared second differences per band), summarized over the timestep as the median across bands.}
#' \item{\strong{ADI_step}: Acoustic diversity index for the timestep: Shannon diversity (vegan::diversity) of per-band above-background activity scores.}
#' \item{\strong{Eveness_step}: Acoustic evenness for the timestep: Gini coefficient (ineq::Gini) of the same per-band activity scores.}
#' \item{\strong{pk}: Peak frequency: center frequency (Hz) of the band that most often holds the per-second maximum SPL.}
#' \item{\strong{pkd}: Kurtosis of the distribution of per-band peak-frequency counts.}
#' \item{\strong{pks}: Skewness of the same distribution.}
#' \item{\strong{Hm}: Entropy of spectral maxima, normalized by log2(number of bands).}
#' \item{\strong{HvPres}: Entropy of spectral variance, computed on pressures.}
#' \item{\strong{HvSPL}: Entropy of spectral variance, computed on dB SPL values.}
#' \item{\strong{unlist(dif_L10L90)}: Difference between the normalized L10 and L90 exceedance levels of broadband dBA (the awkward column name is the deparsed expression from the internal cbind() call).}
#' \item{\strong{Mamp}: Median of per-second broadband SPL (dB) for the timestep; used together with Ht to compute AR.}
#' \item{\strong{NumCL}: Optimal number of clusters for the timestep, estimated with NbClust (k-means, silhouette index).}
#' \item{\strong{SP2}: Spectral persistence: mean duration (seconds) of cluster runs that persist for longer than one second.}
#' \item{\strong{CL1dur}: Mean run duration (seconds) of cluster 1.}
#' \item{\strong{CL1pk}: Peak frequency (Hz) of cluster 1.}
#' \item{\strong{CL1Leq}: Median broadband SPL (dB) of cluster 1.}
#' \item{\strong{CL2dur}: As CL1dur, for cluster 2.}
#' \item{\strong{CL2pk}: As CL1pk, for cluster 2.}
#' \item{\strong{CL2Leq}: As CL1Leq, for cluster 2.}
#' \item{\strong{CL3dur}: As CL1dur, for cluster 3.}
#' \item{\strong{CL3pk}: As CL1pk, for cluster 3.}
#' \item{\strong{CL3Leq}: As CL1Leq, for cluster 3.}
#' \item{\strong{CL4dur}: As CL1dur, for cluster 4.}
#' \item{\strong{CL4pk}: As CL1pk, for cluster 4.}
#' \item{\strong{CL4Leq}: As CL1Leq, for cluster 4.}
#' \item{\strong{vers}: Version string of this NVSPL-to-AI code (the internal PAMGuideVersion constant, e.g. 'V12noChunk_fixdBA'), carried into the output for provenance.}
#' \item{\strong{timezone}: Timezone string carried through from the input NVSPL files.}
#' \item{\strong{AR}: Acoustic richness: the product of the ranks of Mamp and Ht across a day's timesteps, normalized (see the AR note near the end of the function).}
#' }
#'
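#' For orientation, the ACI columns referenced above can be summarized with the
#' following condensed sketch (a paraphrase of the function body, not an exact
#' excerpt); \code{m} is a hypothetical seconds-by-frequency-band matrix:
#'
#' \preformatted{
#' aci <- function(m) {
#'   sum(vapply(seq_len(ncol(m)), function(frq) {
#'     sum(abs(diff(m[, frq]))) / sum(m[, frq])
#'   }, numeric(1)))
#' }
#' # ACIout uses dB SPLs directly, ACIoutI uses intensities (10^(SPL/10)),
#' # and ACIoutN uses internally normalized dB values
#' }
#'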
#' @seealso \code{\link{wave_to_nvspl}}
#' @importFrom grDevices colorRampPalette
#' @importFrom ineq Gini
#' @importFrom moments kurtosis skewness
#' @importFrom NbClust NbClust
#' @importFrom stats median quantile var
#' @importFrom utils read.csv write.csv
#' @importFrom vegan diversity
#' @export
#' @examples
#' \dontrun{
#'
#' # Create an input and output directory for this example
#' dir.create('example-input-directory')
#' dir.create('example-output-directory')
#'
#' # Read in example NVSPL data
#' data(exampleNVSPL)
#'
#' # Write example NVSPL data to example input directory
#' for (i in 1:length(exampleNVSPL)) {
#'   write.table(
#'     x = exampleNVSPL[[i]],
#'     file = paste0('example-input-directory/', names(exampleNVSPL)[i]),
#'     sep = ',',
#'     quote = FALSE
#'   )
#' }
#'
#' # Run nvspl_to_ai to generate acoustic indices csv for example NVSPL files
#' nvspl_to_ai(
#'   input.directory = 'example-input-directory',
#'   output.directory = 'example-output-directory',
#'   project = 'example-project'
#' )
#'
#' # View Results
#' (ai.results <- read.csv(
#'   list.files(path = 'example-output-directory',
#'              pattern = '.csv', full.names = TRUE))
#' )
#'
#' # Delete all temporary example files when finished
#' unlink(x = 'example-input-directory', recursive = TRUE)
#' unlink(x = 'example-output-directory', recursive = TRUE)
#'
#' }
#'
nvspl_to_ai <- function (input.directory, # top-level NVSPL file directory
output.directory, # where to put output files
project,
instrum = "SM4", # c('SM4', 'SM2' ?) # indicate audio recorder used
start.at.beginning = TRUE,
fminimum = 'H1600', # lower limit of freq band of interest (NOTE: HXXXX represents the center frequency of the 1/3 octave band, e.g., H1600 = 1413-1778 Hz)
fmaximum = 'H8000', # upper limit
fbinMax = 8, # sets the frequency bins to look across for cluster analysis
BKfminimum = "H31p5", # lower limit of frequency bands for background noise (NOTE: if NVSPL calculated with PAMGuide, no data below H25)
BKfmaximum = "H1250", # upper limit
timestep = 10, # in minutes (NOTE: to get daily values, summarizing the timestep values is preferred over running the code for an entire day)
startday = 1, # optional start date if many files or error
plt = FALSE
)
{
reqd <- c('ineq', 'moments', 'NbClust', 'vegan')
for (i in 1:length(reqd)) {
if (!requireNamespace(reqd[i], quietly = TRUE)) {
stop(
paste0("Package ", reqd[i]," must be installed to use this function."),
call. = FALSE
)
}
}
# Ensure a forward slash at end ($) of input.directory
if (grepl("\\/$", input.directory) == FALSE) {
input.directory <- paste0(input.directory, '/')
}
# Ensure a forward slash at end ($) of output.directory
if (grepl("\\/$", output.directory) == FALSE) {
output.directory <- paste0(output.directory, '/')
}
## INITIALIZE PARAMETERS INPUT FILE FOR calculate_ACI_NVSPL_Generic.R
## (1) SET DIRECTORY WHERE ALL AUDIO FILES TO PROCESS ARE
# #if you want to process multiple directories (site) with same recording parameters, choose the highest directory
# workingDir = choose.dir(caption = "Choose top directory with NVSPL files to process")
# # For example: E:\\ProjectName\\AUDIO\\"
# Outdir = choose.dir(caption = "Choose directory to put Acoustic Index output files")
# # RECOMMEND: E:\\ProjectName\\ANALYSIS\\AcousticIndex"
workingDir <- input.directory
Outdir <- output.directory
## (2) LIST OF FILES AND SITES TO PROCESS
nvsplFiles <- list.files(workingDir, pattern="^NVSPL", recursive=T, full.names=T)
mySites <- sort(unique(gsub("NVSPL_(.+)_\\d{4}_\\d{2}_\\d{2}_\\d{2}.txt","\\1",basename(nvsplFiles))))
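# e.g., a basename of "NVSPL_GLBA001_2019_05_11_09.txt" (hypothetical) yields site "GLBA001"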
## (3) VERSION OF PAMGUIDE, see the most recent code
PAMGuideVersion <- 'V12noChunk_fixdBA' # CB: to be updated with any future tweaks/releases of this Github code
# Save params for posterity once all necessary objects have been initialized
params.name <- paste0("paramsFileAcousticIndex_", project, "_", instrum)
save(file = paste0(output.directory, params.name), list = ls(all.name = TRUE))
message('Output "params" file memorializing your inputs to this function has been saved in: \n ', output.directory, ' as ', params.name)
# "calculate_NVSPLtoAI_Generic"
#RUN LOOPs through sites (ss) then days (dys) within a site to calculate acoustic indices, background values
for (ss in 1:length(mySites) ) #ss = 1
{
OUTPUT_sites = NULL
OUTPUT_sitesD = NULL
siteFiles = nvsplFiles[grepl(paste("NVSPL_",mySites[ss],"_",sep=""), basename(nvsplFiles) )]
siteDays = sort(unique(gsub("NVSPL_.+_(\\d{4}_\\d{2}_\\d{2})_\\d{2}.txt","\\1",basename(siteFiles))))
#FORMAT matrices and data
uDayIndex = (unique(siteDays))
acis = NULL
acisD = NULL
# start on a specified day
if (startday == 1) { startday = uDayIndex[1]} # start on first day
dindx = grep(startday, uDayIndex)
for ( dd in dindx:length(uDayIndex)) #START loop through unique site days # dd = 1
{
cat('Calculating Acoustic Index for site: ', mySites[ss], " (", ss,' of ',length(mySites),') for file ', dd,' of ',
length(uDayIndex), " [", uDayIndex[dd], "]", '\n',sep="")
#GET files for a given day
dy = siteDays[dd]
siteDayFiles = siteFiles[grepl(dy,basename(siteFiles))]
#GET DATA IN THE RIGHT FORMAT
dayMatrix = NULL
for(sf in siteDayFiles) {
myTempMatrix = read.csv(sf)
timeData = matrix(as.numeric(unlist( strsplit( gsub(":"," ",substr(gsub('-',' ',as.character(myTempMatrix[,2])),0,19)),' ')) ),ncol=6, byrow=T )
timezone <- unique(myTempMatrix$timezone) # CB added. Needs to be carried through since occasionally varies based on who collected the data that year
myTempMatrix = cbind(timeData, myTempMatrix[,3:45] )
colnames(myTempMatrix)[1:6] = c("Yr","Mo", "Day", "Hr", "Min", "Sec")
myTempMatrix$timezone <- timezone
dayMatrix = rbind(dayMatrix, myTempMatrix)
#REVIEW spectrogram of the 1 hr file if desired
if (plt == TRUE)
{
myTempMatrixPLT <- cbind(timeData, myTempMatrix[,3:45] )
site <- gsub('.txt', '', as.character(basename(sf)) )
colnames(myTempMatrixPLT)[1:6] <- c("Yr","Mo", "Day", "Hr", "Min", "Sec")
dayMatrix2 <- myTempMatrixPLT
plotSecs <- timeData[,5] * 60 + timeData[,6]
plotFreqs <- grep("H\\d",colnames(dayMatrix2),value=T)
image(plotSecs,1:length(plotFreqs),
as.matrix(dayMatrix2[,plotFreqs]),
col=colorRampPalette(c("blue","orange","white"))(100),
xlab="Time (s)",ylab = "Freq Idx",
zlim=c(-8,85))
#locator() #allows you to pick points on the image
}
}
rm(myTempMatrix)
#CHECK to see if full minutes are included, remove any non-full minutes
test = do.call(paste, c(dayMatrix[c("Hr", "Min")], sep = "_"))
unHrMin = unique(test)
dayMatrix = cbind(dayMatrix,test)
#REMOVE rows with NAs (e.g. unique( dayMatrix$dbA ) )
dayMatrix = dayMatrix[!is.na(dayMatrix$dbA),]
# NOTE FROM CB: This was in the Github version of the "startAtBeginning" version
# (I can't find any other versions in Teams or Synology, so this is what I'm going with),
# but I can't tell whether this should be applied only to the start.at.beginning routine,
# to BOTH routines, or to NEITHER; maybe it is deprecated, since it
# didn't make it into the version on GLBA Synology???
# Seems like it would be relevant either way.
# if (start.at.beginning == TRUE) {
#REMOVE DUPLICATE TIMES
#noticed that sometimes if a day is split over multiple audio files, you can get repeated rows... need to remove duplicate rows before segmenting by timestep
test2 = do.call(paste, c(dayMatrix[c("Hr", "Min","Sec")], sep = "_"))
dayMatrix = cbind(dayMatrix,test2)
dayMatrix = dayMatrix[!duplicated(dayMatrix$test2),]
# }
#A-weight 33 octave bands in dayMatrix
FREQ = colnames(dayMatrix[7:(33+6)])
aweight <- c(-63.4, -56.7, -50.5, -44.7, -39.4, -34.6, -30.2, -26.2, -22.5, -19.1, -16.1,
-13.4, -10.9, -8.6, -6.6, -4.2, -3.2, -1.9, -0.8, 0, 0.6, 1, 1.2,
1.3, 1.2, 1.0, 0.5, -0.1, -1.1, -2.5, -4.3, -6.6, -9.3)
dayMatrix33 = dayMatrix[ which( colnames(dayMatrix)==FREQ[1] ):
which( colnames(dayMatrix)==FREQ[33]) ]
dayMatrix33A = t( t(dayMatrix33) + aweight)
dayMatrix33A = cbind(dayMatrix[,1:6],dayMatrix33A)
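# NOTE: t(t(M) + w) above adds w[k] to the k-th column of M, i.e., applies each
# A-weighting offset to its matching octave band. Quick check (hypothetical):
# t(t(matrix(0, 2, 3)) + c(1, 2, 3)) yields columns of 1s, 2s, and 3s.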
#........................................................................................
## RUN acoustic indices on each "timestep"
#........................................................................................
#SUBSET so sampling starts at the top of a time stamp
Output =NULL
Output1 =NULL
BKADI =NULL
# If using generic version and not start.at.beginning
if (start.at.beginning == FALSE ) {
for(i in 0:23) #hours: unique( dayMatrix$Hr )
{
for(j in 1:(60/timestep)) #timestep for each hour: unique( dayMatrix$Min )
{
matchMins = seq(timestep * (j - 1), by = 1, length.out = timestep)
# unweighted matrix
workingMatrix = dayMatrix[ dayMatrix$Hr == i & dayMatrix$Min %in% matchMins , ]
# remove rows with Inf in the H25 column (in some cases all data were Inf for that second)
workingMatrix = workingMatrix[is.finite(workingMatrix$H25),]
# A-weighted matrix, filtered the same way
workingMatrixA <- dayMatrix33A[ dayMatrix$Hr == i & dayMatrix33A$Min %in% matchMins, ]
workingMatrixA = workingMatrixA[is.finite(workingMatrixA$H25),]
dayMatrixlim = NULL
BKdayMatrixlimA = NULL
BKdayMatrixlim = NULL
dayMatrixlimA = NULL
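# Only compute indices if the chunk is no more than 60 seconds short of a full
# timestep (i.e., at least timestep*60 - 60 rows of one-second data)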
if( dim(workingMatrix)[1] >= (timestep * 60)- 60 )
{
#TRUNCATE matrix in frequency bands of interest
dayMatrixlim = workingMatrix[which( colnames(workingMatrix)==fminimum ):which( colnames(workingMatrix)==fmaximum) ]
BKdayMatrixlim = workingMatrix[which( colnames(workingMatrix)== BKfminimum ):which( colnames(workingMatrix)== BKfmaximum) ]
dayMatrixlimA = workingMatrixA[which( colnames(workingMatrixA)== fminimum ):which( colnames(workingMatrixA)== fmaximum) ]
BKdayMatrixlimA= workingMatrixA[which( colnames(workingMatrixA)== BKfminimum ):which( colnames(workingMatrixA)== BKfmaximum) ]
#Number of frequency bands
nFQ = as.numeric( dim(dayMatrixlim) [2] )
nFQbk = as.numeric( dim(BKdayMatrixlim) [2] )
#Number of seconds
fileDur = as.numeric( dim(dayMatrixlim) [1] )
#........................................................................................
# CALCULATE per-frequency metrics- needed in other indices (NOT SAVED!)
#........................................................................................
#(1) simple average background noise estimate for each 1/3 octave band
BKBirddB <- 10*log10( colMeans( 10^(dayMatrixlim/10) ) )
## NOTE: how to report these values:
# will not report- this is the background noise estimate used in the frequency band of the ADI
# how you calculated these values:
# In each timestep (10 min) for each 1/3 octave frequency band of interest (fminimum - fmaximum),
# 1) converted to pressures, 2) calculated the mean pressure for each frequency band, and 3) converted back to dB
# (2) Background noise from #2 in Towsey, modified here as a level for each frequency band, not normalized!
BK_Towsey <- NULL
for (ff in 1:length(dayMatrixlim) ) { #loop through each frequency band
#(1) compute a histogram of lowest dB values, with a length equal to 1/8th of total values
tmp <- sort ( dayMatrixlim[,ff]) [ 1:( dim(dayMatrixlim)[1] /8) ]
#(2) smooth the histogram with a moving average- NOT implemented
#!!!!! candidate to check: stats::filter(tmp, rep(1,5), method = "convolution")
#(3) find bin with the maximum count
tmp1 <- c(table(tmp)) #count for each dB bin
tmpval <- as.numeric( (tmp1)) #counts for each bin as, numeric
tmpnam <- as.numeric(names(tmp1)) #dB value for each bin, numeric
tmpc <- as.matrix( cbind(tmpnam,tmpval))
colmax <- as.numeric(which.max(tmpval))
#(4) accumulating counts in histogram bins below (3) until 68% of counts below (3) is reached
cutoff = sum( tmpval[ 1:colmax-1] ) * .68 #value to stop the summation at
# got an error when the first bin had the max # of values..... so just use the min value
if (cutoff == 0) {stploc = 0}
cntbk <- 0
for (h in 1:length(tmpval[1:colmax])-1 )
{
if (cntbk > cutoff) {
break }
loc = colmax - h #find the location to start the summation
cntbk = cntbk + tmpval[loc]
stploc = tmpnam[loc-1]
}
# (5) final calc: (3) + N(4)
if (cutoff == 0) {BK_Towsey[ff] = tmpnam[colmax] } else { BK_Towsey[ff] = tmpnam[colmax] + stploc*0.1 }
}
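# The loop above implements a modal background estimate: e.g. (hypothetical
# values), if the lowest-eighth histogram is table(tmp) = c("28.1"=5, "28.3"=12,
# "28.6"=9), the modal bin is 28.3 dB; counts are then accumulated downward
# from the mode until 68% of the below-mode total is reached, and the final
# estimate is the modal dB value plus 0.1 times the dB value one bin below the
# stopping point. The same procedure is repeated next for the background band.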
BK_Towsey_anth<- NULL
for (ff in 1:length(BKdayMatrixlim) ) { #loop through each frequency band
#(1) compute a histogram of lowest dB values, with a length equal to 1/8th of total values
tmp <- sort ( BKdayMatrixlim[,ff]) [ 1:( dim(BKdayMatrixlim)[1] /8) ]
#(2) smooth the histogram with a moving average- NOT implemented
#!!!!! candidate to check: stats::filter(tmp, rep(1,5), method = "convolution")
#(3) find bin with the maximum count
tmp1 <- c(table(tmp)) #count for each dB bin
tmpval <- as.numeric( (tmp1)) #counts for each bin as, numeric
tmpnam <- as.numeric(names(tmp1)) #dB value for each bin, numeric
tmpc <- as.matrix( cbind(tmpnam,tmpval))
colmax <- as.numeric(which.max(tmpval))
#(4) accumulating counts in histogram bins below (3) until 68% of counts below (3) is reached
cutoff = sum( tmpval[ 1:colmax-1] ) * .68 #value to stop the summation at
# got an error when the first bin had the max # of values..... so just use the min value
if (cutoff == 0) {stploc = 0}
cntbk <- 0
for (h in 1:length(tmpval[1:colmax])-1 )
{
if (cntbk > cutoff) {
break }
loc = colmax - h #find the location to start the summation
cntbk = cntbk + tmpval[loc]
stploc = tmpnam[loc-1]
}
# (5) final calc: (3) + N(4)
if (cutoff == 0) {BK_Towsey_anth[ff] = tmpnam[colmax] } else { BK_Towsey_anth[ff] = tmpnam[colmax] + stploc*0.1 }
}
# (3) Exceedence levels by frequency band (not normalized!)
exceed = NULL
for (iy in 1:dim(dayMatrixlim)[2]) {
tmp <- quantile(dayMatrixlim[,iy], c(0.05, 0.1, 0.5, 0.9, 0.95), na.rm = TRUE)
exceed = cbind(exceed,tmp) }
colnames(exceed) <- colnames(dayMatrixlim)
L10 = (exceed[4,]) #L10 = 90th percentile
L90 = (exceed[2,]) #L90 = 10th percentile
difexceed = L10 - L90
#........................................................................................
#........................................................................................
#START ACOUSTIC INDEX CALCULATIONS
#........................................................................................
## (1a) CALCULATE ACI- SPLs
t <- array(0, dim(dayMatrixlim) - c(1,0))
for (frq in 1:(dim(t)[2])) # loop through frequency bands
{
t[,frq] <- abs( diff(dayMatrixlim[,frq]) )/ sum(dayMatrixlim[,frq])
}
ACIout = sum(t) # ACI results for each time chunk on a given day
rm(t)
## (1b) CALCULATE ACI- intensity (unlog)
dayMatrixlimI = 10^(dayMatrixlim/10) # convert to pressure
t <- array(0, dim(dayMatrixlimI) - c(1,0))
for (frq in 1:(dim(t)[2])) # loop through frequency bands
{
t[,frq] <- abs( diff(dayMatrixlimI[,frq]) )/ sum(dayMatrixlimI[,frq])
}
ACIoutI = sum(t) # ACI results for each time chunk on a given day
rm(t)
## (1c) CALCULATE ACI- normalized
dayMatrixlimN = normdBA(dayMatrixlim)
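# normdBA() is the package-internal dB normalizer; per the avgAMP note below,
# values are rescaled against fixed min/max bounds of -10 and 80 dB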
t <- array(0, dim(dayMatrixlimN) - c(1,0))
for (frq in 1:(dim(t)[2])) # loop through frequency bands
{
t[,frq] <- abs( diff(dayMatrixlimN[,frq]) )/ sum(dayMatrixlimN[,frq])
}
ACIoutN = sum(t) # ACI results for each time chunk on a given day
rm(t)
#........................................................................................
#........................................................................................
## (2,3,4,5) CALCULATE background noise SPL for timestep, gives 1 timestep background dB value
#A-weight the BK values
BKdBA_low = 10*log10( sum( 10^(BKdayMatrixlimA/10))/ (timestep*60))
BKdBA_bird = 10*log10( sum( 10^(dayMatrixlimA/10))/ (timestep*60) )
#unweight values
BKdB_low = 10*log10( sum( 10^(BKdayMatrixlim/10))/ (timestep*60))
BKdB_bird = 10*log10( sum( 10^(dayMatrixlim/10))/ (timestep*60) )
## NOTE: how to REPORT these values:
# xx dB re: 1 uPa (BKfminimum Hz - BKfmaximum Hz)
# how you calculated these values:
# In each timestep (10 min) over the frequency band of interest,
# 1) converted to pressures, 2) calculated the mean pressure, and 3) converted back to dB
#........................................................................................
#........................................................................................
## (6,7) Average signal amplitude- #1 in Towsey et al., 2013
#modified as the mean dBA value for the timestep, normalized by the min/max (set at: -10 to 80 dB)
avgAMP = normdBA(mean (workingMatrix$dbA) )
L10AMP = normdBA(quantile (workingMatrix$dbA, .1) ) #10th percentile = L90
#........................................................................................
#........................................................................................
## (8,9,10) Spectral entropy (#10 in Towsey), Temporal entropy (#7 in Towsey), Entropy Index
Pres = colMeans(10^(dayMatrixlim/10)) #Leq for each Fq band over time period (mean of the pressures)
Pres2 = Pres/sum(Pres)
Hf = -sum( (Pres2 * log(Pres2))) /log(nFQ)
Leqt = ( rowMeans( (10^(dayMatrixlim/10)) ) ) #Leq for each second over entire band (could also used dBA)
Leqt2 = Leqt/sum(Leqt)
Ht = -sum( (Leqt2 * log(Leqt2)) / log(fileDur) )
EI = Ht * Hf
#........................................................................................
#........................................................................................
## (11,12,13,14) Biophony, Anthrophony, normalised difference in soundscape, ratio of A/B
BioPh = sum( colMeans(10^(dayMatrixlim/10)) )
AthPh = sum( colMeans(10^(BKdayMatrixlim/10)) )
SoundScapeI = (BioPh -AthPh ) / (BioPh + AthPh )
Bio_anth = (BioPh /AthPh )
#........................................................................................
#........................................................................................
## (15,16,17) acoustic activity, count of acoustic events, duration of acoustic events
AA = NULL
AAc = NULL
s2n <- NULL
for (f in 1:length(dayMatrixlim) ) {
AA[f] = length( dayMatrixlim[,f][ dayMatrixlim[,f] > BK_Towsey[f]+3 ] )/ length (dayMatrixlim[,f])
AAc[f] = length( dayMatrixlim[,f][ dayMatrixlim[,f] > BK_Towsey[f]+3 ] )
s2n[f] = max((dayMatrixlim[,f])) - BK_Towsey[f]
}
AA= sum(AA)
AAc = sum(AAc)
AAdur = NULL
for (f in 1:length(dayMatrixlim) ) {
#logical matrix...
temp <- dayMatrixlim[,f] > BK_Towsey[f]+3
#table(temp)["TRUE"] ; table(temp)["FALSE"]
temp2 <- temp*1 # convert to 0/1
#find consecutive 1 values...
rl <- rle(temp2)
len = rl$lengths
v = rl$value
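# e.g. (hypothetical), rle(c(0,0,1,1,1,0)) gives lengths 2,3,1 and values 0,1,0;
# runs of 1s are above-threshold events. NOTE: averaging every even-indexed run
# assumes the series starts below threshold; a chunk starting above threshold
# would have its below-threshold runs averaged instead.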
if (length(v) == 1 )
{
if (v == 0) {AAdur[f] = 0 }
else {AAdur[f] = len }
next
}
cumsum = NULL
cntAA = 0
for ( qq in seq(from = 2, to = length(v), by =2) )
{
cntAA = cntAA +1
cumsum[cntAA] = len[qq]
}
AAdur[f] = mean(cumsum)
}
AAdur = median(AAdur)
#........................................................................................
## (15,16,17) acoustic activity, count of acoustic events, duration of acoustic events for NOISE
AAanth = NULL
AAcanth = NULL
s2nanth <- NULL
AAduranth = NULL
for (f in 1:length(BKdayMatrixlim) ) {
AAanth[f] = length( BKdayMatrixlim[,f][ BKdayMatrixlim[,f] > BK_Towsey_anth[f]+3 ] )/ length (BKdayMatrixlim[,f])
AAcanth[f] = length( BKdayMatrixlim[,f][ BKdayMatrixlim[,f] > BK_Towsey_anth[f]+3 ] )
s2nanth[f] = max((BKdayMatrixlim[,f])) - BK_Towsey_anth[f]
}
AAanth= sum(AAanth)
AAcanth = sum(AAcanth)
for (f in 1:length(BKdayMatrixlim) ) {
#logical matrix...
temp <- BKdayMatrixlim[,f] > BK_Towsey_anth[f]+3
#table(temp)["TRUE"] ; table(temp)["FALSE"]
temp2 <- temp*1 # convert to 0/1
#find consecutive 1 values...
rl <- rle(temp2)
len = rl$lengths
v = rl$value
if (length(v) == 1 )
{
if (v == 0) {AAduranth[f] = 0 }
else {AAduranth[f] = len }
next
}
cumsum = NULL
cntAA = 0
for ( qq in seq(from = 2, to = length(v), by =2) )
{
cntAA = cntAA +1
cumsum[cntAA] = len[qq]
}
AAduranth[f] = mean(cumsum)
}
AAduranth = median(AAduranth)
#........................................................................................
#........................................................................................
## (18) Roughness
Rough = NULL
for (f in 1:length(dayMatrixlim) ){
x = dayMatrixlim[,f]
x <- x/max(x)
deriv2 <- diff(x, 1, 2)
Rough[f] <- sum(deriv2^2, na.rm = TRUE)
}
Rough = median(Rough)
#........................................................................................
#........................................................................................
## (19,20) Acoustic diversity, acoustic eveness
Score = NULL
for (f in 1:length(dayMatrixlim) )
{
Score[f] = length( dayMatrixlim[,f][dayMatrixlim[,f] > BK_Towsey[f]+3] )/ length (dayMatrixlim[,f])
}
ADI_step = diversity(Score, index = "shannon")
Eveness_step = Gini(Score)
#........................................................................................
#........................................................................................
## (21,22,23,24,25,26) frequency with max amplitude, total count of max for each freq band (normalized by total bands),
# kurtosis, skewness, entropy of spectral max, entropy of spectral variance
# peak frequency
peakf = NULL
for (j in 1:dim(dayMatrixlim)[1] )
{ peakf[j] = (which.max( dayMatrixlim[j,] ) ) }
pk2 = matrix(0,1,dim(dayMatrixlim)[2])
for (uu in 1:nFQ) { pk2[uu] = sum(peakf == uu) }
colnames(pk2) = colnames(dayMatrixlim)
pk2nor = pk2/fileDur
pk = as.numeric ( gsub("H","",colnames(dayMatrixlim[which.max(pk2)]) ) )
# kurtosis- is the shape of peak frequencies normally distributed (kurtosis) ?
pkd = kurtosis( as.vector(pk2nor) )
# skewness- What is the skewness of this distribution? (symmetrical = 0, right skew = +)
pks = skewness(as.vector(pk2nor))
# entropy of Spectral Maxima
pk2[pk2 == 0] <- 1e-07
pk_prob = pk2/(sum(pk2)) # normalize
Hm = -sum( (pk_prob * log2(pk_prob) ) )/log2(nFQ)
# entropy of spectral variance- pressure and dB
Press = (10^(dayMatrixlim/10)) #convert back to pressure
Pv = NULL
for (v in 1:dim(Press)[2] ) {Pv[v] = var(Press[,v]) }
Pv2 = Pv/sum(Pv)
HvPres = -sum( (Pv2 * log2(Pv2)) ) / log2(nFQ)
Pv = NULL
for (v in 1:dim(dayMatrixlim)[2] ) {Pv[v] = var(dayMatrixlim[,v]) }
Pv2 = Pv/sum(Pv)
HvSPL = -sum( (Pv2 * log2(Pv2)) ) / log2(nFQ)
#........................................................................................
#........................................................................................
## (27) Normalize exceedence levels for dBA
Exceed_norm = normdBA ( quantile(workingMatrix$dbA, c(0.05, 0.1, 0.5, 0.9, 0.95),na.rm = TRUE) )
L10n = (Exceed_norm[4]) #L10 = 90th percentile
L90n = (Exceed_norm[2]) #L90 = 10th percentile
dif_L10L90 = L10n - L90n
#........................................................................................
#........................................................................................
## (28) Acoustic Richness- calculate temporal entropy and median broadband SPL for each minute and rank the results
Mamp = quantile( 10*log10( ( rowMeans( (10^(dayMatrixlim/10)) ) ) ), 0.5)
#........................................................................................
## (29) Spectral diversity (Sd)- determine optimal number of clusters for the timestep and how confident/explained
#!!!!! UNDER CONSTRUCTION !!!!!
# http://www.sthda.com/english/wiki/determining-the-optimal-number-of-clusters-3-must-known-methods-unsupervised-machine-learning
x = scale(dayMatrixlim) # standardize the variables
res = NbClust(x, distance = "euclidean", min.nc=2, max.nc=10, method = "kmeans", index = "silhouette")
# using all algorithms to find the optimal cluster count takes far too long!
#res = NbClust(x, distance = "euclidean", min.nc=2, max.nc=(dim(x)[1])/60, method = "ward.D", index = "all")
# use "gap" method (Tibshirani et al. 2001) Smallest n_{c} such that criticalValue >= 0
#res = NbClust(x, distance = "euclidean", min.nc=2, max.nc=(dim(x)[1])/60, method = "ward.D", index = "gap")
# Euclidean distance : Usual square distance between the two vectors (2 norm). d(x,y)= (sum_{j=1}^{d} (x_j - y_j)^2)^(1/2)
# Ward method minimizes the total within-cluster variance. At each step the pair of clusters with minimum cluster distance are merged. To implement this method, at each step find the pair of clusters that leads to minimum increase in total within-cluster variance after merging. Two different algorithms are found in the literature for Ward clustering. The one used by option "ward.D" (equivalent to the only Ward option "ward" in R versions <= 3.0.3) does not implement Ward's (1963) clustering criterion, whereas option "ward.D2" implements that criterion (Murtagh and Legendre 2013). With the latter, the dissimilarities are squared before cluster updating.
# fviz_nbclust(x, pam, method = "gap") # to visualize the results
#res$All.index
#res$All.CriticalValues
x2 = as.data.frame(t(res$Best.nc))
x4 = (table(x2$Number_clusters))
NumCL = as.numeric(names(which.max(x4))) # "R"
#........................................................................................
# (30) Spectral persistence
#........................................................................................
BP = as.numeric(res$Best.partition)
BPrle <- rle(BP)# need to find how many seconds each cluster "persisted" for
# average duration of the clusters which persist for longer than one frame, so don't count 1s!
SP2= mean(BPrle$lengths[BPrle$lengths>1])
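# e.g. (hypothetical), rle(c(1,1,2,3,3,3))$lengths is c(2,1,3), so the
# singleton run (cluster 2) is dropped and SP2 = mean(c(2,3)) = 2.5 seconds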
# what is the peak frequency, average broadband SPL, and average duration for each cluster
cluster = res$Best.partition
tempCL = cbind(dayMatrixlim,cluster) # adds cluster number to matrix
tempCL$max = apply(tempCL[,1:fbinMax], 1, which.max) # add max SPL column # to the matrix
tempCL$pkFq = round(as.numeric ( gsub("H","", colnames(tempCL[tempCL$max] ) ) )) # converts column number to frequency band
tempCL$spl = 10*log10 (rowMeans( (10^(tempCL[,1:fbinMax]/10)) ) )
cls = unique(res$Best.partition) #unique clusters in this data set
CLdets = NULL
for (clls in 1:4) {
tmpCL = tempCL[tempCL$cluster == cls[clls],]
tmpCLdur = (BPrle$lengths[BPrle$values == cls[clls] ] )
Cldur = mean(tmpCLdur, na.rm = TRUE) # mean duration of each cluster (fixed: was rm.na, which mean() silently ignores)
#Cldur = mean(tmpCLdur[tmpCLdur>1]) # mean duration of each cluster
tmpCLpk = table(tmpCL$pkFq)
CLpk = as.numeric( rownames( as.data.frame(which.max(tmpCLpk))[1] ) ) # peak frequency
if (is.na( tmpCL[1,1]) == T ){ CLpk = NA}
CLspl = median(tmpCL$spl)
CLdets = cbind(CLdets, cbind(Cldur, CLpk, CLspl) )
rm(tmpCL,tmpCLpk, tmpCLdur,Cldur,CLpk, CLspl)
}
colnames(CLdets) = c("CL1dur", "CL1pk", "CL1Leq","CL2dur", "CL2pk", "CL2Leq","CL3dur", "CL3pk", "CL3Leq","CL4dur", "CL4pk", "CL4Leq")
rm(tempCL,cls)
#........................................................................................
#END ACOUSTIC INDEX CALCULATIONS
#........................................................................................
#APPEND to a datasheet with first minute of chunk...
Output = rbind(Output,
cbind(mySites[ss], siteDays[dd],
workingMatrix[1,1:6], timestep, dim(workingMatrix)[1],
ACIout, ACIoutI, ACIoutN, BKdB_low, BKdBA_low,
BKdB_bird, BKdBA_bird,
avgAMP, L10AMP, Hf, Ht, EI, BioPh, AthPh,
SoundScapeI, Bio_anth,
AA, AAc, AAdur, AAanth, AAcanth, AAduranth,
Rough, ADI_step, Eveness_step,
pk, pkd, pks, Hm, HvPres, HvSPL, unlist(dif_L10L90),
Mamp, NumCL, SP2,CLdets,
PAMGuideVersion, timezone) )
BKADI = rbind(BKADI,BKBirddB) # rbind(BKADI,cbind(workingMatrix[1,1:6],BKBirddB))
#colnames (site, Day, YY, MM, DD, HH, MM, SS, timestep, #Sec,
#ACI, BKlow_dB, BKlow_dBA, BKbird_dB, BKbird_dBA,
#avg_nom, L10_norm, SpectralEntropy, TemporalEntropy, EntropyIndex, Biophony, Anthrophony, SoundScapeIndex, RatioSoundScapeIndex,
# AcousticActivity, AcousticEvents, DurAcousticEvents, Roughness, AcousticDiversity, Eveness)
#........................................................................................
#some clean up
#rm(ACIout, BKdB_low,BKdBA_low, BKdB_bird, BKdBA_bird,BKBirddB)
} # else {cat(i," hr ", j, " min NO DATA",'\n',sep="") }
} # END of minute loop
} # END of hour loop
} # end if using generic and not 'start.at.beginning'
# If using start.at.beginning
if (start.at.beginning == TRUE) {
for(i in ( seq(from = 1, to = dim(dayMatrix)[1], by = (timestep*60)) ) )
{
# unweighted matrix
workingMatrix = dayMatrix[i:(i+((timestep*60)-1)),]
# CHECK: cat(i,": ", "start:", workingMatrix[1,5], ":",workingMatrix[1,6], "(", dim(workingMatrix)[1],")\n")
# Inf values are a problem in calculations, so need to remove...
idx= NULL
for (mm in 1: dim(workingMatrix)[1] ) {
test = !is.infinite( unlist(workingMatrix[mm,]) )
if( all(test) == FALSE ) {
#cat("this works!")
idx = c(idx,mm)
} }
if (!is.null(idx)) { workingMatrix = workingMatrix[-idx,] }
workingMatrix= workingMatrix[rowSums(is.na(workingMatrix)) != ncol(workingMatrix), ]
# A-weighted matrix
workingMatrixA = dayMatrix33A[i:(i+((timestep*60)-1)),]
# Inf values are a problem in calculations, so need to remove
idx= NULL
for (mm in 1: dim(workingMatrixA)[1] ) {
test = !is.infinite( unlist(workingMatrixA[mm,]) )
if( all(test) == FALSE ) {
#cat("this works!")
idx = c(idx,mm)
} }
if (!is.null(idx)) { workingMatrixA = workingMatrixA[-idx,] }
workingMatrixA= workingMatrixA[rowSums(is.na(workingMatrixA)) != ncol(workingMatrixA), ]
dayMatrixlim = NULL
BKdayMatrixlimA = NULL
BKdayMatrixlim = NULL
dayMatrixlimA = NULL
if( dim(workingMatrix)[1] >= (timestep * 60) - 60 )
{
#TRUNCATE matrix in frequency bands of interest
dayMatrixlim = workingMatrix[which( colnames(workingMatrix)==fminimum ):which( colnames(workingMatrix)==fmaximum) ]
BKdayMatrixlim = workingMatrix[which( colnames(workingMatrix)== BKfminimum ):which( colnames(workingMatrix)== BKfmaximum) ]
dayMatrixlimA = workingMatrixA[which( colnames(workingMatrixA)== fminimum ):which( colnames(workingMatrixA)== fmaximum) ]
BKdayMatrixlimA= workingMatrixA[which( colnames(workingMatrixA)== BKfminimum ):which( colnames(workingMatrixA)== BKfmaximum) ]
#Number of frequency bands
nFQ = as.numeric( dim(dayMatrixlim) [2] )
nFQbk = as.numeric( dim(BKdayMatrixlim) [2] )
#Number of seconds
fileDur = as.numeric( dim(dayMatrixlim) [1] )
#........................................................................................
# CALCULATE per-frequency metrics- needed in other indices (NOT SAVED!)
#........................................................................................
#(1) simple average background noise estimate for each 1/3 octave band
BKBirddB <- 10*log10( colMeans( 10^(dayMatrixlim/10) ) )
## NOTE: how to report these values:
# will not report- this is the background noise estimate used in the frequency band of the ADI
# how you calculated these values:
# In each timestep (10 min) for each 1/3 octave frequency band of interest (fminimum - fmaximum),
# 1) converted to pressures, 2) calculated the mean pressure for each frequency band, and 3) converted back to dB
# (2) Background noise from #2 in Towsey, modified here as a level for each frequency band, not normalized!
BK_Towsey <- NULL
for (ff in 1:length(dayMatrixlim) ) { #loop through each frequency band
#(1) compute a histogram of lowest dB values, with a length equal to 1/8th of total values
tmp <- sort ( dayMatrixlim[,ff]) [ 1:( dim(dayMatrixlim)[1] /8) ]
#(2) smooth the histogram with a moving average- NOT implemented
#!!!!! candidate to check: stats::filter(tmp, rep(1,5), method = "convolution")
#(3) find bin with the maximum count
tmp1 <- c(table(tmp)) #count for each dB bin
tmpval <- as.numeric( (tmp1)) #counts for each bin as, numeric
tmpnam <- as.numeric(names(tmp1)) #dB value for each bin, numeric
tmpc <- as.matrix( cbind(tmpnam,tmpval))
colmax <- as.numeric(which.max(tmpval))
#(4) accumulating counts in histogram bins below (3) until 68% of counts below (3) is reached
cutoff = sum( tmpval[ 1:colmax-1] ) * .68 #value to stop the summation at
# got an error when the first bin had the max # of values..... so just use the min value
if (cutoff == 0) {stploc = 0}
cntbk <- 0
for (h in 1:length(tmpval[1:colmax])-1 )
{
if (cntbk > cutoff) {
break }
loc = colmax - h #find the location to start the summation
cntbk = cntbk + tmpval[loc]
stploc = tmpnam[loc-1]
}
# (5) final calc: (3) + N(4)
if (cutoff == 0) {BK_Towsey[ff] = tmpnam[colmax] } else { BK_Towsey[ff] = tmpnam[colmax] + stploc*0.1 }
}
BK_Towsey_anth<- NULL
for (ff in 1:length(BKdayMatrixlim) ) { #loop through each frequency band
#(1) compute a histogram of lowest dB values, with a length equal to 1/8th of total values
tmp <- sort ( BKdayMatrixlim[,ff]) [ 1:( dim(BKdayMatrixlim)[1] /8) ]
#(2) smooth the histogram with a moving average- NOT implemented
#!!!!! candidate to check: stats::filter(tmp, rep(1,5), method = "convolution")
#(3) find bin with the maximum count
tmp1 <- c(table(tmp)) #count for each dB bin
tmpval <- as.numeric( (tmp1)) #counts for each bin as, numeric
tmpnam <- as.numeric(names(tmp1)) #dB value for each bin, numeric
tmpc <- as.matrix( cbind(tmpnam,tmpval))
colmax <- as.numeric(which.max(tmpval))
#(4) accumulating counts in histogram bins below (3) until 68% of counts below (3) is reached
cutoff = sum( tmpval[ 1:colmax-1] ) * .68 #value to stop the summation at
# got an error when the first bin had the max # of values..... so just use the min value
if (cutoff == 0) {stploc = 0}
cntbk <- 0
for (h in 1:length(tmpval[1:colmax])-1 )
{
if (cntbk > cutoff) {
break }
loc = colmax - h #find the location to start the summation
cntbk = cntbk + tmpval[loc]
stploc = tmpnam[loc-1]
}
# (5) final calc: (3) + N(4)
if (cutoff == 0) {BK_Towsey_anth[ff] = tmpnam[colmax] } else { BK_Towsey_anth[ff] = tmpnam[colmax] + stploc*0.1 }
}
# (3) Exceedence levels by frequency band (not normalized!)
exceed = NULL
for (iy in 1:dim(dayMatrixlim)[2]) {
tmp <- quantile(dayMatrixlim[,iy], c(0.05, 0.1, 0.5, 0.9, 0.95), na.rm = TRUE)
exceed = cbind(exceed,tmp) }
colnames(exceed) <- colnames(dayMatrixlim)
L10 = (exceed[4,]) #L10 = 90th percentile
L90 = (exceed[2,]) #L90 = 10th percentile
difexceed = L10 - L90
#........................................................................................
#........................................................................................
#START ACOUSTIC INDEX CALCULATIONS
#........................................................................................
## (1a) CALCULATE ACI- SPLs
t <- array(0, dim(dayMatrixlim) - c(1,0))
for (frq in 1:(dim(t)[2])) # loop through frequency bands
{
t[,frq] <- abs( diff(dayMatrixlim[,frq]) )/ sum(dayMatrixlim[,frq])
}
ACIout = sum(t) # ACI results for each time chunk on a given day
rm(t)
## (1b) CALCULATE ACI- intensity (unlog)
dayMatrixlimI = 10^(dayMatrixlim/10) # convert to pressure
t <- array(0, dim(dayMatrixlimI) - c(1,0))
for (frq in 1:(dim(t)[2])) # loop through frequency bands
{
t[,frq] <- abs( diff(dayMatrixlimI[,frq]) )/ sum(dayMatrixlimI[,frq])
}
ACIoutI = sum(t)# ACI results for each time chunk on a given day
rm(t)
## (1c) CALCULATE ACI- normalized
dayMatrixlimN = normdBA(dayMatrixlim)
t <- array(0, dim(dayMatrixlimN) - c(1,0))
for (frq in 1:(dim(t)[2])) # loop through frequency bands
{
t[,frq] <- abs( diff(dayMatrixlimN[,frq]) )/ sum(dayMatrixlimN[,frq])
}
ACIoutN = sum(t) # ACI results for each time chunk on a given day
rm(t)
#........................................................................................
#........................................................................................
## (2,3,4,5) CALCULATE background noise SPL for timestep, gives 1 timestep background dB value
#A-weight the BK values
BKdBA_low = 10*log10( sum( 10^(BKdayMatrixlimA/10))/ (timestep*60))
BKdBA_bird = 10*log10( sum( 10^(dayMatrixlimA/10))/ (timestep*60) )
#unweight values
BKdB_low = 10*log10( sum( 10^(BKdayMatrixlim/10))/ (timestep*60))
BKdB_bird = 10*log10( sum( 10^(dayMatrixlim/10))/ (timestep*60) )
## NOTE: how to REPORT these values:
# xx dB re: 1 uPa (BKfminimum Hz - BKfmaximum Hz)
# how you calculated these values:
# In each timestep (10 min) over the frequency band of interest,
# 1) converted to pressures, 2) calculated the mean pressure, and 3) converted back to dB
#........................................................................................
#........................................................................................
## (6,7) Average signal amplitude- #1 in Towsey et al., 2013
#modified as the mean dBA value for the timestep, normalized by the min/max (set at: -10 to 80 dB)
avgAMP = normdBA(mean (workingMatrix$dbA) )
L10AMP = normdBA(quantile (workingMatrix$dbA, .1) ) #10th percentile = L90
#........................................................................................
#........................................................................................
## (8,9,10) Spectral entropy (#10 in Towsey), Temporal entropy (#7 in Towsey), Entropy Index
Pres = colMeans(10^(dayMatrixlim/10)) #Leq for each Fq band over time period (mean of the pressures)
Pres2 = Pres/sum(Pres)
Hf = -sum( (Pres2 * log(Pres2))) /log(nFQ)
Leqt = ( rowMeans( (10^(dayMatrixlim/10)) ) ) #Leq for each second over entire band (could also used dBA)
Leqt2 = Leqt/sum(Leqt)
Ht = -sum( (Leqt2 * log(Leqt2)) / log(fileDur) )
EI = Ht * Hf
#........................................................................................
#........................................................................................
## (11,12,13,14) Biophony, Anthrophony, normalised difference in soundscape, ratio of A/B
BioPh = sum( colMeans(10^(dayMatrixlim/10)) )
AthPh = sum( colMeans(10^(BKdayMatrixlim/10)) )
SoundScapeI = (BioPh -AthPh ) / (BioPh + AthPh )
Bio_anth = (BioPh /AthPh )
#........................................................................................
#........................................................................................
## (15,16,17) acoustic activity, count of acoustic events, duration of acoustic events
AA = NULL
AAc = NULL
s2n <- NULL
for (f in 1:length(dayMatrixlim) ) {
AA[f] = length( dayMatrixlim[,f][ dayMatrixlim[,f] > BK_Towsey[f]+3 ] )/ length (dayMatrixlim[,f])
AAc[f] = length( dayMatrixlim[,f][ dayMatrixlim[,f] > BK_Towsey[f]+3 ] )
s2n[f] = max((dayMatrixlim[,f])) - BK_Towsey[f]
}
AA= sum(AA)
AAc = sum(AAc)
AAdur = NULL
for (f in 1:length(dayMatrixlim) ) {
#logical matrix...
temp <- dayMatrixlim[,f] > BK_Towsey[f]+3
#table(temp)["TRUE"] ; table(temp)["FALSE"]
temp2 <- temp*1 # convert to 0/1
#find consecutive 1 values...
rl <- rle(temp2)
len = rl$lengths
v = rl$value
if (length(v) == 1 )
{
if (v == 0) {AAdur[f] = 0 }
else {AAdur[f] = len }
next
}
cumsum = NULL
cntAA = 0
for ( qq in seq(from = 2, to = length(v), by =2) )
{
cntAA = cntAA +1
cumsum[cntAA] = len[qq]
}
AAdur[f] = mean(cumsum)
}
AAdur = median(AAdur)
#........................................................................................
## (15,16,17) acoustic activity, count of acoustic events, duration of acoustic events for NOISE
AAanth = NULL
AAcanth = NULL
s2nanth <- NULL
AAduranth = NULL
for (f in 1:length(BKdayMatrixlim) ) {
AAanth[f] = length( BKdayMatrixlim[,f][ BKdayMatrixlim[,f] > BK_Towsey_anth[f]+3 ] )/ length (BKdayMatrixlim[,f])
AAcanth[f] = length( BKdayMatrixlim[,f][ BKdayMatrixlim[,f] > BK_Towsey_anth[f]+3 ] )
s2nanth[f] = max((BKdayMatrixlim[,f])) - BK_Towsey_anth[f]
}
AAanth= sum(AAanth)
AAcanth = sum(AAcanth)
for (f in 1:length(BKdayMatrixlim) ) {
#logical matrix...
temp <- BKdayMatrixlim[,f] > BK_Towsey_anth[f]+3
#table(temp)["TRUE"] ; table(temp)["FALSE"]
temp2 <- temp*1 # convert to 0/1
#find consecutive 1 values...
rl <- rle(temp2)
len = rl$lengths
v = rl$value
if (length(v) == 1 )
{
if (v == 0) {AAduranth[f] = 0 }
else {AAduranth[f] = len }
next
}
cumsum = NULL
cntAA = 0
for ( qq in seq(from = 2, to = length(v), by =2) )
{
cntAA = cntAA +1
cumsum[cntAA] = len[qq]
}
AAduranth[f] = mean(cumsum)
}
AAduranth = median(AAduranth)
#........................................................................................
#........................................................................................
## (18) Roughness
Rough = NULL
for (f in 1:length(dayMatrixlim) ){
x = dayMatrixlim[,f]
x <- x/max(x)
deriv2 <- diff(x, 1, 2)
Rough[f] <- sum(deriv2^2, na.rm = TRUE)
}
Rough = median(Rough)
#........................................................................................
#........................................................................................
## (19,20) Acoustic diversity, acoustic eveness
Score = NULL
for (f in 1:length(dayMatrixlim) )
{
Score[f] = length( dayMatrixlim[,f][dayMatrixlim[,f] > BK_Towsey[f]+3] )/ length (dayMatrixlim[,f])
}
ADI_step = diversity(Score, index = "shannon")
Eveness_step = Gini(Score)
#........................................................................................
#........................................................................................
## (21,22,23,24,25,26) frequency with max amplitude, total count of max for each freq band (normalized by total bands),
# kurtosis, skewness, entropy of spectral max, entropy of spectral variance
# peak frequency
peakf = NULL
for (j in 1:dim(dayMatrixlim)[1] )
{ peakf[j] = (which.max( dayMatrixlim[j,] ) ) }
pk2 = matrix(0,1,dim(dayMatrixlim)[2])
for (uu in 1:nFQ) { pk2[uu] = sum(peakf == uu) }
colnames(pk2) = colnames(dayMatrixlim)
pk2nor = pk2/fileDur
pk = as.numeric ( gsub("H","",colnames(dayMatrixlim[which.max(pk2)]) ) )
# kurtosis- is the shape of peak frequencies normally distributed (kurtosis) ?
pkd = kurtosis( as.vector(pk2nor) )
# skewness- What is the skewness of this distribution? (symmetrical = 0, right skew = +)
pks = skewness(as.vector(pk2nor))
# entropy of Spectral Maxima
pk2[pk2 == 0] <- 1e-07
pk_prob = pk2/(sum(pk2)) # normalize
Hm = -sum( (pk_prob * log2(pk_prob) ) )/log2(nFQ)
# entropy of spectral variance- pressure and dB
Press = (10^(dayMatrixlim/10)) #convert back to pressure
Pv = NULL
for (v in 1:dim(Press)[2] ) {Pv[v] = var(Press[,v]) }
Pv2 = Pv/sum(Pv)
HvPres = -sum( (Pv2 * log2(Pv2)) ) / log2(nFQ)
Pv = NULL
for (v in 1:dim(dayMatrixlim)[2] ) {Pv[v] = var(dayMatrixlim[,v]) }
Pv2 = Pv/sum(Pv)
HvSPL = -sum( (Pv2 * log2(Pv2)) ) / log2(nFQ)
#........................................................................................
#........................................................................................
## (27) Normalize exceedence levels for dBA
Exceed_norm = normdBA ( quantile(workingMatrix$dbA, c(0.05, 0.1, 0.5, 0.9, 0.95),na.rm = TRUE) )
L10n = (Exceed_norm[4]) #L10 = 90th percentile
L90n = (Exceed_norm[2]) #L90 = 10th percentile
dif_L10L90 = L10n - L90n
#........................................................................................
#........................................................................................
## (28) Acoustic Richness- calculate temporal entropy and median broadband SPL for each minute and rank the results
Mamp = quantile( 10*log10( ( rowMeans( (10^(dayMatrixlim/10)) ) ) ), 0.5)
#........................................................................................
## (29) Spectral diversity (Sd)- determine optimal number of clusters for the timestep and how confident/explained
# http://www.sthda.com/english/wiki/determining-the-optimal-number-of-clusters-3-must-known-methods-unsupervised-machine-learning
x = scale(dayMatrixlim) # standardize the variables
# errors occur when NaN values are present in x- unclear why NaN arises- can these timesteps just be skipped?
res = NbClust(x, distance = "euclidean", min.nc=2, max.nc=10, method = "kmeans", index = "silhouette")
# using all algorithms to find the optimal cluster count takes far too long!
#res = NbClust(x, distance = "euclidean", min.nc=2, max.nc=(dim(x)[1])/60, method = "ward.D", index = "all")
# use "gap" method (Tibshirani et al. 2001) Smallest n_{c} such that criticalValue >= 0
#res = NbClust(x, distance = "euclidean", min.nc=2, max.nc=(dim(x)[1])/60, method = "ward.D", index = "gap")
# Euclidean distance : Usual square distance between the two vectors (2 norm). d(x,y)= (sum_{j=1}^{d} (x_j - y_j)^2)^(1/2)
# Ward method minimizes the total within-cluster variance. At each step the pair of clusters with minimum cluster distance are merged. To implement this method, at each step find the pair of clusters that leads to minimum increase in total within-cluster variance after merging. Two different algorithms are found in the literature for Ward clustering. The one used by option "ward.D" (equivalent to the only Ward option "ward" in R versions <= 3.0.3) does not implement Ward's (1963) clustering criterion, whereas option "ward.D2" implements that criterion (Murtagh and Legendre 2013). With the latter, the dissimilarities are squared before cluster updating.
# fviz_nbclust(x, pam, method = "gap") # to visualize the results
#res$All.index
#res$All.CriticalValues
x2 = as.data.frame(t(res$Best.nc))
x4 = (table(x2$Number_clusters))
NumCL = as.numeric(names(which.max(x4))) # "R"
#........................................................................................
# (30) Spectral persistence
#........................................................................................
BP = as.numeric(res$Best.partition)
BPrle <- rle(BP)# need to find how many seconds each cluster "persisted" for
# average duration of the clusters which persist for longer than one frame, so don't count 1s!
SP2= mean(BPrle$lengths[BPrle$lengths>1])
# what is the peak frequency, average broadband SPL, and average duration for each cluster
cluster = res$Best.partition
tempCL = cbind(dayMatrixlim,cluster) # adds cluster number to matrix
tempCL$max = apply(tempCL[,1:fbinMax], 1, which.max) # add max SPL column # to the matrix
tempCL$pkFq = round(as.numeric ( gsub("H","", colnames(tempCL[tempCL$max] ) ) )) # converts column number to frequency band
tempCL$spl = 10*log10 (rowMeans( (10^(tempCL[,1:fbinMax]/10)) ) )
cls = unique(res$Best.partition) #unique clusters in this data set
CLdets = NULL
for (clls in 1:4) {
tmpCL = tempCL[tempCL$cluster == cls[clls],]
tmpCLdur = (BPrle$lengths[BPrle$values == cls[clls] ] )
Cldur = mean(tmpCLdur, na.rm = TRUE) # mean duration of each cluster (fixed: was rm.na, which mean() silently ignores)
tmpCLpk = table(tmpCL$pkFq)
CLpk = as.numeric( rownames( as.data.frame(which.max(tmpCLpk))[1] ) ) # peak frequency
if (is.na( tmpCL[1,1]) == T ){ CLpk = NA}
CLspl = median(tmpCL$spl)
CLdets = cbind(CLdets, cbind(Cldur, CLpk, CLspl) )
rm(tmpCL,tmpCLpk, tmpCLdur,Cldur,CLpk, CLspl)
}
colnames(CLdets) = c("CL1dur", "CL1pk", "CL1Leq","CL2dur", "CL2pk", "CL2Leq","CL3dur", "CL3pk", "CL3Leq","CL4dur", "CL4pk", "CL4Leq")
rm(tempCL,cls)
#........................................................................................
#END ACOUSTIC INDEX CALCULATIONS
#........................................................................................
#APPEND to a datasheet with first minute of chunk...
Output = rbind(Output,
cbind(mySites[ss], siteDays[dd],
workingMatrix[1,1:6], timestep, dim(workingMatrix)[1],
ACIout, ACIoutI, ACIoutN, BKdB_low, BKdBA_low,
BKdB_bird, BKdBA_bird,
avgAMP, L10AMP, Hf, Ht, EI, BioPh, AthPh,
SoundScapeI, Bio_anth,
AA, AAc, AAdur, AAanth, AAcanth, AAduranth,
Rough, ADI_step, Eveness_step,
pk, pkd, pks, Hm, HvPres, HvSPL, unlist(dif_L10L90), Mamp,
NumCL, SP2,CLdets,
PAMGuideVersion, timezone) )
BKADI = rbind(BKADI,BKBirddB) # rbind(BKADI,cbind(workingMatrix[1,1:6],BKBirddB))
#colnames (site, Day, YY, MM, DD, HH, MM, SS, timestep, #Sec,
#ACI, BKlow_dB, BKlow_dBA, BKbird_dB, BKbird_dBA,
#avg_nom, L10_norm, SpectralEntropy, TemporalEntropy, EntropyIndex, Biophony, Anthrophony, SoundScapeIndex, RatioSoundScapeIndex,
# AcousticActivity, AcousticEvents, DurAcousticEvents, Roughness, AcousticDiversity, Eveness)
#........................................................................................
#some clean up
#rm(ACIout, BKdB_low,BKdBA_low, BKdB_bird, BKdBA_bird,BKBirddB)
} # else {cat(i," hr ", j, " min NO DATA",'\n',sep="") }
}# end of timestep loop
} # end if using start.at.beginning
## append all TIMESTEP data for each day together
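# NOTE: length() of a data frame is its column count, not its row count, so the
# denominator below squares the number of output columns; if the intent was to
# rank-normalize by the number of timesteps, nrow(Output)^2 may have been meant.
# Left as-is to preserve existing behavior.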
Output$AR = (rank(Output$Mamp) * rank(Output$Ht))/(length(Output)^2)
acis = rbind(acis, Output)
#........................................................................................
## RUN acoustic indices on each DAY- NOT PART OF THIS CODE, see version 17!
# CB ^oh no, what does this mean?? What is "version 17"? Is this tracked somewhere?
#........................................................................................
dayMatrixlim1 = NULL
BKdayMatrixlim1 = NULL
BKdayMatrixlimA1 = NULL
dayMatrixlimA1 = NULL
} # END of loop for DAYS
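# NOTE: acisD is never populated in this version (the per-day routine lives in
# "version 17" per the note above), so the next two lines operate on an empty
# data frame and OUTPUT_sitesD is effectively unused.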
acisD = as.data.frame(acisD)
acisD$AR = (rank(acisD$Mamp1)* rank(acisD$HtDay))/(length(acisD)^2)
OUTPUT_sites = rbind(OUTPUT_sites, acis)
OUTPUT_sitesD = rbind(OUTPUT_sitesD, acisD)
## WRITE OUT DATA, for each site
colnames(OUTPUT_sites)[2] ="Date"
colnames(OUTPUT_sites)[1] ="Site"
colnames(OUTPUT_sites)[10] ="SampleLength_sec"
dstmp = Sys.Date()
savFile <- 1
if (savFile == 1) {
# TIMESTEP DATA
# Name file according to whether the start.at.beginning routine was used
ifelse(test = start.at.beginning == TRUE,
yes = outFileName <- paste0(Outdir, "\\", mySites[ss], "_",
timestep, "mins", "_NVSPL_AcousticIndexStartatBegin_Created", dstmp, ".csv"),
no = outFileName <- paste0(Outdir, "\\", mySites[ss], "_",
timestep, "mins", "_NVSPL_AcousticIndex_Created", dstmp, ".csv"))
write.csv(OUTPUT_sites, file=outFileName, na ="NaN", quote=F, row.names=F)
}
} # END of loop for SITES
}