knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
# install.packages("devtools") # devtools::install_github("DenaJGibbon/gibbonR") library(gibbonR)
In 'gibbonR' there are two ways to format your training data. The first is a set of labelled .wav clips with the class indicated in the file name (e.g., 'gibbon_01.wav' and 'noise_01.wav'). The second is a folder of selection tables created in Raven Pro (K. Lisa Yang Center for Conservation Bioacoustics) along with a folder of the associated '.wav' files. For the second approach there must be an annotation column indicating the call type, and it is assumed that all signals of interest are annotated and that the rest of each file contains only background noise.
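For the first format, class labels come directly from the file names, so it is worth checking that the clips follow the 'class_number.wav' convention. The short sketch below is an illustrative addition rather than part of the package; it assumes the example folder 'data/MultipleSoundClasses/' used later in this vignette and the 'stringr' package, and simply tallies the classes implied by the file names.

# Illustrative sketch: recover class labels from clip file names
library(stringr)

ClipFiles <- list.files("data/MultipleSoundClasses/", pattern = ".wav", full.names = FALSE)

# Everything before the first '_' is treated as the class label
# (e.g., 'gibbon_01.wav' -> 'gibbon', 'noise_01.wav' -> 'noise')
ClipClasses <- str_split_fixed(ClipFiles, pattern = "_", n = 2)[, 1]
table(ClipClasses)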
TrainingWavFilesDir <- "data/MultipleSoundClasses/" trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg="TRUE") trainingdata$class <- as.factor(trainingdata$class)
trainingdata$class <- as.factor(trainingdata$class)

ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class,
                           kernel = "radial", cross = 25, probability = TRUE)

print(paste('SVM accuracy', ml.model.svm$tot.accuracy))

ml.model.rf <- randomForest::randomForest(x = trainingdata[, 2:ncol(trainingdata)],
                                          y = trainingdata$class)

print(ml.model.rf)
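Either trained model can then be applied to MFCC features extracted from new clips. The sketch below is an illustrative addition rather than part of the gibbonR workflow; 'newdata' is a hypothetical feature data frame produced by MFCCFunction() on a separate folder of clips, with the same columns as 'trainingdata'.

# Illustrative prediction sketch (assumes 'newdata' was produced by MFCCFunction()
# on a separate folder of clips and has the same feature columns as 'trainingdata')
newdata.features <- newdata[, 2:ncol(newdata)]

# SVM predictions with class probabilities
svm.pred <- predict(ml.model.svm, newdata.features, probability = TRUE)
head(attr(svm.pred, "probabilities"))

# Random forest predictions as class probabilities
rf.pred <- predict(ml.model.rf, newdata.features, type = "prob")
head(rf.pred)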
# Packages used to read the audio and parse file names
library(tuneR)
library(stringr)

# Specify the folder where the training data will be saved
TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables"

# Directory with annotated selection tables
AnnotatedSelectionTables <- list.files("/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/GibbonTrainingSelectionTables/",
                                       full.names = T)

# Directory with corresponding .wav files
AnnotatedWaveFiles <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTrainingFiles/", full.names = T)
AnnotatedWaveFilesShort <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTrainingFiles/", full.names = F)
AnnotatedWaveFilesShort <- str_split_fixed(AnnotatedWaveFilesShort, pattern = '.wav', n = 2)[, 1]

# Loop to cut out the corresponding annotations into short clips
for(i in 1:length(AnnotatedSelectionTables)){

  # Read in selection table
  TempSelectionTable <- read.delim2(AnnotatedSelectionTables[i])

  # Find the corresponding sound file
  SoundFileIndex <- which(str_detect(AnnotatedSelectionTables[i], AnnotatedWaveFilesShort))
  TempAnnotateWave <- readWave(AnnotatedWaveFiles[SoundFileIndex])

  # Extract one short clip per annotated row
  ShortSoundClips <- lapply(1:nrow(TempSelectionTable),
                            function(j) extractWave(TempAnnotateWave,
                                                    from = as.numeric(TempSelectionTable[j, ]$Begin.Time..s.),
                                                    to = as.numeric(TempSelectionTable[j, ]$End.Time..s.),
                                                    xunit = c("time"), plot = F, output = "Wave"))

  # Write wave files to folder; the selection table index is included in the name
  # so clips from different recordings do not overwrite each other
  for(k in 1:length(ShortSoundClips)){
    TempClip <- ShortSoundClips[[k]]
    WavFileName <- paste(TrainingDataFolderLocation, '/female.gibbon_', i, '_', k, '.wav', sep = "")
    writeWave(TempClip, WavFileName, extensible = F)
  }
}
# Specify the folder where the training data will be saved
TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/"

# Directory with annotated selection tables
NoiseSelectionTables <- list.files("/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/NoiseSelectionTables/",
                                   full.names = T)

# Directory with corresponding .wav files
NoiseWaveFiles <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/NoiseFiles/", full.names = T)
NoiseWaveFilesShort <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/NoiseFiles/", full.names = F)
NoiseWaveFilesShort <- str_split_fixed(NoiseWaveFilesShort, pattern = '.wav', n = 2)[, 1]

for(i in 1:length(NoiseSelectionTables)){

  # Find the corresponding sound file
  SoundFileIndex <- which(str_detect(NoiseSelectionTables[i], NoiseWaveFilesShort))

  DetectBLED(input = NoiseWaveFiles[SoundFileIndex],
             min.freq = 400,
             max.freq = 1600,
             noise.quantile.val = 0.3,
             spectrogram.window = 512,
             pattern.split = ".wav",
             min.signal.dur = 3,
             max.sound.event.dur = 12,
             wav.output = "TRUE",
             output.dir = TrainingDataFolderLocation,
             swift.time = TRUE,
             time.start = 06,
             time.stop = 11,
             write.table.output = TRUE,
             verbose = TRUE,
             random.sample = FALSE)
}
TrainingWavFilesDir <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg="TRUE") trainingdata$class <- as.factor(trainingdata$class)
trainingdata$class <- as.factor(trainingdata$class)

ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class,
                           kernel = "radial", cross = 25, probability = TRUE)

print(paste('SVM accuracy', ml.model.svm$tot.accuracy))

ml.model.rf <- randomForest::randomForest(x = trainingdata[, 2:ncol(trainingdata)],
                                          y = trainingdata$class)

print(ml.model.rf)
# Specify the folder where the training data will be saved
TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/"

TrainingDataMFCC <- MFCCFunction(input.dir = TrainingDataFolderLocation,
                                 min.freq = 400, max.freq = 1600,
                                 win.avg = "standard")

TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class)
TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles'

OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput"

DetectAndClassify(input = TestFileDirectory,
                  input.type = 'directory',
                  feature.df = TrainingDataMFCC,
                  model.type.list = c('SVM', 'RF'),
                  tune = TRUE,
                  short.wav.duration = 300,
                  target.signal = c("female.gibbon"),
                  min.freq = 400,
                  max.freq = 1600,
                  noise.quantile.val = 0.15,
                  time.window.number = 3,
                  n.windows = 9,
                  num.cep = 12,
                  spectrogram.window = 160,
                  pattern.split = ".wav",
                  min.signal.dur = 3,
                  max.sound.event.dur = 25,
                  maximum.separation = 1,
                  probability.thresh.svm = 0.15,
                  probability.thresh.rf = 0.15,
                  wav.output = "TRUE",
                  output.dir = OutputDirectory,
                  swift.time = TRUE,
                  time.start = 5,
                  time.stop = 10,
                  write.csv.output = FALSE,
                  verbose = TRUE,
                  random.sample = 'NA')
# Set location of test file selection tables
input.dir.text.files <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/GibbonTestSelectionTables"

Annotatedfiles <- list.files(input.dir.text.files, full.names = T)
ListOfAnnotatedFilesShort <- list.files(input.dir.text.files, full.names = F)

nslash <- str_count(Annotatedfiles, pattern = '/')[1] + 1
snames <- str_split_fixed(Annotatedfiles, pattern = '/', n = nslash)[, nslash]

all.detections <- data.frame()

for(x in 1:length(Annotatedfiles)){
  temp.table <- read.delim2(Annotatedfiles[x], fill = T, header = T)

  # Parse recorder, date and time from the file name
  file.name <- str_split_fixed(snames[x], pattern = '[.]', n = 2)[, 1]
  recorder <- str_split_fixed(file.name, pattern = '_', n = 3)[, 1]
  date <- str_split_fixed(file.name, pattern = '_', n = 3)[, 2]
  time <- str_split_fixed(file.name, pattern = '_', n = 3)[, 3]

  if(nrow(temp.table) > 0){
    temp.table.updated <- cbind.data.frame(file.name, recorder, date, time, temp.table)
  } else {
    # Keep empty selection tables as a single 'NA' row so every file is represented
    temp.row <- as.data.frame(t(rep('NA', ncol(temp.table))))
    colnames(temp.row) <- colnames(temp.table)
    temp.table.updated <- cbind.data.frame(file.name, recorder, date, time, temp.row)
  }

  all.detections <- rbind.data.frame(all.detections, temp.table.updated)
}
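A quick tally of annotation rows per test recording can help confirm that the selection tables were read in as expected; this check is an illustrative addition, not part of the gibbonR workflow.

# Illustrative sanity check: number of annotation rows per test recording
table(all.detections$file.name)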
OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput"

all.combinedprecision.recall.randomiter <- data.frame()

range.secs.start <- 6
range.secs.end <- 6

### Detections using band-limited energy summation
gibbondetects <- OutputDirectory

list.ml <- list.files(gibbondetects, full.names = T, pattern = '.wav')

# Need to focus on gibbons for this validation
nslash <- str_count(list.ml[[1]], '/') + 1
list.ml.signals <- str_split_fixed(list.ml, pattern = '/', n = nslash)[, nslash]
list.ml.signals <- str_split_fixed(list.ml.signals, pattern = '_', n = 5)[, 4]
list.ml <- list.ml[which(list.ml.signals == 'female.gibbon')]

ml.detection.df <- data.frame()

for(y in 1:length(list.ml)){
  L.wav <- list.ml[[y]]
  n.slash <- str_count(L.wav, pattern = "/")[1] + 1
  det.file.name <- str_split_fixed(L.wav, "/", n = n.slash)[, n.slash]
  det.file.name <- str_split_fixed(det.file.name, ".wav", n = 2)[, 1]

  file.name <- paste(str_split_fixed(det.file.name, "_", n = 5)[, 1],
                     str_split_fixed(det.file.name, "_", n = 5)[, 2],
                     str_split_fixed(det.file.name, "_", n = 5)[, 3],
                     sep = '_')

  det.date <- str_split_fixed(det.file.name, "_", n = 5)[, 2]
  det.time <- str_split_fixed(det.file.name, "_", n = 5)[, 3]
  det.swift <- str_split_fixed(det.file.name, "_", n = 5)[, 1]
  det.time.start <- as.numeric(str_split_fixed(det.file.name, "_", n = 9)[, 6])
  det.time.end <- as.numeric(str_split_fixed(det.file.name, "_", n = 9)[, 7])
  probability <- str_split_fixed(det.file.name, "_", n = 8)[, 8]
  ml.algorithm <- str_split_fixed(det.file.name, "_", n = 7)[, 5]

  detections.df <- cbind.data.frame(file.name, det.swift, det.date, det.time,
                                    det.time.start, det.time.end, probability, ml.algorithm)

  ml.detection.df <- rbind.data.frame(ml.detection.df, detections.df)
}

recall.snr.all.df <- data.frame()

for(x in 1:nrow(ml.detection.df)){
  all.detections.subset <- ml.detection.df[x, ]

  validate.detect.subset <- subset(all.detections, file.name == as.character(all.detections.subset$file.name))
  validate.detect.subset$Begin.Time..s. <- as.numeric(validate.detect.subset$Begin.Time..s.)

  min.start.time <- as.numeric(all.detections.subset$det.time.start) - range.secs.start
  max.start.time <- as.numeric(all.detections.subset$det.time.start) + range.secs.end

  detections.ml <- subset(validate.detect.subset,
                          Begin.Time..s. > min.start.time & Begin.Time..s. < max.start.time)

  if(nrow(detections.ml) > 0){
    all.detections.subset$class.label <- '1'
  } else{
    all.detections.subset$class.label <- '-1'
  }

  recall.snr.all.df <- rbind.data.frame(recall.snr.all.df, all.detections.subset)
}
library(ROCR)

auc.df <- data.frame()
performance.df <- data.frame()

ml.index <- unique(recall.snr.all.df$ml.algorithm)

for(m in 1:length(ml.index)){
  temp.subset <- subset(recall.snr.all.df, ml.algorithm == ml.index[m])

  predictions <- as.numeric(temp.subset$probability)
  labels <- (temp.subset$class.label)

  pred <- prediction(predictions, labels)

  perf <- performance(pred, "rec", "prec")
  perfauc <- performance(pred, "aucpr")

  Precision <- perf@x.values[[1]]
  Recall <- perf@y.values[[1]]
  Threshold <- perf@alpha.values[[1]]
  AUC <- perfauc@y.values[[1]]

  perfF1 <- performance(pred, "f")
  F1 <- perfF1@y.values[[1]]

  print(AUC)

  ml.algorithm <- ml.index[m]
  tempauc <- cbind.data.frame(AUC, ml.algorithm)
  auc.df <- rbind.data.frame(auc.df, tempauc)

  temp.performance <- cbind.data.frame(Precision, Recall, Threshold, F1, ml.algorithm)
  performance.df <- rbind.data.frame(performance.df, temp.performance)

  perf <- performance(pred, "prec", "rec")
  plot(perf, avg = "threshold", colorize = TRUE, lwd = 3,
       main = paste(ml.index[m], 'Precision/Recall'))
  plot(perf, lty = 3, col = "grey78", add = TRUE)
}
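The 'performance.df' table can also be used to choose an operating threshold. The sketch below is an illustrative addition (not part of the package) that simply reports, for each algorithm, the probability threshold with the highest F1 score.

# Illustrative: threshold with the highest F1 score for each algorithm
# (drops the infinite threshold and any undefined F1 values ROCR can produce)
finite.performance <- subset(performance.df, is.finite(Threshold) & is.finite(F1))
do.call(rbind, lapply(split(finite.performance, finite.performance$ml.algorithm),
                      function(df) df[which.max(df$F1), ]))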
library(gibbonR)
library(ggpubr)

UMAPBiplotAddSpectrograms(input.dir.Focal = "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",
                          output.dir.Focal = "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",
                          add.spectrograms = TRUE,
                          min.freq = 400,
                          max.freq = 1600,
                          main = "UMAP Plot")
library(gibbonR)
library(ggpubr)
library(apcluster)

AffinityBiplotAddSpectrograms(input.dir.Focal = "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",
                              output.dir.Focal = "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",
                              class = 'fixed',
                              q.fixed = 0.1,
                              add.spectrograms = TRUE,
                              min.freq = 400,
                              max.freq = 1600,
                              main = "UMAP Plot")