inst/doc/tutorial.R

## ----install_packages, message=FALSE, eval=FALSE------------------------------
#  install.packages("bioacoustics")
#  
#  # The bioacoustics package may also be installed from GitHub using devtools as follows:
#  install.packages("devtools")
#  devtools::install_github("wavx/bioacoustics") # For the latest, unstable version
#  
#  install.packages("warbleR")
#  install.packages("randomForest")

## ----load_packages, message=FALSE, eval=FALSE---------------------------------
#  # Load the packages
#  library(warbleR)
#  library(bioacoustics)
#  library(tools)
#  library(randomForest)

## ----xeno1, message=FALSE, eval=FALSE-----------------------------------------
#  df1 = query_xc(qword = 'Catharus bicknelli type:call cnt:"United States"', download = FALSE)
#  df1 = df1[df1$Vocalization_type=="call",]
#  df1 = df1[df1$Quality=="A",]
#  df1 = df1[1:9,]

## ----xeno2, message=FALSE, results='hold', eval=FALSE-------------------------
#  df2 = query_xc(qword = 'Setophaga magnolia type:song cnt:"Canada"', download = FALSE)
#  df2 = df2[df2$Quality=="A",]
#  df2 = df2[1:9,]

## ----xeno3, message=FALSE, results='hold', eval=FALSE-------------------------
#  df3 = query_xc(qword = 'Passerella iliaca type:song cnt:"Canada"', download = FALSE)
#  df3 = df3[df3$Vocalization_type=="song",]
#  df3 = df3[df3$Quality %in% c("A", "B"),]
#  df3 = df3[1:9,]
#  
#  df = rbind(df1,df2,df3)
#  rm(df1,df2,df3)
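#  
#  # A quick check (a sketch, not in the original tutorial). Assuming each query
#  # returned at least nine matching recordings, we kept 9 per species:
#  nrow(df) # should be 27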

## ----xeno4, eval=FALSE--------------------------------------------------------
#  # Visualize your data frame
#  View(df)
#  
#  # We will work in the R temp directory
#  wd <- tempdir()
#  
#  # Create a data directory if it does not exist
#  data_dir <- file.path(wd, "data")
#  
#  if(!dir.exists(data_dir))
#    dir.create(data_dir)
#  
#  # Download the MP3 files into your data directory
#  query_xc(X = df, download = TRUE, path = data_dir)

## ----read_audio, eval=FALSE---------------------------------------------------
#  CATBIC <- read_audio(file.path(data_dir, "Catharus-bicknelli-54864.mp3"))
#  CATBIC
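#  
#  # A small sketch (not in the original tutorial): read_audio() returns a
#  # tuneR Wave object, so basic properties can be read from its slots.
#  slot(CATBIC, "samp.rate") # sample rate in Hz
#  length(slot(CATBIC, "left")) / slot(CATBIC, "samp.rate") # duration in seconds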

## ----metadata, eval=FALSE------------------------------------------------------
#  # Display the metadata embedded in the audio file
#  metadata(CATBIC)

## ----spectro0, eval=FALSE-----------------------------------------------------
#  # Set plot margins to 0
#  par(mar = c(0, 0, 0, 0), oma = c(0, 0, 0, 0))
#  
#  # Display with spectro()
#  ticks <- c(from = 1000, to = 20000, by = 1000) # frequency tick marks from 1 to
#                                                 # 20 kHz, 1 kHz steps
#  temp_slice <- c(from = 1, to = 10) # in seconds
#  spectro(CATBIC, tlim = temp_slice, FFT_size = 512, ticks_y = ticks)

## ----help, eval=FALSE---------------------------------------------------------
#  # Access the arguments of the spectro function
#  ?spectro
#  ?fspec

## ----spectro1, eval=FALSE-----------------------------------------------------
#  # Set plot margins to 0
#  par(mar = c(0, 0, 0, 0), oma = c(0, 0, 0, 0))
#  
#  # Display the spectrogram with spectro()
#  ticks <- c(from = 1000, to = 20000, by = 1000) # frequency tick marks from 1 to
#                                                 # 20 kHz, 1 kHz steps
#  temp_slice <- c(from = 2, to = 3.5) # in seconds
#  spectro(CATBIC, tlim = temp_slice, FFT_size = 512, ticks_y = ticks)
#  
#  # fspec() gives you the spectrogram matrix with energy values (dB)
#  spec_mx <- fspec(CATBIC, tlim = temp_slice, FFT_size = 512, rotate = TRUE)
#  
#  # You can display the spectrogram with image()
#  image(spec_mx, xaxt = "n", yaxt = "n")
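#  
#  # A quick check (a sketch): with rotate = TRUE the matrix is rotated so that
#  # image() draws time on the x-axis and frequency on the y-axis.
#  dim(spec_mx)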

## ----filter, eval=FALSE-------------------------------------------------------
#  temp_slice <- c(from = 2.5, to = 3.5)
#  freq_slice <- c(from = 1500, to = 20000)
#  spec_o <- fspec(CATBIC, tlim = temp_slice, flim = freq_slice, FFT_size = 512, rotate = TRUE)
#  
#  ## min and max (range) dB intensity
#  range(spec_o) # -120 (min) to 0 dB (max)
#  # Note that the dynamic range of your recorder depends on its bit depth.
#  # 16-bit recorders offer only about 96 dB of dynamic range (a noise floor
#  # near -96 dB), and sound pressure above full scale is clipped at 0 dB.
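#  
#  # As a quick sanity check (a sketch, not in the original tutorial), the
#  # theoretical dynamic range of linear PCM is 20 * log10(2^bits):
#  20 * log10(2^16) # ~96.3 dB for 16-bit audio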
#  
#  ## Let's try a filter by mean + sd intensity
#  spec_f <- fspec(CATBIC, tlim = temp_slice, flim = freq_slice, FFT_size = 512, rotate = TRUE)
#  spec_f[spec_f < mean(spec_f) + sd(spec_f)] <- -120
#  # Works well with high intensity audio events, but leads to
#  # false negatives (missed events) otherwise.
#  
#  par(mar = c(0, 0, 0, 0), oma = c(0, 0, 0, 0))
#  image(spec_o, xaxt="n", yaxt="n")
#  image(spec_f, xaxt="n", yaxt="n")
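
## ----filter_quantile, eval=FALSE-----------------------------------------------
#  # An alternative sketch (not in the original tutorial): threshold at a high
#  # quantile of the dB values instead of mean + sd, which is often less
#  # aggressive on faint audio events. The 0.95 quantile is an arbitrary
#  # starting point to tune.
#  spec_q <- fspec(CATBIC, tlim = temp_slice, flim = freq_slice, FFT_size = 512, rotate = TRUE)
#  spec_q[spec_q < quantile(spec_q, 0.95)] <- -120
#  image(spec_q, xaxt = "n", yaxt = "n")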

## ----threshold_help, eval=FALSE-----------------------------------------------
#  # Access the arguments of the threshold_detection function
#  ?threshold_detection

## ----threshold1, eval=FALSE---------------------------------------------------
#  # Set each argument according to the targeted audio events
#  TD <- threshold_detection(
#    CATBIC, # Either a path to an audio file (see ?read_audio), or a Wave object
#    threshold = 12, # 12 dB SNR sensitivity for the detection algorithm
#    time_exp = 1, # Time expansion factor of 1 (none); only needed for time-expanded bat recordings
#    min_dur = 140, # Minimum duration threshold of 140 milliseconds (ms)
#    max_dur = 440, # Maximum duration threshold of 440 ms
#    min_TBE = 10, # Minimum time window between two audio events of 10 milliseconds
#    max_TBE = 5000, # Maximum time window between two audio events, here 5 seconds
#    EDG = 0.996, # Temporal masking with Exponential Decay Gain from 0 to 1
#    LPF = 10000, # Low-Pass Filter of 10 kHz
#    HPF = 1000, # High-Pass Filter of 1 kHz
#    FFT_size = 256, # Size of the Fast Fourier Transform (FFT) window
#    FFT_overlap = 0.875, # Percentage of overlap between two FFT windows
#  
#    start_thr = 25, # 25 dB threshold at the start of the audio event
#    end_thr = 30, # 30 dB threshold at the end of the audio event
#    SNR_thr = 10, # 10 dB SNR threshold at which the extraction of the audio event stops
#    angle_thr = 45, # 45° of angle at which the extraction of the audio event stops
#    duration_thr = 440, # Noise estimation is resumed after 440 ms
#    NWS = 1000, # Time window length of 1 s used for background noise estimation
#    KPE = 1e-05, # Process Error parameter of the Kalman filter (for smoothing)
#    KME = 1e-04, # Measurement Error parameter of the Kalman filter (for smoothing)
#  
#    settings = FALSE, # Save the settings used above in a list
#    acoustic_feat = TRUE, # Extracts the acoustic and signal quality parameters
#    metadata = FALSE, # Extracts the metadata embedded in the Wave file into a list
#    spectro_dir = file.path(tempdir(), "Spectros"), # Directory where to save the spectrograms
#    time_scale = 1, # Time resolution of 1 ms for spectrogram display
#    ticks = TRUE # Tick marks and their intervals are drawn on the y-axis (frequencies)
#  )
#  
#  # Get the number of extracted audio events
#  nrow(TD$data$event_data)

## ----threshold2, eval=FALSE---------------------------------------------------
#  # Let's try various settings, starting with 1024 FFT size instead of 256.
#  TD <- threshold_detection(
#    CATBIC, threshold = 12, time_exp = 1, min_dur = 140, max_dur = 440,
#    min_TBE = 10, max_TBE = 5000, EDG = 0.996, LPF = 10000, HPF = 1000,
#    FFT_size = 1024, FFT_overlap = 0.875, start_thr = 25, end_thr = 30,
#    SNR_thr = 10, angle_thr = 45, duration_thr = 440, NWS = 1000,
#    KPE = 1e-05, KME = 1e-04, settings = FALSE, acoustic_feat = TRUE,
#    metadata = FALSE, spectro_dir = file.path(tempdir(), "Spectros"), time_scale = 1,
#    ticks = c(1000, 10000, 1000) # Tick marks from 1 to 10 kHz with 1 kHz interval
#  )
#  
#  # Take a look at the spectrograms and compare them with the previous extraction.
#  nrow(TD$data$event_data) # Only three audio events!

## ----threshold3, eval=FALSE---------------------------------------------------
#  CATBIC <- read_audio(file.path(data_dir, "Catharus-bicknelli-54864.mp3"))
#  TD <- threshold_detection(
#    CATBIC, threshold = 12, time_exp = 1, min_dur = 140, max_dur = 440, min_TBE = 10,
#    max_TBE = Inf, EDG = 0.996, LPF = 10000, HPF = 1000, FFT_size = 256, FFT_overlap = 0.875,
#    start_thr = 22, end_thr = 30, SNR_thr = 10, angle_thr = 125, duration_thr = 440, NWS = 1000,
#    KPE = 1e-05, KME = 1e-05, settings = FALSE, acoustic_feat = TRUE, metadata = FALSE
#  )

## ----features1, eval=FALSE----------------------------------------------------
#  # Acoustic features are stored in a data frame called event_data,
#  # with one row per event, in order of detection.
#  
#  View(TD$data$event_data) # Contains the filename and the time of detection in the
#                           # recording, and 26 extracted features.

## ----features2, eval=FALSE----------------------------------------------------
#  # Start and end of the 5th extracted audio event (in samples)
#  c(TD$data$event_start[[5]], TD$data$event_end[[5]])
#  
#  # Remember you just have to divide by the sample rate to retrieve the time (s)
#  c(TD$data$event_start[[5]], TD$data$event_end[[5]]) / slot(CATBIC, "samp.rate")
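#  
#  # For instance (a sketch), the duration of every detected event in ms:
#  (unlist(TD$data$event_end) - unlist(TD$data$event_start)) /
#    slot(CATBIC, "samp.rate") * 1000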

## ----features3, eval=FALSE----------------------------------------------------
#  par(mar = c(1, 1, 1, 1), oma = c(1, 1, 1, 1))
#  
#  # Amplitude track of the 5th audio event
#  plot(TD$data$amp_track[[5]], type = "l")
#  
#  # Frequency track of the 5th audio event
#  plot(TD$data$freq_track[[5]], type = "l")

## ----blob0, eval=FALSE--------------------------------------------------------
#  # Access the arguments of the blob_detection function
#  ?blob_detection

## ----blob1, eval=FALSE--------------------------------------------------------
#  # Use the bat recording stored in the package
#  data(myotis)
#  
#  # Set each argument according to the targeted audio events
#  BD <- blob_detection(
#    myotis, # Either a path to an audio file (see ?read_audio), or a Wave object
#    time_exp = 10, # Time expansion factor of 10 for time-expanded recordings
#    min_dur = 1.5, # Minimum duration threshold of 1.5 milliseconds (ms)
#    max_dur = 80, # Maximum duration threshold of 80 ms
#    min_area = 40, # Minimum blob area of 40 pixels
#    min_TBE = 20, # Minimum time window between two audio events of 20 milliseconds
#    EDG = 0.996, # Temporal masking with Exponential Decay Gain from 0 to 1
#    LPF = slot(myotis, "samp.rate") * 10 / 2, # Low-Pass Filter at the Nyquist frequency
#    HPF = 16000, # High-Pass Filter of 16 kHz
#    FFT_size = 256, # Size of the Fast Fourier Transform (FFT) window
#    FFT_overlap = 0.875, # Percentage of overlap between two FFT windows
#  
#    blur = 2, # Gaussian smoothing function with a factor of 2 for blurring the spectrogram
#    bg_substract = 20, # Foreground extraction with a mean filter applied on the spectrogram
#    contrast_boost = 20, # Edge contrast enhancement filter of the spectrogram contour
#  
#    settings = FALSE, # Save the settings used above in a list
#    acoustic_feat = TRUE, # Extracts the acoustic and signal quality parameters
#    metadata = FALSE, # Extracts the metadata embedded in the Wave file into a list
#    spectro_dir = file.path(tempdir(), "Spectros"), # HTML page with spectrograms by order of detection
#    time_scale = 0.1, # Time resolution of 0.1 ms for spectrogram display
#    ticks = TRUE # Tick marks and their intervals are drawn on the y-axis (frequencies)
#  )
#  
#  # Get the number of extracted audio events
#  nrow(BD$data$event_data)
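#  
#  # Note (a sketch): with a time expansion factor of 10 the effective sample
#  # rate is 10 * samp.rate, hence the Nyquist frequency used for the LPF above:
#  slot(myotis, "samp.rate") * 10 / 2 # in Hz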

## ----blob2, eval=FALSE--------------------------------------------------------
#  # Let's try various settings, starting with 512 FFT size instead of 256.
#  BD <- blob_detection(
#    myotis, time_exp = 10, FFT_size = 512, settings = FALSE, acoustic_feat = TRUE,
#    metadata = FALSE, spectro_dir = file.path(tempdir(), "Spectros"), time_scale = 0.1, ticks = TRUE
#  )
#  
#  # Take a look at the spectrograms and compare them with the previous extraction.
#  nrow(BD$data$event_data) # Only 6 audio events!

## ----blobfeat1, eval=FALSE----------------------------------------------------
#  # Acoustic features
#  head(BD$data)

## ----classification1, eval=FALSE----------------------------------------------
#  # Get the filepath for each MP3 file
#  files <- dir(data_dir, recursive = TRUE, full.names = TRUE, pattern = "[.]mp3$")
#  
#  # Detect and extract audio events
#  TDs <- setNames(
#    lapply(
#      files,
#      threshold_detection,
#      threshold = 12, min_dur = 140, max_dur = 440, min_TBE = 50, max_TBE = Inf,
#      LPF = 8000, HPF = 1500, FFT_size = 256, start_thr = 30, end_thr = 20,
#      SNR_thr = 10, angle_thr = 125, duration_thr = 400, spectro_dir = NULL,
#      NWS = 2000, KPE = 0.00001, time_scale = 2, EDG = 0.996
#    ),
#    basename(file_path_sans_ext(files))
#  )
#  
#  # Keep only the files that contain detected events
#  TDs <- TDs[sapply(TDs, function(x) length(x$data)) > 0]
#  
#  # Keep the extracted features and merge them into a single data frame for further analysis
#  Event_data <- do.call("rbind", c(lapply(TDs, function(x) x$data$event_data), list(stringsAsFactors = FALSE)))
#  nrow(Event_data) # 355 audio events extracted
#  
#  # Compute the number of extracted CATBIC calls
#  sum(startsWith(Event_data$filename, "Cat"))
#  
#  # Add a "Class" column: "CATBIC" vs. other species of birds "OTHERS"
#  classes <- as.factor(ifelse(startsWith(Event_data$filename, "Cat"), "CATBIC", "OTHERS"))
#  Event_data <- cbind(data.frame(Class = classes), Event_data)
#  
#  # Get rid of the filename and time in the recording
#  Event_data$filename <- Event_data$starting_time <- NULL
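#  
#  # A quick check (a sketch) of the class balance before training:
#  table(Event_data$Class)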

## ----classification2, eval=FALSE----------------------------------------------
#  # Split the data into 60% training / 40% test sets
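#  # (A sketch, not in the original) fix the RNG seed so the split is reproducible:
#  set.seed(666)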
#  train <- sample(1:nrow(Event_data), round(nrow(Event_data) * .6))
#  Train <- Event_data[train,]
#  
#  test <- setdiff(1:nrow(Event_data), train)
#  Test <- Event_data[test,]
#  
#  # Train a random forest classifier
#  set.seed(666)
#  rf <- randomForest(Class ~ duration + freq_max_amp + freq_max + freq_min +
#                             bandwidth + freq_start + freq_center + freq_end +
#                             freq_knee + fc + freq_bw_knee_fc + bin_max_amp +
#                             pc_freq_max_amp + pc_freq_max + pc_freq_min +
#                             pc_knee + temp_bw_knee_fc + slope + kalman_slope +
#                             curve_neg + curve_pos_start + curve_pos_end +
#                             mid_offset + smoothness + snr + hd,
#                     data = Train, importance = FALSE, proximity = FALSE,
#                     replace = TRUE, ntree = 4000, mtry = 4)
#  
#  # Look at the confusion matrix of the training set
#  rf$confusion # looks good, but...
#  
#  # Let's make predictions with our classifier on a test set
#  table(Test[,1], predict(rf, Test[,-1], type = "response")) # not bad!
#  
#  # To look at the predictions
#  head(predict(rf, Test[,-1], type = "prob"))
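#  
#  # Overall test accuracy (a sketch), derived from the confusion matrix above:
#  cm <- table(Test[,1], predict(rf, Test[,-1], type = "response"))
#  sum(diag(cm)) / sum(cm)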

## ----keras1, eval=FALSE-------------------------------------------------------
#  # Run if keras is installed on your machine
#  library(keras)
#  
#  # Build the training set
#  Y_train <- to_categorical(as.integer(Train[,1]) - 1) # One-hot encoding
#  
#  # X as matrix
#  X_train <- as.matrix(Train[,-1])
#  
#  # Build the test set
#  Y_test <- to_categorical(as.integer(Test[,1]) - 1)
#  X_test <- as.matrix(Test[,-1])
#  
#  # Build the sequential model
#  mod0 <- keras_model_sequential()
#  mod0 %>%
#    # Input shape layer = c(samples, rows, cols, channels)
#    layer_reshape(input_shape = ncol(X_train), target_shape = c(1, 1, ncol(X_train))) %>%
#    # First conv 2D layer with 128 filters, kernel size of 8 x 8 and stride of 1 x 1
#    layer_conv_2d(128, c(8,8), c(1,1), padding='same') %>%
#    layer_batch_normalization() %>%
#    layer_activation("relu") %>%
#    layer_dropout(0.2) %>%
#    # Second conv 2D layer with 256 filters, kernel size of 5 x 5 and stride of 1 x 1
#    layer_conv_2d(256, c(5,5), c(1,1), padding='same') %>%
#    layer_batch_normalization() %>%
#    layer_activation("relu") %>%
#    layer_dropout(0.2) %>%
#    # Third conv 2D layer with 128 filters, kernel size of 3 x 3 and stride of 1 x 1
#    layer_conv_2d(128, c(3,3), c(1,1), padding='same') %>%
#    layer_batch_normalization() %>%
#    layer_activation("relu") %>%
#    layer_dropout(0.2) %>%
#    # Global average pooling layer
#    layer_global_average_pooling_2d() %>%
#    # Activation output layer with 2 classes
#    layer_dense(units = ncol(Y_train), activation = 'softmax')
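#  
#  # Inspect the resulting architecture and parameter counts (a sketch):
#  summary(mod0)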
#  
#  # Model compile
#  mod0 %>% compile(loss = 'categorical_crossentropy',
#                   optimizer = "adam",
#                   metrics = "categorical_accuracy")
#  
#  
#  # Add a callback to reduce the learning rate when the loss plateaus
#  reduce_lr <- callback_reduce_lr_on_plateau(monitor = 'loss', factor = 0.5,
#                                             patience = 50, min_lr = 0.0001)
#  # Start learning
#  mod0 %>% fit(X_train, Y_train, batch_size = 32, epochs = 50,
#               validation_data = list(X_test, Y_test),
#               verbose = 1, callbacks = reduce_lr)
#  
#  # Score on the test set
#  score <- mod0 %>% evaluate(X_test, Y_test, batch_size = 32)
#  score

## ----keras2, eval=FALSE-------------------------------------------------------
#  # Look at predictions and build a confusion matrix
#  Pred <- as.factor(predict_classes(mod0, X_test, batch_size = 32, verbose = 1))
#  table(Y_test[,2], Pred)
#  
#  # To look at the prediction values
#  Prob <- round(predict_proba(mod0, X_test, batch_size = 32, verbose = 1), 2)
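#  head(Prob) # (a sketch) per-class probabilities for the first few test events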
