# This code should run with TensorFlow >= 1.13.
# It was designed to run in graph as well as eager modes.
# If running in eager mode only, some pieces of code can be substantially simplified.
library(keras)
#use_implementation("tensorflow")
library(tensorflow)
#tfe_enable_eager_execution(device_policy = "silent")
library(tfdatasets)
library(stringr)
library(dplyr)
files <- fs::dir_ls(path = "data-raw/wav_12khz/",
                    recurse = TRUE,
                    glob = "*.wav")
df <- data_frame(
  fname = files,
  class = fname %>%
    str_extract("data-raw/wav_12khz/.*") %>%
    str_replace_all("data-raw/wav_12khz/", "") %>%
    str_replace_all("-[0-9]+\\.wav", ""),
  class_id = class %>% as.factor() %>% as.integer() - 1L
)
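# Sanity check (hypothetical file name, assuming files are laid out as
# data-raw/wav_12khz/<class>-<index>.wav): "data-raw/wav_12khz/bed-0.wav"
# would yield class "bed" and a zero-based class_id.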
batch_size <- 32
buffer_size <- nrow(df)
type <- "log_magnitude"
window_size_ms <- 30L
window_stride_ms <- 10L
lower_edge_hertz <- 0
upper_edge_hertz <- 2595 * log10(1 + (16000 / 2) / 700)
num_mel_bins <- 64L
num_mfccs <- 13L
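# The upper_edge_hertz value above comes from the standard hertz-to-mel formula,
# mel = 2595 * log10(1 + hz / 700), evaluated at the Nyquist frequency (16000 / 2 Hz).
# A minimal sketch of that conversion (hertz_to_mel is a hypothetical helper,
# not used elsewhere in this script):
hertz_to_mel <- function(hz) 2595 * log10(1 + hz / 700)
hertz_to_mel(8000)  # ~2840, the value assigned to upper_edge_hertz above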
data_generator <- function(df,
                           window_size_ms,
                           window_stride_ms) {
  # assume sampling rate is the same in all samples
  sampling_rate <-
    tf$audio$decode_wav(tf$io$read_file(tf$reshape(df$fname[[1]], list()))) %>% .$sample_rate
  samples_per_window <- (sampling_rate * window_size_ms) %/% 1000L  # unit = samples
  stride_samples <- (sampling_rate * window_stride_ms) %/% 1000L    # unit = samples
  # number of STFT frames per (1-second, 16000-sample) clip
  n_periods <-
    tf$shape(
      tf$range(
        samples_per_window %/% 2L,
        16000L - samples_per_window %/% 2L,
        stride_samples
      )
    )[1] + 1L

  # number of FFT coefficients: fft_length / 2 + 1, where fft_length is the
  # smallest power of two >= samples_per_window
  n_fft_coefs <-
    (2 ^ tf$math$ceil(tf$math$log(
      tf$cast(samples_per_window, tf$float32)
    ) / tf$math$log(2)) /
      2 + 1L) %>% tf$cast(tf$int32)
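  # Worked example (assuming 16 kHz audio): a 30 ms window is 480 samples, the
  # next power of two is 512, so n_fft_coefs is 512 / 2 + 1 = 257 -- the value
  # hard-coded as num_spectrogram_bins further down.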
  ds <- tensor_slices_dataset(df) %>%
    dataset_shuffle(buffer_size = buffer_size)

  ds <- ds %>%
    dataset_map(function(obs) {
      # decode_wav returns audio of shape (samples, channels); transpose to
      # (channels, samples) so the STFT runs along the time axis
      wav <-
        tf$audio$decode_wav(tf$io$read_file(tf$reshape(obs$fname, list())))
      samples <- wav$audio
      samples <- samples %>% tf$transpose(perm = c(1L, 0L))

      stft_out <- tf$signal$stft(samples,
                                 frame_length = samples_per_window,
                                 frame_step = stride_samples)
      magnitude_spectrograms <- tf$abs(stft_out)

      if (type == "log_magnitude") {
        log_magnitude_spectrograms <- tf$math$log(magnitude_spectrograms + 1e-6)
      } else if (type == "log_mel" || type == "mfcc") {
        # can't use when building a static graph as it needs evaluation
        # num_spectrogram_bins <- magnitude_spectrograms$shape[-1]$value
        linear_to_mel_weight_matrix <-
          tf$signal$linear_to_mel_weight_matrix(
            num_mel_bins,
            # num_spectrogram_bins,
            257L,
            # can't pass the sampling_rate tensor here: when building a static
            # graph, sample_rate needs to be a scalar, not a tensor
            # tf$cast(sampling_rate, tf$float32),
            16000,
            lower_edge_hertz,
            upper_edge_hertz
          )
        mel_spectrograms <- tf$tensordot(magnitude_spectrograms,
                                         linear_to_mel_weight_matrix,
                                         1L)
        log_mel_spectrograms <- tf$math$log(mel_spectrograms + 1e-6)
        if (type == "mfcc") {
          # keep the first `num_mfccs` MFCCs
          mfccs <- tf$signal$mfccs_from_log_mel_spectrograms(log_mel_spectrograms)[, , 1:num_mfccs]
        }
      }

      response <- tf$one_hot(obs$class_id, 30L)

      if (type == "log_magnitude") {
        # move the channel dimension to the last position (channels-last)
        input <-
          tf$transpose(log_magnitude_spectrograms, perm = c(1L, 2L, 0L))
      } else if (type == "log_mel") {
        input <-
          tf$transpose(log_mel_spectrograms, perm = c(1L, 2L, 0L))
      } else if (type == "mfcc") {
        input <-
          tf$transpose(mfccs, perm = c(1L, 2L, 0L))
      }

      list(input, response)
    })
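  # At this point each dataset element should be a pair (input, response):
  # input with shape (n_periods, n_coefs, 1), channels last, and response a
  # one-hot vector of length 30, matching the depth passed to tf$one_hot above.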
  n_coefs <- if (type == "log_magnitude") {
    n_fft_coefs
  } else if (type == "log_mel") {
    tf$constant(num_mel_bins)
  } else if (type == "mfcc") {
    tf$constant(num_mfccs)
  }

  ds <- ds %>%
    dataset_repeat()

  ds %>%
    dataset_padded_batch(
      batch_size = batch_size,
      padded_shapes = list(
        tf$stack(list(n_periods, n_coefs, -1L)),
        tf$constant(-1L, shape = shape(1L))
      ),
      drop_remainder = TRUE
    )
}
id_train <- sample(nrow(df), size = 0.7 * nrow(df))
n_train <- length(id_train)
n_test <- nrow(df) - n_train
ds_train <- data_generator(df[id_train, ],
                           window_size_ms = window_size_ms,
                           window_stride_ms = window_stride_ms)
ds_val <- data_generator(df[-id_train, ],
                         window_size_ms = window_size_ms,
                         window_stride_ms = window_stride_ms)
# read one batch to obtain the actual input dimensions for the model
if (!tf$executing_eagerly()) {
  sess <- tf$Session()
  batch <- next_batch(ds_train)
  # evaluate the batch once and reuse it (every sess$run() pulls a new batch)
  batch_data <- sess$run(batch)
  str(batch_data)
  n_periods <- batch_data[[1]] %>% dim() %>% .[2]
  n_coefs <- batch_data[[1]] %>% dim() %>% .[3]
} else {
  batch <- make_iterator_one_shot(ds_train) %>% iterator_get_next()
  n_periods <- batch[[1]] %>% dim() %>% .[2]
  n_coefs <- batch[[1]] %>% dim() %>% .[3]
}
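# With the defaults above (log-magnitude spectrograms, 1-second clips at 16 kHz,
# 30 ms windows, 10 ms stride), n_periods should come out to roughly 98 and
# n_coefs to 257; treat these numbers as a sanity check, not a guarantee.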
model <- keras_model_sequential()

model %>%
  layer_conv_2d(
    input_shape = c(n_periods, n_coefs, 1),
    filters = 32,
    kernel_size = c(3, 3),
    activation = 'relu',
    padding = "same"
  ) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64,
                kernel_size = c(3, 3),
                activation = 'relu',
                padding = "same") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128,
                kernel_size = c(3, 3),
                activation = 'relu',
                padding = "same") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 256,
                kernel_size = c(3, 3),
                activation = 'relu',
                padding = "same") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_dropout(rate = 0.2) %>%
  layer_flatten() %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 30, activation = 'softmax')
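# Rough shape walk-through, assuming a (98, 257, 1) input: the four 2x2 poolings
# reduce the feature map to about (6, 16, 256), so the flatten layer feeds
# roughly 6 * 16 * 256 = 24576 values into the 128-unit dense layer, followed
# by the 30-way softmax.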
model %>% compile(
  loss = loss_categorical_crossentropy,
  optimizer = optimizer_adadelta(),
  metrics = c('accuracy')
)

history <- model %>% fit(
  ds_train,
  epochs = 5,
  steps_per_epoch = trunc(n_train / batch_size),
  validation_data = ds_val,
  validation_steps = trunc(n_test / batch_size)
)
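# Optional follow-up (a sketch, not part of the original script): since ds_val
# repeats indefinitely, evaluation also needs an explicit number of steps.
# model %>% evaluate(ds_val, steps = trunc(n_test / batch_size))
# plot(history)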