View source: R/compareSounds.R
compareSounds  R Documentation 
Computes similarity between two sounds based on comparing their
spectrogram-like representations. If the input is audio, two methods of
producing spectrograms are available: specType = 'linear' calls powspec for
a power spectrogram with frequencies in Hz, and specType = 'mel' calls melfcc
for an auditory spectrogram with frequencies in Mel. For more customized
options, just produce your spectrograms or feature matrices (time in columns,
features like pitch, peak frequency etc in rows) with your favorite function
before calling compareSounds, because it also accepts matrices as input. To be
directly comparable, the two matrices are made into matrices of the same
size. In case of differences in sampling rates, only frequencies below the
lower Nyquist frequency or below maxFreq are kept. In case of
differences in duration, the shorter sound is padded with 0 (silence) or NA,
as controlled by arguments padWith and padDir. Then the matrices are
compared using methods like cross-correlation or Dynamic Time Warp.
compareSounds(
x,
y,
samplingRate = NULL,
windowLength = 40,
overlap = 50,
step = NULL,
dynamicRange = 80,
method = c("cor", "cosine", "diff", "dtw"),
specType = c("linear", "mel")[2],
specPars = list(),
dtwPars = list(),
padWith = NA,
padDir = c("central", "left", "right")[1],
maxFreq = NULL
)
x, y 
either two matrices (spectrograms or feature matrices) or two sounds to be compared (numeric vectors, Wave objects, or paths to wav/mp3 files) 
samplingRate 
if one or both inputs are numeric vectors, specify sampling rate, Hz. A vector of length 2 means the two inputs have different sampling rates, in which case spectrograms are compared only up to the lower Nyquist frequency 
windowLength 
length of FFT window, ms 
overlap 
overlap between successive FFT frames, % 
step 
you can override overlap by specifying FFT step, ms 
dynamicRange 
parts of the spectra quieter than -dynamicRange dB are not compared 
method 
method of comparing mel-transformed spectra of two sounds:
"cor" = Pearson's correlation; "cosine" = cosine similarity; "diff" =
absolute difference between each bin in the two spectrograms; "dtw" =
multivariate Dynamic Time Warp with dtw 
specType 
"linear" = power spectrogram with powspec (frequencies in Hz);
"mel" = auditory spectrogram with melfcc (frequencies in Mel) 
specPars 
a list of parameters passed to melfcc 
dtwPars 
a list of parameters passed to dtw 
padWith 
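if the duration of x and y is not identical, the compared
spectrograms are padded with either silence (padWith = 0) or NA
(padWith = NA) so that they have the same size. Padding with 0 penalizes
differences in duration, whereas padding with NA means that only the
overlapping part is compared (see examples) 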
padDir 
if padding, specify where to add zeros or NAs: before the sound ('left'), after the sound ('right'), or on both sides ('central') 
maxFreq 
parts of the spectra above maxFreq, Hz, are not compared 
Returns a dataframe with two columns: "method" for the method(s) used, and "sim" for the similarity between the two sounds calculated with that method. The range of similarity measures is [-1, 1] for "cor", [0, 1] for "cosine" and "diff", and (-Inf, Inf) for "dtw".
# Basic usage: compare two recordings shipped with the seewave package
data(orni, peewit, package = 'seewave')
compareSounds(orni, peewit)
# spectrogram(orni); playme(orni)
# spectrogram(peewit); playme(peewit)

## Not run:
# Synthesize three vowel sounds plus a noise control
s1 = soundgen(formants = 'a', play = TRUE)
s2 = soundgen(formants = 'ae', play = TRUE)
s3 = soundgen(formants = 'eae', sylLen = 700, play = TRUE)
# white noise: uniform samples in [-1, 1] (runif(n, 1, 1) would give a
# constant vector of 1s, i.e. no noise at all)
s4 = runif(8000, -1, 1)

compareSounds(s1, s2, samplingRate = 16000)
compareSounds(s1, s4, samplingRate = 16000)

# the central section of s3 is more similar to s1 than is the beginning/end
# of s3
compareSounds(s1, s3, samplingRate = 16000, padDir = 'left')
compareSounds(s1, s3, samplingRate = 16000, padDir = 'central')

# padding with 0 penalizes differences in duration, whereas padding with NA
# is like saying we only care about the overlapping part
compareSounds(s1, s3, samplingRate = 16000, padWith = 0)
compareSounds(s1, s3, samplingRate = 16000, padWith = NA)

# comparing linear (Hz) vs mel-spectrograms produces quite different results
compareSounds(s1, s3, samplingRate = 16000, specType = 'linear')
compareSounds(s1, s3, samplingRate = 16000, specType = 'mel')

# pass additional control parameters to dtw and melfcc
compareSounds(
  s1, s3,
  samplingRate = 16000,
  specPars = list(nbands = 128),
  dtwPars = list(dist.method = "Manhattan")
)

# use feature matrices instead of spectrograms (time in columns, features in
# rows)
# Helper: run analyze() on a sound and return its frame-by-frame output as a
# feature matrix with time in columns.
getFeatures = function(sound) {
  feat = t(as.matrix(analyze(sound, samplingRate = 16000)$detailed))
  # keep rows 4 onwards (presumably the first three rows are not acoustic
  # features - verify against the output of analyze())
  feat = feat[4:nrow(feat), ]
  # NA values are replaced with 0 before comparing
  feat[is.na(feat)] = 0
  feat
}
a1 = getFeatures(s1)
a2 = getFeatures(s2)
a4 = getFeatures(s4)
compareSounds(a1, a2, method = c('cosine', 'dtw'))
compareSounds(a1, a4, method = c('cosine', 'dtw'))
# a demo for comparing different similarity metrics
# Target sound: vowel 'a' with a rise-fall pitch contour
target = soundgen(
  sylLen = 500,
  formants = 'a',
  pitch = data.frame(time = c(0, 0.1, 0.9, 1),
                     value = c(100, 150, 135, 100)),
  temperature = 0.001
)
# the target's mel-spectrogram is computed once and reused for all comparisons
spec1 = soundgen:::getMelSpec(target, samplingRate = 16000)

# Four candidates covering every combination of right/wrong formants and pitch
parsToTry = list(
  list(formants = 'i',                                    # wrong
       pitch = data.frame(time = c(0, 1),                 # wrong
                          value = c(200, 300))),
  list(formants = 'i',                                    # wrong
       pitch = data.frame(time = c(0, 0.1, 0.9, 1),       # right
                          value = c(100, 150, 135, 100))),
  list(formants = 'a',                                    # right
       pitch = data.frame(time = c(0, 1),                 # wrong
                          value = c(200, 300))),
  list(formants = 'a',                                    # right
       pitch = data.frame(time = c(0, 0.1, 0.9, 1),       # right
                          value = c(100, 150, 135, 100)))
)

# Synthesize one sound per parameter set (same duration and temperature as
# the target)
sounds = lapply(parsToTry, function(pars) {
  do.call(soundgen, c(pars, list(temperature = 0.001, sylLen = 500)))
})
lapply(sounds, playme)

# One row per candidate sound, one column per similarity metric
method = c('cor', 'cosine', 'diff', 'dtw')
df = matrix(NA, nrow = length(parsToTry), ncol = length(method))
colnames(df) = method
df = as.data.frame(df)
for (i in seq_along(sounds)) {
  df[i, ] = compareSounds(
    x = spec1,  # faster to calculate spec1 once
    y = sounds[[i]],
    samplingRate = 16000,
    method = method
  )[, 2]  # second column of the output = similarity
}
# average across the four metrics
df$av = rowMeans(df, na.rm = TRUE)
# row 1 = wrong pitch & formants, ..., row 4 = right pitch & formants
df$formants = c('wrong', 'wrong', 'right', 'right')
df$pitch = c('wrong', 'right', 'wrong', 'right')
df
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.