View source: R/compareSounds.R
compareSounds  R Documentation 
Computes similarity between two sounds based on comparing their
spectrogram-like representations. If the input is audio, two methods of
producing spectrograms are available: specType = 'linear'
calls
powspec
for a power spectrogram with frequencies in Hz,
and specType = 'mel'
calls melfcc
for an auditory
spectrogram with frequencies in Mel. For more customized options, just
produce your spectrograms or feature matrices (time in columns, features like
pitch, peak frequency etc in rows) with your favorite function before calling
compareSounds
because it also accepts matrices as input. To be
directly comparable, the two matrices are made into matrices of the same
size. In case of differences in sampling rates, only frequencies below the
lower Nyquist frequency or below maxFreq
are kept. In case of
differences in duration, the shorter sound is padded with 0 (silence) or NA,
as controlled by arguments padWith, padDir
. Then the matrices are
compared using methods like cross-correlation or Dynamic Time Warp.
compareSounds( x, y, samplingRate = NULL, windowLength = 40, overlap = 50, step = NULL, dynamicRange = 80, method = c("cor", "cosine", "diff", "dtw"), specType = c("linear", "mel")[2], specPars = list(), dtwPars = list(), padWith = NA, padDir = c("central", "left", "right")[1], maxFreq = NULL )
x, y 
either two matrices (spectrograms or feature matrices) or two sounds to be compared (numeric vectors, Wave objects, or paths to wav/mp3 files) 
samplingRate 
if one or both inputs are numeric vectors, specify sampling rate, Hz. A vector of length 2 means the two inputs have different sampling rates, in which case spectrograms are compared only up to the lower Nyquist frequency 
windowLength 
length of FFT window, ms 
overlap 
overlap between successive FFT frames, % 
step 
you can override overlap by specifying FFT step, ms
dynamicRange 
parts of the spectra quieter than -dynamicRange dB are zeroed out
method 
method of comparing mel-transformed spectra of two sounds:
"cor" = Pearson's correlation; "cosine" = cosine similarity; "diff" =
absolute difference between each bin in the two spectrograms; "dtw" =
multivariate Dynamic Time Warp with dtw
specType 
"linear" = power spectrogram with powspec (frequencies in Hz),
"mel" = auditory spectrogram with melfcc (frequencies in Mel)
specPars 
a list of parameters passed to melfcc
dtwPars 
a list of parameters passed to dtw
padWith 
if the duration of x and y is not identical, the compared
spectrograms are padded with either silence (padWith = 0) or NAs (padWith = NA) so that they have the same number of columns
padDir 
if padding, specify where to add zeros or NAs: before the sound ('left'), after the sound ('right'), or on both sides ('central') 
maxFreq 
parts of the spectra above maxFreq Hz are not compared
Returns a data frame with two columns: "method" for the method(s) used, and "sim" for the similarity between the two sounds calculated with that method. The range of similarity measures is [-1, 1] for "cor", [0, 1] for "cosine" and "diff", and (-Inf, Inf) for "dtw".
# Examples for compareSounds(): compare two sounds (or two feature matrices)
# with several similarity metrics. Everything between the "Not run" markers
# synthesizes and plays audio with soundgen, so it is not executed
# automatically.

data(orni, peewit, package = 'seewave')
compareSounds(orni, peewit)
# spectrogram(orni); playme(orni)
# spectrogram(peewit); playme(peewit)

## Not run:
s1 = soundgen(formants = 'a', play = TRUE)
s2 = soundgen(formants = 'ae', play = TRUE)
s3 = soundgen(formants = 'eae', sylLen = 700, play = TRUE)
s4 = runif(8000, -1, 1)  # white noise: uniform samples in [-1, 1]
compareSounds(s1, s2, samplingRate = 16000)
compareSounds(s1, s4, samplingRate = 16000)

# the central section of s3 is more similar to s1 than is the beg/end of s3
compareSounds(s1, s3, samplingRate = 16000, padDir = 'left')
compareSounds(s1, s3, samplingRate = 16000, padDir = 'central')

# padding with 0 penalizes differences in duration, whereas padding with NA
# is like saying we only care about the overlapping part
compareSounds(s1, s3, samplingRate = 16000, padWith = 0)
compareSounds(s1, s3, samplingRate = 16000, padWith = NA)

# comparing linear (Hz) vs mel-spectrograms produces quite different results
compareSounds(s1, s3, samplingRate = 16000, specType = 'linear')
compareSounds(s1, s3, samplingRate = 16000, specType = 'mel')

# pass additional control parameters to dtw and melfcc
compareSounds(s1, s3, samplingRate = 16000,
              specPars = list(nbands = 128),
              dtwPars = list(dist.method = "Manhattan"))

# use feature matrices instead of spectrograms
# (time in columns, features in rows)
a1 = t(as.matrix(analyze(s1, samplingRate = 16000)$detailed))
a1 = a1[4:nrow(a1), ]; a1[is.na(a1)] = 0
a2 = t(as.matrix(analyze(s2, samplingRate = 16000)$detailed))
a2 = a2[4:nrow(a2), ]; a2[is.na(a2)] = 0
a4 = t(as.matrix(analyze(s4, samplingRate = 16000)$detailed))
a4 = a4[4:nrow(a4), ]; a4[is.na(a4)] = 0
compareSounds(a1, a2, method = c('cosine', 'dtw'))
compareSounds(a1, a4, method = c('cosine', 'dtw'))

# a demo for comparing different similarity metrics
target = soundgen(
  sylLen = 500,
  formants = 'a',
  pitch = data.frame(time = c(0, 0.1, 0.9, 1),
                     value = c(100, 150, 135, 100)),
  temperature = 0.001
)
spec1 = soundgen:::getMelSpec(target, samplingRate = 16000)

# four candidates: every combination of wrong/right formants and pitch contour
parsToTry = list(
  list(formants = 'i',                                  # wrong
       pitch = data.frame(time = c(0, 1),               # wrong
                          value = c(200, 300))),
  list(formants = 'i',                                  # wrong
       pitch = data.frame(time = c(0, 0.1, 0.9, 1),     # right
                          value = c(100, 150, 135, 100))),
  list(formants = 'a',                                  # right
       pitch = data.frame(time = c(0, 1),               # wrong
                          value = c(200, 300))),
  list(formants = 'a',                                  # right
       pitch = data.frame(time = c(0, 0.1, 0.9, 1),     # right
                          value = c(100, 150, 135, 100)))
)

# synthesize one sound per parameter set (no growing a list inside a loop)
sounds = lapply(parsToTry, function(pars) {
  do.call(soundgen, c(pars, list(temperature = 0.001, sylLen = 500)))
})
lapply(sounds, playme)

method = c('cor', 'cosine', 'diff', 'dtw')
df = matrix(NA, nrow = length(parsToTry), ncol = length(method))
colnames(df) = method
df = as.data.frame(df)
# seq_len() is safe even if there are zero rows, unlike 1:nrow(df)
for (i in seq_len(nrow(df))) {
  df[i, ] = compareSounds(
    x = spec1,  # faster to calculate spec1 once
    y = sounds[[i]],
    samplingRate = 16000,
    method = method
  )[, 2]  # second column = "sim"
}
df$av = rowMeans(df, na.rm = TRUE)
# row 1 = wrong pitch & formants, ..., row 4 = right pitch & formants
df$formants = c('wrong', 'wrong', 'right', 'right')
df$pitch = c('wrong', 'right', 'wrong', 'right')
df
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.