# soundgen: Sound Synthesis and Acoustic Analysis

# Variables returned by analyze() that are used to calculate flux, with the
# corresponding normalization coefficients. Called by getFeatureFlux().
#
# Columns of the resulting data frame:
#   feature       - name of the analyze() output variable (character)
#   norm_scale    - normalization coefficient (numeric; see per-row notes)
#   log_transform - logical flag; presumably marks features handled on a log
#                   scale (cf. the note on 'dom') - confirm in getFeatureFlux()
#
# Each row is written as a typed list(name, scale, flag) and the columns are
# extracted with vapply(). This avoids pushing numbers and logicals through a
# character matrix, where they would be stringified and re-parsed (and, with
# stringsAsFactors = TRUE in R < 4.0, as.numeric() would return factor level
# codes instead of the intended values).
featureFlux_vars = local({
  rows = list(
    list('voiced', 1, FALSE),
    list('ampl', 1, FALSE),
    # 'amDepVoiced', 100,
    # 'amFreqVoiced', 1000,
    list('dom', 2, TRUE),  # 2 on log-scale means 1 octave = 1
    list('entropy', 1, FALSE),
    # 'harmEnergy', 10,
    # 'harmHeight', 1000,
    list('HNRVoiced', 6, FALSE),  # 6 dB = 1
    list('loudness', 6, FALSE),
    list('novelty', 1, FALSE),
    list('peakFreq', 2, TRUE),
    list('pitch', 2, TRUE),
    list('quartile25', 2, TRUE),
    list('quartile50', 2, TRUE),
    list('quartile75', 2, TRUE),
    list('roughness', 100, FALSE),  # %
    list('specCentroid', 2, TRUE)
    # 'specSlope', 10, FALSE
    # 'subDep', 1,
    # 'subRatio', 10
  )
  data.frame(
    feature = vapply(rows, function(r) r[[1]], character(1)),
    norm_scale = vapply(rows, function(r) r[[2]], numeric(1)),
    log_transform = vapply(rows, function(r) r[[3]], logical(1)),
    stringsAsFactors = FALSE
  )
})


# Equal loudness curves for converting dB to phon.
# Translated from the matlab implementation by Jeff Tackett (03/01/05)
# available from https://www.mathworks.com/matlabcentral/fileexchange/
# 7028-iso-226-equal-loudness-level-contour-signal
#
# One curve is precomputed per phon level: steps of 1 from 0 to 20, steps of 2
# from 22 to 40, and steps of 5 from 45 to 90. The result is a list named by
# phon level, each element being the $curveBark component returned by iso226().
max_Barks = 27  # max 27 barks (~27000 Hz)
phonLevels = c(
  seq(from = 0, to = 20, by = 1),
  seq(from = 22, to = 40, by = 2),
  seq(from = 45, to = 90, by = 5)
)
phonCurves = lapply(phonLevels, function(phon) {
  iso226(phon, nBarks = max_Barks)$curveBark
})
names(phonCurves) = phonLevels
# plot(phonCurves[[1]][, c('freq_Hz', 'hearingThres_dB')])
# usethis::use_data(phonCurves, overwrite = TRUE, internal = TRUE)


# Matrix of coefficients for the spectrum spreading function
# (see Wonhos' dissertation from 1999).
# Entry [i, j] depends only on the offset d = i - j + 0.474 between bark
# bands i and j:
#   dB = 15.81 + 7.5 * d - 17.5 * sqrt(1 + d ^ 2)
# and is then converted from dB to a linear ratio with 10 ^ (dB / 10).
max_Barks = 27  # 27 barks (original note: tuneR::bark2hz(27) ~ 27 kHz)
spreadSpecCoef = local({
  # all pairwise offsets at once instead of looping over rows and columns
  offset = outer(seq_len(max_Barks), seq_len(max_Barks), '-') + 0.474
  spread_dB = 15.81 + 7.5 * offset - 17.5 * sqrt((1 + offset ^ 2))
  10 ^ (spread_dB / 10)
})
# image(spreadSpecCoef)


# Empirically observed ratios of harmonics, from the BaNa algorithm:
# Ba, H., Yang, N., Demirkol, I., & Heinzelman, W. (2012, August). BaNa: A
# hybrid approach for noise resilient pitch detection. In Statistical Signal
# Processing Workshop (SSP), 2012 IEEE (pp. 369-372). IEEE.
#
# One row per pair of spectral peaks:
#   name            - label of the ratio, e.g. 'F2/F0'
#   value_low       - lower bound of the observed ratio
#   value_high      - upper bound of the observed ratio
#   divide_lower_by - divisor paired with each ratio; presumably applied to
#                     the lower peak of the pair to recover F0 (verify against
#                     the code that consumes this table)
BaNaRatios = data.frame(
  name = c(
    'F1/F0',
    'F2/F0',  # was misspelled 'F2/FO' (letter O instead of digit zero)
    'F2/F1',
    'F3/F0',
    'F3/F2',
    'F4/F0',
    'F4/F1',
    'F4/F2',
    'F4/F3'
  ),
  value_low = c(1.9, 2.8, 1.42, 3.8, 1.29, 4.8, 2.4, 1.59, 1.15),
  value_high = c(2.1, 3.2, 1.59, 4.2, 1.42, 5.2, 2.6, 1.8, 1.29),
  divide_lower_by = c(1, 1, 2, 1, 3, 1, 2, 3, 4)
)


# getIntegerRandomWalk() generates an integer random walk and needs thresholds
# for activating different regimes of pitch effects. These thresholds are
# precomputed here for every integer pitchEffects_amount from 0 to 100 as two
# logistic curves scaled to 0-100:
#   q = 100 / (1 + exp(-slope * (pitchEffects_amount - midpoint)))
# With negative slopes both curves decrease as pitchEffects_amount grows and
# equal 50 at their respective midpoints.
slope_q1 = -.1
midpoint_q1 = 33
slope_q2 = -.1
midpoint_q2 = 66

noiseThresholdsDict = local({
  amount = 0:100
  # logistic curve with an upper asymptote of 100
  logistic100 = function(x, slope, midpoint) {
    100 / (1 + exp(-slope * (x - midpoint)))
  }
  list(
    pitchEffects_amount = amount,
    q1 = logistic100(amount, slope_q1, midpoint_q1),
    q2 = logistic100(amount, slope_q2, midpoint_q2)
  )
})
# plot (noiseThresholdsDict$pitchEffects_amount, noiseThresholdsDict$q1, type='l', col='red')
# points (noiseThresholdsDict$pitchEffects_amount, noiseThresholdsDict$q2, type='l', col='blue')


# Save all lookup objects built in this script as internal package data.
# internal = TRUE requests internal (non-exported) storage - per the usethis
# documentation this is the single file R/sysdata.rda - and overwrite = TRUE
# replaces any previously saved version, so rerun the whole script whenever
# one of the objects changes.
usethis::use_data(
  featureFlux_vars, phonCurves, spreadSpecCoef, BaNaRatios, noiseThresholdsDict,
  internal = TRUE, overwrite = TRUE
)