Nothing
### UTILITIES FOR POSTPROCESSING OF PITCH CONTOURS ###
#' Pathfinder
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing pitch contour. Starts with a
#' reasonable guess and computes the more-or-less optimal pitch contour (not
#' quite the very optimal - too computationally expensive).
#' @param pitchCands a matrix of multiple pitch candidates per fft frame. Each
#' column is one fft frame, each row is one candidate (the last row is always
#' "manual")
#' @param pitchCert a matrix of the same dimensionality as pitchCands specifying
#' our certainty in pitch candidates
#' @inheritParams analyze
#' @param interpolWin_bin when interpolating pitch candidates, the median is
#' calculated over \code{plus-minus interpolWin_bin}
#' @param interpolTol when interpolating pitch candidates, the criterion
#' for needing to interpolate is the absence of pitch candidates with values
#' within \code{1 plus-minus interpolTol} of the median of pitch center of
#' gravity over the interpolation window. For ex., if \code{interpolTol}
#' is .05, we look for values from 0.95 to 1.05 time the median value over
#' interpolation window.
#' @param interpolCert when interpolating pitch candidates, all generated pitch
#' candidates are assigned a certainty equal to \code{interpolCert}
#' @param manualCert the certainty in manually added pitch candidates
#' @return Returns a numeric vector of pitch values representing the best found
#' path through pitch candidates.
#' @keywords internal
pathfinder = function(pitchCands,
pitchCert,
pitchSource,
step,
manual = NULL,
certWeight = 0.5,
pathfinding = c('none', 'fast', 'slow')[2],
annealPars = list(maxit = 5000, temp = 1000),
interpolWin_bin = 3,
interpolTol = 0.05,
interpolCert = 0.3,
manualCert = 1,
snakeStep = 0,
snakePlot = FALSE) {
nr = nrow(pitchCands)
nc = ncol(pitchCands)
# Add "inviolable" manual pitch values
if (length(manual$frame) > 0) {
freqMan = certMan = rep(NA, nc)
freqMan[manual$frame] = manual$freq
certMan[manual$frame] = manualCert
pitchCands[, manual$frame] = NA # ignore all other candidates in manual frames
pitchCands = rbind(pitchCands, freqMan)
pitchCert = rbind(pitchCert, certMan)
pitchSource = rbind(pitchSource, rep('manual', nc))
nr = nr + 1
}
# take log to approximate human perception of pitch differences
pitchCands[!is.na(pitchCands)] = log2(pitchCands[!is.na(pitchCands)])
if (!is.null(manual)) manual$freq = log2(manual$freq)
# get the "center of gravity" of pitch candidates in each frame (mean of all
# pitch candidates weighted by their respective certainties)
pitchCenterGravity = rep(NA, ncol(pitchCands))
for (i in 1:ncol(pitchCands)) {
idxDeadCert = which(pitchSource[, i] == 'manual' & pitchCert[, i] == manualCert)
if (length(idxDeadCert) == 1) {
# special case: pitchCenterGravity has to pass through manual candidates
pitchCenterGravity[i] = pitchCands[idxDeadCert, i]
} else {
pitchCenterGravity[i] = weighted.mean(x = pitchCands[, i],
w = pitchCert[, i],
na.rm = TRUE)
}
}
## INTERPOLATION
# if a frame has no pitch candidate at all (NA) or no candidate
# between the most likely candidates for the adjacent frames, add such a
# candidate with ~low certainty
if (length(interpolWin_bin) > 0 && interpolWin_bin > 0) {
# order pitch candidates and certainties in each frame pushing NAs down so
# as to simplify the matrix (the position of manual candidates is not
# important since they are saved in 'manual')
if (nrow(pitchCands) > 1) {
o = apply(as.matrix(1:ncol(pitchCands), nrow = 1), 1, function(x) {
order(pitchCands[, x])
})
pitchCands = apply(matrix(1:ncol(pitchCands)), 1, function(x) {
pitchCands[o[, x], x]
})
pitchCert = apply(matrix(1:ncol(pitchCert)), 1, function(x) {
pitchCert[o[, x], x]
})
pitchSource = apply(matrix(1:ncol(pitchSource)), 1, function(x) {
pitchSource[o[, x], x]
})
}
intplt = interpolate(pitchCands = pitchCands,
pitchCert = pitchCert,
pitchSource = pitchSource,
pitchCenterGravity = pitchCenterGravity,
interpolWin_bin = interpolWin_bin,
interpolTol = interpolTol,
interpolCert = interpolCert)
pitchCands = intplt$pitchCands
pitchCert = intplt$pitchCert
pitchSource = intplt$pitchSource
pitchCenterGravity = intplt$pitchCenterGravity
}
# remove interpolated candidates in manual frames (otherwise they might be preferred)
if (length(manual$frame) > 0) {
for (f in manual$frame) {
pitchCands[which(pitchSource[, f] != 'manual'), f] = NA
}
}
# remove rows with all NA's
keep_rows = which(rowSums(!is.na(pitchCands)) > 0)
if (length(keep_rows) > 0) {
pitchCands = pitchCands[keep_rows, , drop = FALSE]
pitchCert = pitchCert[keep_rows, , drop = FALSE]
pitchSource = pitchSource[keep_rows, , drop = FALSE]
}
# special case: only a single pitch candidate for all frames in a syllable
# (no paths to chose among)
if (nrow(pitchCands) == 1) {
return(2 ^ pitchCands)
}
## PATH-FINDING
# find the best path through frame-by-frame pitch candidates
if (nc < 2) pathfinding = 'none'
if (pathfinding == 'fast') {
bestPath = pathfinding_fast(
pitchCands = pitchCands,
pitchCert = pitchCert,
pitchSource = pitchSource,
pitchCenterGravity = pitchCenterGravity,
certWeight = certWeight,
step = step
)
} else if (pathfinding == 'slow') {
bestPath = pathfinding_slow(
pitchCands = pitchCands,
pitchCert = pitchCert,
certWeight = certWeight,
pitchCenterGravity = pitchCenterGravity,
manual = manual,
annealPars = annealPars
)
} else { # if (pathfinding == 'none')
bestPath = rep(NA, nc)
for (i in 1:nc) {
idx = which.min(abs(pitchCands[, i] - pitchCenterGravity[i]))
if (length(idx) > 0 && !is.na(idx)) bestPath[i] = pitchCands[idx, i]
}
if (length(manual$frame) > 0) {
bestPath[manual$frame] = manual$freq
}
}
## SNAKE
# apply the snake algorithm to minimize the elastic forces acting on this
# pitch contour without deviating too far from high-certainty anchors
if (snakeStep > 0) {
bestPath = snake(
pitch = bestPath,
pitchCands = pitchCands,
pitchCert = pitchCert,
certWeight = certWeight,
pitchCenterGravity = pitchCenterGravity,
snakeStep = snakeStep,
snakePlot = snakePlot
)
}
if (!is.numeric(bestPath) | length(bestPath) != nc) {
# browser()
bestPath = pitchCenterGravity
}
return(2 ^ bestPath)
}
#' Interpolate
#'
#' Internal soundgen function.
#'
#' Interpolation: if a frame has no pitch candidate at all (NA) or no candidate
#' between the most likely candidates for the adjacent frames, add such a
#' candidate with some (low) certainty.
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#' candidates per fft frame weighted by our certainty in each of these
#' candidates
#' @return Returns a modified pitchCands matrix.
#' @keywords internal
interpolate = function(pitchCands,
pitchCert,
pitchSource,
pitchCenterGravity,
interpolWin_bin = 3,
interpolTol = 0.3,
interpolCert = 0.3) {
# ... add an empty row for new, interpolated pitch candidates
nr = nrow(pitchCands)
nc = ncol(pitchCands)
pitchCands = rbind(rep(NA, nc), pitchCands)
pitchCert = rbind(rep(NA, nc), pitchCert)
pitchSource = rbind(rep('intpl', nc), pitchSource)
for (f in 1:nc) {
left = max(1, f - interpolWin_bin)
right = min(ncol(pitchCands), f + interpolWin_bin)
win = left:right
med = NA
noLocalCands = FALSE
if (length(win) > 1) {
# median over interpolation window (by default plus-minus 2 points)
idx_man = which(pitchSource[, win] == 'manual' &
!is.na(pitchCert[, win]))
if (length(idx_man) > 0) {
# special case: if any manual cand-s over interpolWin_bin, only consider them
# med = median(pitchCands[, win] [idx_man], na.rm = TRUE)
idx_man_rc = which(pitchSource[, win] == 'manual' &
!is.na(pitchCert[, win]), arr.ind = TRUE)
curve = rep(NA, length(win))
curve[idx_man_rc[, 'col']] = pitchCands[, win] [idx_man]
} else {
# no manual cand's over interpolWin_bin --> use any cand-s for
# interpolation
curve = pitchCenterGravity[win]
curve[f + 1 - left] = NA
}
curve_df = na.omit(data.frame(x = 1:length(curve),
y = curve))
if (nrow(curve_df) > 2) {
# >2 points - use loess
l = try(suppressWarnings(loess(y ~ x, data = curve_df, span = 1)))
if (!inherits(med, 'try-error')) {
p = predict(l, newdata = 1:tail(curve_df$x, n = 1))
med = p[f + 1 - left]
# print(c(f, 'loess'))
}
} else {
# a single point - just take the middle
med = mean(curve_df$y)
# print(c(f, 'mean'))
}
noLocalCands = sum(
pitchCands[, f] > (1 - interpolTol) * med &
pitchCands[, f] < (1 + interpolTol) * med,
na.rm = TRUE
) == 0
if (noLocalCands & !is.na(med)) {
# if there are no pitch candidates in the frequency range
# expected based on pitch candidates in the adjacent frames...
# use median of adjacent frames for the new pitch cand
pitchCands[1, f] = med
# certainty assigned to interpolated frames
pitchCert[1, f] = interpolCert
# update pitchCenterGravity for the interpolated frame
pitchCenterGravity[f] = med
} else {
pitchCenterGravity[f] = weighted.mean(x = pitchCands[, f],
w = pitchCert[, f],
na.rm = TRUE)
}
}
}
return(list(pitchCands = pitchCands,
pitchCert = pitchCert,
pitchSource = pitchSource,
pitchCenterGravity = pitchCenterGravity))
}
#' Path through pitch candidates: fast
#'
#' Internal soundgen function.
#'
#' Uses a modified Viterbi algorithm to find a reasonable path though pitch
#' candidates. The idea is to start at several random "seeds", each time
#' exploring as many paths as there are candidates per seed. The path with the
#' lowest global cost is returned. Transition costs are a weighted mean of two
#' penalties: based on pitch certainties (corrected for distance from pitch
#' center of gravity) and on pitch jumps.
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#' candidates per fft frame weighted by our certainty in each of these
#' candidates
#' @param seed_ms start a new seed roughly every ... ms
#' @param plot if TRUE, plots pitch candidates and explored paths
#' @keywords internal
pathfinding_fast = function(pitchCands,
pitchCert,
pitchSource,
step,
pitchCenterGravity,
certWeight,
seed_ms = 250,
plot = FALSE) {
nc = ncol(pitchCands)
nr = nrow(pitchCands)
## Boost certainties of similar pitch candidates per frame (close to pitchCenterGravity)
for (i in 1:nc) {
pitchCert[, i] = pitchCert[, i] / (1 + abs(pitchCands[, i] - pitchCenterGravity[i]))
}
pitchUncert = 1 - pitchCert
## Pick a few points along the contour for re-seeding the path search
nSeeds = max(1, round(nc * step / seed_ms))
if (nSeeds == 1) seed_approx_pos = 1
seed_step = max(2, round(nc / (nSeeds + 1)))
seed_half_step = floor(seed_step / 2)
seed_approx_pos = round(seed_step * (1:nSeeds))
seed_approx_pos = unique(seed_approx_pos[seed_approx_pos <= nc])
nSeeds = length(seed_approx_pos)
# Repeat pathfinding at each seed to explore possible paths without getting stuck
paths = costs = vector('list', nSeeds)
for (seed in 1:nSeeds) {
# Find the frame with the greatest number of candidates (to explore as many paths as possible)
seed_win = (seed_approx_pos[seed] - seed_half_step) :
(seed_approx_pos[seed] + seed_half_step)
seed_win = seed_win[which(seed_win >= 1 & seed_win <= nc)]
idx_max = which.max(apply(
pitchCands[, seed_win, drop = FALSE], 2, function(x) sum(!is.na(x))
))
start = seed_win[1] - 1 + idx_max
start_cands_idx = which(!is.na(pitchCands[, start]))
start_cands = pitchCands[start_cands_idx, start]
# We'll try as many paths as there are candidates in the starting frame
nPaths = length(start_cands)
paths[[seed]] = costs[[seed]] = vector('list', nPaths)
# Work backwards and forwards from each starting candidate, taking the least costly transitions
for (p in 1:nPaths) {
idx_start = start_cands_idx[p]
point_current = pitchCands[idx_start, start]
cost_start = certWeight * pitchUncert[idx_start, start]
# work backwards
cost_back = 0
if (start > 1) {
path_back = rep(NA, start - 1)
for (i in rev(1:(start - 1))) {
cands = pitchCands[, i]
if (any(!is.na(cands))) {
cost_cert = pitchUncert[, i]
# get the cost of transition from the current point to each of the pitch
# candidates in the next frame
cost_pitchJump = abs(point_current - cands) * (25 / step)
if (length(cost_pitchJump) == 0 || !any(!is.na(cost_pitchJump))) cost_pitchJump = 0
# get a weighted average of transition costs associated with the certainty
# of each estimate vs. the magnitude of pitch jumps
costs_i = certWeight * cost_cert + (1 - certWeight) * cost_pitchJump
idx_i = which.min(costs_i)
point_current = pitchCands[idx_i, i]
path_back[i] = point_current
cost_back = cost_back + costs_i[idx_i]
} else {
path_back[i] = NA
}
}
} else {
path_back = numeric(0)
}
# work forwards
cost_forward = 0
if (start < nc) {
point_current = pitchCands[idx_start, start]
path_forward = rep(NA, nc - start)
for (i in (start + 1):nc) {
cands = pitchCands[, i]
if (any(!is.na(cands))) {
cost_cert = pitchUncert[, i] # abs(cands - pitchCenterGravity[i])
# get the cost of transition from the current point to each of the pitch
# candidates in the next frame
cost_pitchJump = abs(point_current - cands) * (25 / step)
if (length(cost_pitchJump) == 0 || !any(!is.na(cost_pitchJump))) cost_pitchJump = 0
# get a weighted average of transition costs associated with the certainty
# of each estimate vs. the magnitude of pitch jumps
costs_i = certWeight * cost_cert + (1 - certWeight) * cost_pitchJump
idx_i = which.min(costs_i)
point_current = pitchCands[idx_i, i]
path_forward[i - start] = point_current
cost_forward = cost_forward + costs_i[idx_i]
} else {
path_forward[i - start] = NA
}
}
} else {
path_forward = numeric(0)
}
paths[[seed]][[p]] = c(path_back, pitchCands[idx_start, start], path_forward)
costs[[seed]][[p]] = cost_back + cost_start + cost_forward
}
}
idx_best_path = which.min(unlist(costs))
paths = do.call(c, paths)
path = paths[[idx_best_path]]
if (plot) {
plot(1:nc, type = 'n', xlab = '', ylab = '',
ylim = range(pitchCands, na.rm = TRUE))
for (i in 1:nr) {
points(pitchCands[i, ], cex = 2 * pitchCert[i, ])
}
for (p in (1:length(paths))[-idx_best_path]) {
points(paths[[p]], type = 'l', col = 'gray')
}
points(paths[[idx_best_path]], type = 'l', col = 'blue')
}
return(path)
}
#' Cost of jumps
#'
#' Internal soundgen function.
#'
#' Internal helper function for calculating the cost of transitions between
#' pitch candidates. Needed for postprocessing of pitch contour - finding the
#' optimal pitch contour.
#' @param cand1,cand2 two candidate pitch values
#' @keywords internal
#' @examples
#' a = seq(-3, 3, by = .01)
#' b = 1 / (1 + 10 * exp(3 - 7 * abs(a)))
#' plot(a, b, type = 'l')
costJumps = function(cand1, cand2) {
return(1 / (1 + 10 * exp(3 - 7 * abs(cand1 - cand2))))
}
#' Path through pitch candidates: slow
#'
#' Internal soundgen function.
#'
#' Optimizes the path through pitch candidates using simulated annealing with
#' \code{\link[stats]{optim}}. This can be really slow, depending on control
#' parameters.
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#' candidates per fft frame weighted by our certainty in each of these
#' candidates
#' @param manual a dataframe of manual pitch candidates from pathfinder()
#' @keywords internal
pathfinding_slow = function(pitchCands = pitchCands,
pitchCert = pitchCert,
certWeight = certWeight,
pitchCenterGravity = pitchCenterGravity,
manual = NULL,
annealPars = list(maxit = 5000, temp = 1000)) {
# start with the pitch contour most faithful to center of gravity of pitch
# candidates for each frame
path_init = apply(matrix(1:ncol(pitchCands)), 1, function(x) {
which.min(abs(pitchCands[, x] - pitchCenterGravity[x]))
})
# use annealing to wiggle the path, attempting to minimize its cost
o = optim(
par = path_init,
fn = costPerPath,
gr = generatePath,
pitchCands = pitchCands,
pitchCert = pitchCert,
certWeight = certWeight,
pitchCenterGravity = pitchCenterGravity,
manual = manual,
method = 'SANN',
control = annealPars
)
bestPath = apply(matrix(1:ncol(pitchCands)), 1, function(x) {
pitchCands[o$par[x], x]
})
return(bestPath)
}
#' Cost per path
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contours called by
#' \code{\link{pathfinding_slow}}. Calculates the cost of a particular path
#' through pitch candidates based on pitch jumps and distance from
#' high-certainty candidates.
#' @param path evaluated path through pitch candidates (as integers specifying
#' the rows in pitchCands, not the actual values of pitch)
#' @inheritParams pathfinder
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#' candidates per fft frame weighted by our certainty in each of these
#' candidates
#' @param manual a dataframe of manual pitch candidates from pathfinder()
#' @keywords internal
costPerPath = function(path,
pitchCands,
pitchCert,
certWeight,
pitchCenterGravity,
manual = NULL) {
# if there is nothing to wiggle, generatePath() returns NA and we want
# annealing to terminate quickly, so we return very high cost
if (is.na(path[1])) return(1e10)
# if the path fails to pass through any of the manual pitch values,
# also return immediately with very high cost
if (!is.null(manual)) {
# the last cand in every column is the manual one, so the path
# should always go through the last cand in the manual frames
if (any(path[manual$frame] != nrow(pitchCands))) {
return(1e10)
}
}
# get the cost of transition from the current point to each of the pitch
# candidates in the next frame
cost_pitchJump = apply(as.matrix(1:(length(path) - 1), nrow = 1), 1, function(x) {
costJumps(pitchCands[path[x], x], pitchCands[path[x + 1], x + 1])
})
cost_pitchJump = mean(cost_pitchJump, na.rm = TRUE)
# get the cost of deviating from pitchCenterGravity
cost_cert = apply(as.matrix(1:length(path)), 1, function(x) {
abs(pitchCands[path[x], x] - pitchCenterGravity[x])
})
cost_cert = mean(cost_cert, na.rm = TRUE)
# get a weighted average of transition costs associated with the certainty
# of each estimate vs. the magnitude of pitch jumps
costs = certWeight * cost_cert + (1 - certWeight) * cost_pitchJump
return(costs)
}
#' Generate path
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contours called by
#' \code{\link{pathfinding_slow}}. Generates proposals for new paths through
#' pitch candidates. It gives up and returns NA after 100 attempts, which stops
#' annealing - so the adaptation of pitch contour doesn't happen
#' @param path currently evaluated path
#' @inheritParams pathfinder
#' @param ... nothing really, but otherwise optim() complains
#' @keywords internal
generatePath = function(path, pitchCands, ...) {
i = 1
while (i < 100) {
point_to_wiggle = sample(1:length(path), 1)
idx = which(!is.na(pitchCands[, point_to_wiggle])) [-path[point_to_wiggle]]
if (length(idx) > 0 & is.numeric(idx)) {
path[point_to_wiggle] = sample(idx, size = 1)
return(path)
}
i = i + 1
}
return(NA)
}
#' Snake
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contour. Wiggles a snake
#' along the gradient of internal + external forces. NB: if the snake is run,
#' the final contour may deviate from the actually measured pitch candidates!
#' @param pitch numeric vector representing our best guess at pitch contour,
#' which we are now attempting to improve by minimizing its elastic tension
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#' candidates per fft frame weighted by our certainty in each of these
#' candidates
#' @return Returns optimized pitch contour (numeric vector of the same length as
#' \code{pitch}).
#' @keywords internal
snake = function(pitch,
pitchCands,
pitchCert,
certWeight,
pitchCenterGravity,
snakeStep = 0.05,
snakePlot = FALSE) {
ran = diff(range(pitchCands, na.rm = TRUE)) # range of pitch
maxIter = floor(ran / snakeStep * 2) # just heuristic, no theory behind this
# plot for debugging or esthetic appreciation
if (snakePlot) {
# plot all pitch candidates and the initial path
plot(seq(1, ncol(pitchCands)), pitch,
type = 'n', xlab = 'FFT frame', ylab = 'log(pitch)',
ylim = c(
range(pitchCands, na.rm = TRUE)[1] - .3 * ran,
range(pitchCands, na.rm = TRUE)[2] + .3 * ran
)
)
for (r in 1:nrow(pitchCands)) {
points (seq(1, ncol(pitchCands)),
pitchCands[r, ],
cex = as.numeric(pitchCert[r, ]) * 2)
}
lines (seq(1, ncol(pitchCands)), pitch)
}
# optimization algorithm follows
i = 1
force_old = 1e10 # Inf causes NaN in force_delta
while (i < maxIter) {
force = forcePerPath(pitch = pitch,
pitchCands = pitchCands,
pitchCert = pitchCert,
pitchCenterGravity = pitchCenterGravity,
certWeight = certWeight)
force_new = mean(abs(force))
force_delta = (force_old - force_new) / force_old
force_old = force_new
if (is.na(force_delta) | force_delta < snakeStep) break
# wiggle the snake along the gradient of the total force acting on it
# (elastic + attraction of high-certainty pitch candidates)
pitch = pitch + snakeStep * force
if (snakePlot) {
lines(seq(1, length(pitch)), pitch,
type = 'l', col = 'green', lty = 4)
}
i = i + 1
}
if (snakePlot) {
lines(seq(1, length(pitch)), pitch,
type = 'l', col = 'blue', lwd = 3)
}
return (pitch)
}
#' Force per path
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contour. Returns the
#' total force acting on a snake (sum of internal and external gradients, i.e.
#' of the elastic force trying to straighten the snake [internal] and of the
#' force pushing the snake towards the most certain pitch estimates [external])
#' @inheritParams snake
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#' candidates per fft frame weighted by our certainty in each of these
#' candidates
#' @return Returns a numeric vector of the same length as \code{pitch} that
#' gives the total force acting on the snake at each point.
#' @keywords internal
forcePerPath = function(pitch,
pitchCands,
pitchCert,
pitchCenterGravity,
certWeight) {
ran = diff(range(pitchCands, na.rm = TRUE))
# external_force = -(pitch_path - pitchCenterGravity) / ran
external_force = pitch # just a quick way to initialize a vector of the right length
for (i in 1:ncol(pitchCands)) {
cands = na.omit(pitchCands[, i])
certs = na.omit(pitchCert[, i])
deltas = 1 / exp((cands - pitch[i]) ^ 2)
forces = certs * deltas
forces = ifelse(cands > pitch[i], forces, -forces)
external_force[i] = sum(forces)
}
# external_force is the "external" force - the attraction of high-certainty
# pitch candidates
internal_force = -findGrad(pitch)
# internal_force is the elastic force trying to make the curve smooth
total_force = certWeight * external_force + (1 - certWeight) * internal_force
# weighted average of internal and external forces
return(total_force)
}
#' Find gradient
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contour. Returns
#' the elastic force acting on a snake. See \code{\link{snake}}.
#' @param path numeric vector
#' @param interpol the number of points to interpolate beyond each end of the path
#' @return Returns a vector of the same length as input path giving its 4th derivative.
#' @keywords internal
findGrad = function(path, interpol = 3) {
# extrapolate 2 values before the first one and two after the last one based
# on /interpol/ number of points in case the path is shorter than the
# specified interpol:
interpol = ifelse(interpol > length(path), length(path), interpol)
if (interpol == 1) {
path = c(rep(path[1], 2), path, rep(path[length(path)], 2))
} else {
slopeLeft = suppressWarnings(summary(lm(
path[1:interpol] ~ seq(1, interpol)
))$coef[2, 1])
minus12 = path[1] - c(1, 2) * slopeLeft
slopeRight = suppressWarnings(summary(lm(
path[(length(path) - interpol + 1):length(path)] ~ seq(1, interpol)
))$coef[2, 1])
plus12 = path[length(path)] + c(1, 2) * slopeRight
path = c (minus12[2], minus12[1], path, plus12[1], plus12[2])
}
# take the 4th derivative of the path with extrapolated values
# (so that we get d4f over the entire length of the original path)
grad = rep(0, length(path))
for (i in 3:(length(path) - 2)) { # approximation
grad[i] = path[i - 2] - 4 * path[i - 1] + 6 * path[i] - 4 * path[i + 1] +
path[i + 2]
}
grad = grad[3:(length(grad) - 2)]
return(grad)
}
#' Median smoothing
#'
#' Internal soundgen function.
#'
#' Internal helper function for smoothing pitch contours or other contours. Only
#' outliers are modified, so it's not like smoothing with a kernel. NB: the
#' expected input is pitch, so deviance is calculated on a log-scale.
#' @param df dataframe (each column is processed separately, so multiple
#' contours can be fed into this function at once to speed things up)
#' @param smoothing_ww width of smoothing window (points)
#' @param smoothingThres tolerated deviance from moving median (semitones)
#' @param inviolable a vector of indices of the rows rows of df that should not
#' be modified (meant for manual pitch values)
#' @return Returns a dataframe of the same dimensions as df.
#' @keywords internal
#' @examples
#' df = data.frame(a = rnorm(20, mean = 100, sd = 20),
#' b = rnorm(20, mean = 100, sd = 10))
#' df1 = soundgen:::medianSmoother(df, smoothing_ww = 5,
#' smoothingThres = 1,
#' inviolable = c(rep(TRUE, 10), rep(FALSE, 10)))
#' plot(df[, 2], type='b')
#' lines(df1[, 2], type='b', col='blue', pch=3)
medianSmoother = function(df,
smoothing_ww,
smoothingThres,
inviolable = NULL) {
temp = df # to calculate median_over_window for original values
hw = floor(smoothing_ww / 2) # smooth over plus-minus half the smoothing_ww
for (i in 1:nrow(df)) {
window = c(max(i - hw, 1), min(i + hw, nrow(df))) # smoothing window
# to be conservative, treat NAs as repetitions of value i
winVal = as.matrix(temp[(window[1]:window[2]), ])
for (c in 1:ncol(winVal)) {
idx_na = which(is.na(winVal[, c]))
winVal[idx_na, c] = df[i, c]
}
median_over_window = apply(winVal, 2, function(x) {
median(unlist(x), na.rm = TRUE) # w/o unlist returns NULL for NA vectors (weird...)
# NB: use either temp or df, for original or smoothed values to be used
# for calculating median_over_window
})
if (!i %in% inviolable) {
if (smoothingThres > 0) {
# difference from median pitch etc over window, in semitones
deviance = 12 * log2(as.numeric(df[i, ]) / median_over_window)
cond = which(abs(deviance) > smoothingThres)
df[i, cond] = median_over_window[cond]
} else {
df[i, ] = median_over_window
}
}
}
return(df)
}
#' Find voiced segments
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contours. Merges voiced
#' segments at least \code{shortestSyl} ms long and separated by less than
#' \code{shortestPause} ms. Called by \code{\link{analyze}}
#' @param pitchCands matrix of possible pitch values per column. One column is
#' one fft frame, one row is one pitch candidate
#' @inheritParams analyze
#' @inheritParams spectrogram
#' @param samplingRate sampling rate (Hz)
#' @param minVoicedCands a frame is considered to be voiced if at least this
#' many pitch candidates are not NA. Defaults to 2: since dom is usually
#' defined, in practice this means that we also want at least one other pitch
#' candidate (autocor, cep or BaNa)
#' @param pitchMethods methods of pitch tracking in analyze()
#' @param manualV index of frames that should definitely be voiced (manual
#' candidates)
#' @param manualTryToV index of frames that should be treated as voiced as long
#' as they have any candidates at all (even <minVoicedCands)
#' @param manualUnv index of frames forced to be unvoiced
#' @return Returns a dataframe specifying where each voiced segment starts and
#' ends (in fft frames, not ms!)
#' @keywords internal
findVoicedSegments = function(pitchCands,
shortestSyl,
shortestPause,
step,
samplingRate,
minVoicedCands,
pitchMethods,
manualV = NULL,
manualTryToV = NULL,
manualUnv = NULL) {
resetMVC = FALSE
messageMVC = FALSE
if (is.null(minVoicedCands)) {
resetMVC = TRUE
}
if (is.numeric(minVoicedCands)) {
if (minVoicedCands < 1 | minVoicedCands > length(pitchMethods)) {
resetMVC = TRUE
messageMVC = TRUE
}
}
if (resetMVC) {
if ('dom' %in% pitchMethods & length(pitchMethods) > 1) {
# since dom is usually defined, we want at least one more pitch candidate
# (unless dom is the ONLY method that the user wants for pitch tracking)
minVoicedCands = 2
} else {
minVoicedCands = 1
}
if (messageMVC) {
message(paste0('minVoicedCands must be between 1 and length(pitchMethods);',
' resetting to ', minVoicedCands))
}
}
putativelyVoiced = apply(pitchCands, 2, function(x)
sum(!is.na(x)) >= minVoicedCands)
if (length(manualV) > 0) putativelyVoiced[manualV] = TRUE
if (length(manualTryToV)) {
# voice as forced by manualTryToV, as long as those frames have some cand-s
anyCands = which(apply(pitchCands, 2, function(x) any(!is.na(x))))
putativelyVoiced[manualTryToV[manualTryToV %in% anyCands]] = TRUE
}
if (length(manualUnv) > 0) putativelyVoiced[manualUnv] = FALSE
# the smallest number of consecutive non-NA pitch values that constitute a
# voiced segment; but at least 1
nRequired = max(1, ceiling(shortestSyl / step))
# the greatest number of NA values that we tolerate before we say a new voiced
# syllable begins
toleratedGap = floor(shortestPause / step)
# find and save separately all voiced segments
segmentStart = numeric()
segmentEnd = numeric()
i = 1
while (i < (length(putativelyVoiced) - nRequired + 1)) {
# find beginning
while (i < (length(putativelyVoiced) - nRequired + 1)) {
if (sum(putativelyVoiced[i:(i + nRequired - 1)]) == nRequired |
i %in% manualV) { # start a new syllable if we hit a manually voiced frame
segmentStart = c(segmentStart, i)
break
}
i = i + 1
}
# find end
if (length(segmentEnd) < length(segmentStart)) {
while (i < (length(putativelyVoiced) - toleratedGap + 1)) {
if (sum(putativelyVoiced[i:(i + toleratedGap)]) == 0 |
i %in% manualUnv ) { # interrupt the syllable if we hit a manuaUnv frame
segmentEnd = c(segmentEnd, i - 1)
i = i - 1
break
}
i = i + 1
}
if (length(segmentEnd) < length(segmentStart)) {
# if the end is not found, take the last voiced value
segmentEnd = c(segmentEnd, tail(which(putativelyVoiced), 1))
break
}
}
i = i + 1
}
return(data.frame(segmentStart = segmentStart, segmentEnd = segmentEnd))
}
#' Plot pitch candidates
#'
#' Internal soundgen function.
#'
#' Plots pitch candidates or adds them to a spectrogram.
#' @param pitchCands,pitchCert,pitchSource matrices of pitch candidates, their
#' certainty, and pitch tracking method used as generated internally by
#' analyze(); columns = STFT frames, rows = candidates
#' @param pitch best guess at pitch contour; length = ncol(pitchCands)
#' @param candPlot,pitchPlot lists of graphical settings for plotting candidates
#' and pitch contour, respectively
#' @param extraContour another contour to add to the plot, such as harmHeight, Hz
#' @param addToExistingPlot if TRUE, assumes that a spectrogram is already
#' plotted; if FALSE, sets up a new plot
#' @param showLegend if TRUE, shows a legend
#' @param y_Hz if TRUE, plot in Hz, otherwise in kHz
#' @param ... other graphical parameters used for creating a new plot if
#' addToExistingPlot = FALSE
#' @keywords internal
#' @examples
#' \dontrun{
#' s = soundgen()
#' a = analyze(s, 16000, windowLength = 25, step = 25,
#' summary = 'extended', plot = FALSE)
#' spectrogram(s, 16000, windowLength = 25, step = 5)
#' addPitchCands(pitchCands = a$pitchCands,
#' pitchCert = a$pitchCert,
#' pitchSource = a$pitchSource,
#' pitch = a$result$pitch)
#' }
addPitchCands = function(pitchCands,
pitchCert,
pitchSource,
pitch,
timestamps = NULL,
candPlot = list(),
pitchPlot = list(),
extraContour = NULL,
extraContour_pars = list(),
priorMean = NULL,
priorSD = NULL,
pitchFloor = NULL,
pitchCeiling = NULL,
addToExistingPlot = TRUE,
showLegend = TRUE,
y_Hz = FALSE,
yScale = c('orig', 'bark', 'mel')[1],
...) {
if (is.null(pitchCands) & is.null(pitch)) invisible()
if (length(pitchCands) < 1 & length(pitch) < 1) invisible()
if (is.null(timestamps) & !is.null(pitchCands)) {
timestamps = as.numeric(colnames(pitchCands))
}
if (is.null(pitchPlot$showPrior)) {
showPrior = TRUE
} else {
showPrior = pitchPlot$showPrior
}
if (showPrior & is.numeric(priorMean) & is.numeric(priorSD)) {
prior = getPrior(
priorMean = priorMean,
priorSD = priorSD,
pitchFloor = pitchFloor,
pitchCeiling = pitchCeiling,
len = 100,
plot = FALSE
)
} else {
prior = NULL
}
pitchPlot = pitchPlot[names(pitchPlot) != 'showPrior']
yScaleCoef = ifelse(y_Hz | yScale %in% c('bark', 'mel'), 1, 1/1000)
if (yScale == 'bark') {
# NB: tuneR::hz2bark can't handle NAs
pitchCands = 6 * asinh(pitchCands / 600)
pitch = 6 * asinh(pitch / 600)
if (!is.null(prior)) prior$freq = 6 * asinh(prior$freq / 600)
if (!is.null(extraContour)) extraContour = 6 * asinh(extraContour / 600)
} else if (yScale == 'mel') {
pitchCands = hz2mel(pitchCands)
pitch = hz2mel(pitch)
if (!is.null(prior)) prior$freq = hz2mel(prior$freq)
if (!is.null(extraContour)) extraContour = hz2mel(extraContour)
}
# If addToExistingPlot is FALSE, we first have to set up an empty plot
if (addToExistingPlot == FALSE) {
arguments = list(...) # save ... arguments as a list
m = max(pitchCands, na.rm = TRUE) * yScaleCoef # for ylim on the empty plot
if (is.na(m)) m = 5 # samplingRate / 2 * yScaleCoef
arguments$ylim = c(0, m)
do.call(plot, # need do.call, otherwise can't pass the modified ylim
args = c(list(x = timestamps,
y = rep(0, ncol(pitchCands)),
type = 'n'),
arguments))
}
# Prepare plot pars - combine user-defined with defaults
plotPars = defaults_analyze_pitchCand # see presets.R
candPlot$final = pitchPlot
for (i in 1:length(candPlot)) {
temp = candPlot[[i]]
if (length(temp) > 0) {
method = names(candPlot)[i]
idx = which(plotPars$method == method)
if (length(idx) > 0) {
for (j in 1:length(temp)) {
property = names(temp[j])[1]
if (!property %in% colnames(plotPars)) {
# set to default, otherwise other rows become NA
if (property == 'cex') {
plotPars[, property] = 2
} else if (property == 'lty') {
plotPars[, property] = 1 # par(lty) gives "solid" - a character
} else {
plotPars[, property] = par(property)
}
}
plotPars[idx, property] = temp[j]
}
}
}
}
# Add pitch candidates to the plot
pitchMethods = unique(na.omit(as.character(pitchSource)))
if (length(pitchMethods) > 0) {
for (r in 1:nrow(pitchCands)) {
method_r = as.character(pitchSource[r, ])
method_r[is.na(method_r)] = 'def'
pars_method = as.list(plotPars[match(method_r, plotPars$method), 2:ncol(plotPars)])
pars_method$cex = pars_method$cex *
plotPars$cex[plotPars$method == 'final'] *
pitchCert[r, ]
# for manual, certainty could be Inf, so we reset to 2
pars_method$cex[which(!is.na(pars_method$cex) &
pars_method$cex == Inf)] = 2
# pars_method$cex[is.na(pars_method$cex)] = 0
do.call(points, c(pars_method,
list(x = timestamps,
y = pitchCands[r, ] * yScaleCoef)))
}
} else {
showLegend = FALSE
}
# Add the final pitch contour to the plot
if (any(is.finite(pitch))) {
pars_pitchContour = as.list(plotPars[plotPars$method == 'final',
2:ncol(plotPars)])
# first points, in case there are isolated, unconnected points surrounded by
# NAs (not plotted with type = 'l')
do.call('points', c(list(
x = timestamps,
y = pitch * yScaleCoef
), pars_pitchContour))
# now join pitch contour with a line
do.call('points', c(list(
x = timestamps,
y = pitch * yScaleCoef,
type = 'l'
), pars_pitchContour))
}
# Add an extra contour such as harmHeight
if (!is.null(extraContour)) {
if (any(!is.na(extraContour)) & length(timestamps) > 0) {
if (is.null(extraContour_pars$type)) extraContour_pars$type = 'l'
if (extraContour_pars$type != 'n') {
if (is.null(extraContour_pars$lty)) extraContour_pars$lty = 2
if (is.null(extraContour_pars$lwd)) extraContour_pars$lwd = 2
if (is.null(extraContour_pars$col)) extraContour_pars$col = 'red'
do.call(points, c(list(x = timestamps, y = extraContour * yScaleCoef),
extraContour_pars))
}
}
}
# Show prior
if (!is.null(prior)) {
ran_x_5 = (tail(timestamps, 1) - timestamps[1]) * .075 # 7.5% of plot width
points(x = prior$prob * ran_x_5,
y = prior$freq * yScaleCoef, type = 'l', lty = 2)
text(x = ran_x_5,
y = priorMean * yScaleCoef,
pos = 2, labels = 'Prior', cex = 0.65, offset = 0.25)
}
text(x = 0,
y = pitchFloor * yScaleCoef,
pos = 4, labels = 'floor', cex = 0.65, offset = 0)
text(x = 0,
y = pitchCeiling * yScaleCoef,
pos = 4, labels = 'ceiling', cex = 0.65, offset = 0)
# Add a legend
if (showLegend) {
pl = plotPars[plotPars$method %in% c(pitchMethods, 'final'), ]
legend("topright",
legend = pl$method,
pch = pl$pch,
lty = ifelse(pl$method == 'final', pl$lty, NA),
lwd = pl$lwd,
col = pl$col,
bg = "white")
}
invisible()
}
#' Interpolate NAs
#'
#' Internal soundgen function
#'
#' Takes a numeric vector and fills in the NAs by linear
#' interpolation in the middle and constant or linear interpolation at the ends.
#' @param x numeric vector
#' @param idx_na which(is.na(x))
#' @param nPoints the number of points to use for interpolating leading and
#' trailing NAs: 1 = constant interpolation, 2 = use the first two non-NAs at
#' the beginning and the last two non-NAs at the end, etc.
#' @return Returns the same numeric vector with NAs filled in by interpolation.
#' @keywords internal
#' @examples
#' soundgen:::intplNA(c(NA, 405, 441, 460, NA, NA, NA, 480, 490, NA, NA))
#' soundgen:::intplNA(c(NA, 405, 441, 460, NA, NA, NA, 480, 490, NA, NA), nPoints = 3)
intplNA = function(x, idx_na = NULL, nPoints = 1) {
len = length(x)
if (is.null(idx_na)) idx_na = which(is.na(x))
n_na = length(idx_na)
if (n_na == len | n_na == 0) return(x)
idx_notNA = (1:len)[-idx_na]
# fill in NAs in the middle by linear interpolation
if (n_na > 0) {
idx_center = idx_notNA[1]:tail(idx_notNA, 1)
if (length(idx_center) > 1) {
xc = approx(x[idx_center], n = length(idx_center), na.rm = TRUE)$y
if (!inherits(xc, 'try-error')) x[idx_center] = xc
}
# fill in NAs at the ends
if (nPoints == 1) {
# constant interpolation
if (idx_notNA[1] > 1) {
x[1:(idx_notNA[1] - 1)] = x[idx_notNA[1]]
}
last_nonNA = tail(idx_notNA, 1)
if (last_nonNA < len) {
x[(last_nonNA + 1):len] = x[last_nonNA]
}
} else {
# linear interpolation of NAs at the beg & end
if (idx_notNA[1] > 1) {
a = idx_notNA[1]:min(len, (idx_notNA[1] + nPoints - 1))
b = x[a]
mod = lm(b ~ a)
x[1:(idx_notNA[1] - 1)] = predict(mod, newdata = list(a = 1:(idx_notNA[1] - 1)))
}
last_nonNA = tail(idx_notNA, 1)
if (last_nonNA < len) {
a = max(1, (last_nonNA - nPoints + 1)) : last_nonNA
b = x[a]
mod = lm(b ~ a)
x[(last_nonNA + 1):len] = predict(mod, newdata = list(a = (last_nonNA + 1):len))
}
}
}
return(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.