R/pitch_postprocessing.R
In soundgen: Sound Synthesis and Acoustic Analysis

Documented in addPitchCands costJumps costPerPath findGrad findVoicedSegments forcePerPath generatePath interpolate intplNA medianSmoother pathfinder pathfinding_fast pathfinding_slow snake

### UTILITIES FOR POSTPROCESSING OF PITCH CONTOURS ###

#' Pathfinder
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing pitch contour. Starts with a
#' reasonable guess and computes the more-or-less optimal pitch contour (not
#' quite the very optimal - too computationally expensive).
#' @param pitchCands a matrix of multiple pitch candidates per fft frame. Each
#'   column is one fft frame, each row is one candidate (the last row is always
#'   "manual")
#' @param pitchCert a matrix of the same dimensionality as pitchCands specifying
#'   our certainty in pitch candidates
#' @inheritParams analyze
#' @param interpolWin_bin when interpolating pitch candidates, the median is
#'   calculated over \code{plus-minus interpolWin_bin}
#' @param interpolTol when interpolating pitch candidates, the criterion
#'   for needing to interpolate is the absence of pitch candidates with values
#'   within \code{1 plus-minus interpolTol} of the median of pitch center of
#'   gravity over the interpolation window. For ex., if \code{interpolTol}
#'   is .05, we look for values from 0.95 to 1.05 time the median value over
#'   interpolation window.
#' @param interpolCert when interpolating pitch candidates, all generated pitch
#'   candidates are assigned a certainty equal to \code{interpolCert}
#' @param manualCert the certainty in manually added pitch candidates
#' @return Returns a numeric vector of pitch values representing the best found
#'   path through pitch candidates.
#' @keywords internal
pathfinder = function(pitchCands,
                      pitchCert,
                      pitchSource,
                      step,
                      manual = NULL,
                      certWeight = 0.5,
                      pathfinding = c('none', 'fast', 'slow')[2],
                      annealPars = list(maxit = 5000, temp = 1000),
                      interpolWin_bin = 3,
                      interpolTol = 0.05,
                      interpolCert = 0.3,
                      manualCert = 1,
                      snakeStep = 0,
                      snakePlot = FALSE) {
  nr = nrow(pitchCands)
  nc = ncol(pitchCands)

  # Add "inviolable" manual pitch values
  if (length(manual$frame) > 0) {
    freqMan = certMan = rep(NA, nc)
    freqMan[manual$frame] = manual$freq
    certMan[manual$frame] = manualCert
    pitchCands[, manual$frame] = NA  # ignore all other candidates in manual frames
    pitchCands = rbind(pitchCands, freqMan)
    pitchCert = rbind(pitchCert, certMan)
    pitchSource = rbind(pitchSource, rep('manual', nc))
    nr = nr + 1
  }

  # take log to approximate human perception of pitch differences
  pitchCands[!is.na(pitchCands)] = log2(pitchCands[!is.na(pitchCands)])
  if (!is.null(manual)) manual$freq = log2(manual$freq)

  # get the "center of gravity" of pitch candidates in each frame (mean of all
  # pitch candidates weighted by their respective certainties)
  pitchCenterGravity = rep(NA, ncol(pitchCands))
  for (i in seq_len(ncol(pitchCands))) {
    idxDeadCert = which(pitchSource[, i] == 'manual' & pitchCert[, i] == manualCert)
    if (length(idxDeadCert) == 1) {
      # special case: pitchCenterGravity has to pass through manual candidates
      pitchCenterGravity[i] = pitchCands[idxDeadCert, i]
    } else {
      pitchCenterGravity[i] = weighted.mean(x = pitchCands[, i],
                                            w = pitchCert[, i],
                                            na.rm = TRUE)
    }
  }

  ## INTERPOLATION
  # if a frame has no pitch candidate at all (NA) or no candidate
  # between the most likely candidates for the adjacent frames, add such a
  # candidate with ~low certainty
  if (length(interpolWin_bin) > 0 && interpolWin_bin > 0) {
    # order pitch candidates and certainties in each frame pushing NAs down so
    # as to simplify the matrix (the position of manual candidates is not
    # important since they are saved in 'manual')
    if (nrow(pitchCands) > 1) {
      colseq = seq_len(nc)
      o = sapply(colseq, function(x) order(pitchCands[, x]))
      pitchCands = sapply(colseq, function(x) pitchCands[o[, x], x])
      pitchCert = sapply(colseq, function(x) pitchCert[o[, x], x])
      pitchSource = sapply(colseq, function(x) pitchSource[o[, x], x])
    }
    intplt = interpolate(pitchCands = pitchCands,
                         pitchCert = pitchCert,
                         pitchSource = pitchSource,
                         pitchCenterGravity = pitchCenterGravity,
                         interpolWin_bin = interpolWin_bin,
                         interpolTol = interpolTol,
                         interpolCert = interpolCert)
    pitchCands = intplt$pitchCands
    pitchCert = intplt$pitchCert
    pitchSource = intplt$pitchSource
    pitchCenterGravity = intplt$pitchCenterGravity
  }
  # remove interpolated candidates in manual frames (otherwise they might be preferred)
  if (length(manual$frame) > 0) {
    for (f in manual$frame) {
      pitchCands[which(pitchSource[, f] != 'manual'), f] = NA
    }
  }

  # remove rows with all NA's
  keep_rows = which(rowSums(!is.na(pitchCands)) > 0)
  if (length(keep_rows) > 0) {
    pitchCands = pitchCands[keep_rows, , drop = FALSE]
    pitchCert = pitchCert[keep_rows, , drop = FALSE]
    pitchSource = pitchSource[keep_rows, , drop = FALSE]
  }

  # special case: only a single pitch candidate for all frames in a syllable
  # (no paths to chose among)
  if (nrow(pitchCands) == 1) {
    return(2 ^ pitchCands)
  }

  ## PATH-FINDING
  # find the best path through frame-by-frame pitch candidates
  if (nc < 2) pathfinding = 'none'
  if (pathfinding == 'fast') {
    bestPath = pathfinding_fast(
      pitchCands = pitchCands,
      pitchCert = pitchCert,
      pitchSource = pitchSource,
      pitchCenterGravity = pitchCenterGravity,
      certWeight = certWeight,
      step = step
    )
  } else if (pathfinding == 'slow') {
    bestPath = pathfinding_slow(
      pitchCands = pitchCands,
      pitchCert = pitchCert,
      certWeight = certWeight,
      pitchCenterGravity = pitchCenterGravity,
      manual = manual,
      annealPars = annealPars
    )
  } else {  # if (pathfinding == 'none')
    bestPath = rep(NA, nc)
    for (i in seq_len(nc)) {
      idx = which.min(abs(pitchCands[, i] - pitchCenterGravity[i]))
      if (length(idx) > 0 && !is.na(idx)) bestPath[i] = pitchCands[idx, i]
    }
    if (length(manual$frame) > 0) {
      bestPath[manual$frame] = manual$freq
    }
  }

  ## SNAKE
  # apply the snake algorithm to minimize the elastic forces acting on this
  # pitch contour without deviating too far from high-certainty anchors
  if (snakeStep > 0) {
    bestPath = snake(
      pitch = bestPath,
      pitchCands = pitchCands,
      pitchCert = pitchCert,
      certWeight = certWeight,
      pitchCenterGravity = pitchCenterGravity,
      snakeStep = snakeStep,
      snakePlot = snakePlot
    )
  }
  if (!is.numeric(bestPath) | length(bestPath) != nc) {
    # browser()
    bestPath = pitchCenterGravity
  }
  2 ^ bestPath
}


#' Interpolate
#'
#' Internal soundgen function.
#'
#' Interpolation: if a frame has no pitch candidate at all (NA) or no candidate
#' between the most likely candidates for the adjacent frames, add such a
#' candidate with some (low) certainty.
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#'   candidates per fft frame weighted by our certainty in each of these
#'   candidates
#' @return Returns a modified pitchCands matrix.
#' @keywords internal
interpolate = function(pitchCands,
                       pitchCert,
                       pitchSource,
                       pitchCenterGravity,
                       interpolWin_bin = 3,
                       interpolTol = 0.3,
                       interpolCert = 0.3) {
  # ... add an empty row for new, interpolated pitch candidates
  nr = nrow(pitchCands)
  nc = ncol(pitchCands)
  pitchCands = rbind(rep(NA, nc), pitchCands)
  pitchCert = rbind(rep(NA, nc), pitchCert)
  pitchSource = rbind(rep('intpl', nc), pitchSource)
  for (f in seq_len(nc)) {
    left = max(1, f - interpolWin_bin)
    right = min(ncol(pitchCands), f + interpolWin_bin)
    win = left:right
    med = NA
    noLocalCands = FALSE
    if (length(win) > 1) {
      # median over interpolation window (by default plus-minus 2 points)
      idx_man = which(pitchSource[, win] == 'manual' &
                        !is.na(pitchCert[, win]))
      if (length(idx_man) > 0) {
        # special case: if any manual cand-s over interpolWin_bin, only consider them
        # med = median(pitchCands[, win] [idx_man], na.rm = TRUE)
        idx_man_rc = which(pitchSource[, win] == 'manual' &
                             !is.na(pitchCert[, win]), arr.ind = TRUE)
        curve = rep(NA, length(win))
        curve[idx_man_rc[, 'col']] = pitchCands[, win] [idx_man]
      } else {
        # no manual cand's over interpolWin_bin --> use any cand-s for
        # interpolation
        curve = pitchCenterGravity[win]
        curve[f + 1 - left] = NA
      }
      curve_df = na.omit(data.frame(x = seq_along(curve),
                                    y = curve))
      if (nrow(curve_df) > 2) {
        # >2 points - use loess
        l = try(suppressWarnings(loess(y ~ x, data = curve_df, span = 1)))
        if (!inherits(med, 'try-error')) {
          p = predict(l, newdata = seq_len(.subset2(curve_df$x, length(curve_df$x))))
          med = p[f + 1 - left]
          # print(c(f, 'loess'))
        }
      } else {
        # a single point - just take the middle
        med = mean(curve_df$y)
        # print(c(f, 'mean'))
      }
      noLocalCands = sum(
        pitchCands[, f] > (1 - interpolTol) * med &
          pitchCands[, f] < (1 + interpolTol) * med,
        na.rm = TRUE
      ) == 0
      if (noLocalCands & !is.na(med)) {
        # if there are no pitch candidates in the frequency range
        # expected based on pitch candidates in the adjacent frames...
        # use median of adjacent frames for the new pitch cand
        pitchCands[1, f] = med
        # certainty assigned to interpolated frames
        pitchCert[1, f] = interpolCert
        # update pitchCenterGravity for the interpolated frame
        pitchCenterGravity[f] = med
      } else {
        pitchCenterGravity[f] = weighted.mean(x = pitchCands[, f],
                                              w = pitchCert[, f],
                                              na.rm = TRUE)
      }
    }
  }
  list(pitchCands = pitchCands,
       pitchCert = pitchCert,
       pitchSource = pitchSource,
       pitchCenterGravity = pitchCenterGravity)
}


#' Path through pitch candidates: fast
#'
#' Internal soundgen function.
#'
#' Uses a modified Viterbi algorithm to find a reasonable path though pitch
#' candidates. The idea is to start at several random "seeds", each time
#' exploring as many paths as there are candidates per seed. The path with the
#' lowest global cost is returned. Transition costs are a weighted mean of two
#' penalties: based on pitch certainties (corrected for distance from pitch
#' center of gravity) and on pitch jumps.
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#'   candidates per fft frame weighted by our certainty in each of these
#'   candidates
#' @param seed_ms start a new seed roughly every ... ms
#' @param plot if TRUE, plots pitch candidates and explored paths
#' @keywords internal
pathfinding_fast = function(pitchCands,
                            pitchCert,
                            pitchSource,
                            step,
                            pitchCenterGravity,
                            certWeight,
                            seed_ms = 250,
                            plot = FALSE) {
  nc = ncol(pitchCands)
  nr = nrow(pitchCands)

  ## Boost certainties of similar pitch candidates per frameclose to pitchCenterGravity)
  for (i in seq_len(nc)) {
    pitchCert[, i] = pitchCert[, i] / (1 + abs(pitchCands[, i] - pitchCenterGravity[i]))
  }
  pitchUncert = 1 - pitchCert

  ## Pick a few points along the contour for re-seeding the path search
  nSeeds = max(1, round(nc * step / seed_ms))
  if (nSeeds == 1) seed_approx_pos = 1
  seed_step = max(2, round(nc / (nSeeds + 1)))
  seed_half_step = floor(seed_step / 2)
  seed_approx_pos = round(seed_step * (seq_len(nSeeds)))
  seed_approx_pos = unique(seed_approx_pos[seed_approx_pos <= nc])
  nSeeds = length(seed_approx_pos)

  # Repeat pathfinding at each seed to explore possible paths without getting stuck
  paths = costs = vector('list', nSeeds)
  for (seed in seq_len(nSeeds)) {
    # Find the frame with the greatest number of candidates (to explore as many paths as possible)
    seed_win = (seed_approx_pos[seed] - seed_half_step) :
      (seed_approx_pos[seed] + seed_half_step)
    seed_win = seed_win[which(seed_win >= 1 & seed_win <= nc)]
    idx_max = which.max(apply(
      pitchCands[, seed_win, drop = FALSE], 2, function(x) sum(!is.na(x))
    ))
    start = seed_win[1] - 1 + idx_max
    start_cands_idx = which(!is.na(pitchCands[, start]))
    start_cands = pitchCands[start_cands_idx, start]

    # We'll try as many paths as there are candidates in the starting frame
    nPaths = length(start_cands)
    paths[[seed]] = costs[[seed]] = vector('list', nPaths)

    # Work backwards and forwards from each starting candidate, taking the least costly transitions
    for (p in seq_len(nPaths)) {
      idx_start = start_cands_idx[p]
      point_current = pitchCands[idx_start, start]
      cost_start = certWeight * pitchUncert[idx_start, start]

      # work backwards
      cost_back = 0
      if (start > 1) {
        path_back = rep(NA, start - 1)
        for (i in rev(seq_len(start - 1))) {
          cands = pitchCands[, i]
          if (any(!is.na(cands))) {
            cost_cert = pitchUncert[, i]
            # get the cost of transition from the current point to each of the pitch
            # candidates in the next frame
            cost_pitchJump = abs(point_current - cands) * (25 / step)
            if (length(cost_pitchJump) == 0 || !any(!is.na(cost_pitchJump))) cost_pitchJump = 0
            # get a weighted average of transition costs associated with the certainty
            # of each estimate vs. the magnitude of pitch jumps
            costs_i = certWeight * cost_cert + (1 - certWeight) * cost_pitchJump
            idx_i = which.min(costs_i)
            point_current = pitchCands[idx_i, i]
            path_back[i] = point_current
            cost_back = cost_back + costs_i[idx_i]
          } else {
            path_back[i] = NA
          }
        }
      } else {
        path_back = numeric(0)
      }

      # work forwards
      cost_forward = 0
      if (start < nc) {
        point_current = pitchCands[idx_start, start]
        path_forward = rep(NA, nc - start)
        for (i in (start + 1):nc) {
          cands = pitchCands[, i]
          if (any(!is.na(cands))) {
            cost_cert = pitchUncert[, i] # abs(cands - pitchCenterGravity[i])
            # get the cost of transition from the current point to each of the pitch
            # candidates in the next frame
            cost_pitchJump = abs(point_current - cands) * (25 / step)
            if (length(cost_pitchJump) == 0 || !any(!is.na(cost_pitchJump))) cost_pitchJump = 0
            # get a weighted average of transition costs associated with the certainty
            # of each estimate vs. the magnitude of pitch jumps
            costs_i = certWeight * cost_cert + (1 - certWeight) * cost_pitchJump
            idx_i = which.min(costs_i)
            point_current = pitchCands[idx_i, i]
            path_forward[i - start] = point_current
            cost_forward = cost_forward + costs_i[idx_i]
          } else {
            path_forward[i - start] = NA
          }
        }
      } else {
        path_forward = numeric(0)
      }
      paths[[seed]][[p]] = c(path_back, pitchCands[idx_start, start], path_forward)
      costs[[seed]][[p]] = cost_back + cost_start + cost_forward
    }
  }
  idx_best_path = which.min(unlist(costs))
  paths = do.call(c, paths)
  path = paths[[idx_best_path]]

  if (plot) {
    plot(seq_len(nc), type = 'n', xlab = '', ylab = '',
         ylim = range(pitchCands, na.rm = TRUE))
    for (i in seq_len(nr)) {
      points(pitchCands[i, ], cex = 2 * pitchCert[i, ])
    }
    for (p in (seq_along(paths))[-idx_best_path]) {
      points(paths[[p]], type = 'l', col = 'gray')
    }
    points(paths[[idx_best_path]], type = 'l', col = 'blue')
  }
  path
}


#' Cost of jumps
#'
#' Internal soundgen function.
#'
#' Internal helper function for calculating the cost of transitions between
#' pitch candidates. Needed for postprocessing of pitch contour - finding the
#' optimal pitch contour.
#' @param cand1,cand2 two candidate pitch values
#' @keywords internal
#' @examples
#' a = seq(-3, 3, by = .01)
#' b = 1 / (1 + 10 * exp(3 - 7 * abs(a)))
#' plot(a, b, type = 'l')
costJumps = function(cand1, cand2) {
  1 / (1 + 10 * exp(3 - 7 * abs(cand1 - cand2)))
}


#' Path through pitch candidates: slow
#'
#' Internal soundgen function.
#'
#' Optimizes the path through pitch candidates using simulated annealing with
#' \code{\link[stats]{optim}}. This can be really slow, depending on control
#' parameters.
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#'   candidates per fft frame weighted by our certainty in each of these
#'   candidates
#' @param manual a dataframe of manual pitch candidates from pathfinder()
#' @keywords internal
pathfinding_slow = function(pitchCands = pitchCands,
                            pitchCert = pitchCert,
                            certWeight = certWeight,
                            pitchCenterGravity = pitchCenterGravity,
                            manual = NULL,
                            annealPars = list(maxit = 5000, temp = 1000)) {
  # start with the pitch contour most faithful to center of gravity of pitch
  # candidates for each frame
  colseq = seq_len(ncol(pitchCands))
  path_init = sapply(colseq, function(x) {
    which.min(abs(pitchCands[, x] - pitchCenterGravity[x]))
  })

  # use annealing to wiggle the path, attempting to minimize its cost
  o = optim(
    par = path_init,
    fn = costPerPath,
    gr = generatePath,
    pitchCands = pitchCands,
    pitchCert = pitchCert,
    certWeight = certWeight,
    pitchCenterGravity = pitchCenterGravity,
    manual = manual,
    method = 'SANN',
    control = annealPars
  )

  bestPath = sapply(colseq, function(x) {
    pitchCands[o$par[x], x]
  })
  bestPath
}


#' Cost per path
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contours called by
#' \code{\link{pathfinding_slow}}. Calculates the cost of a particular path
#' through pitch candidates based on pitch jumps and distance from
#' high-certainty candidates.
#' @param path evaluated path through pitch candidates (as integers specifying
#'   the rows in pitchCands, not the actual values of pitch)
#' @inheritParams pathfinder
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#'   candidates per fft frame weighted by our certainty in each of these
#'   candidates
#' @param manual a dataframe of manual pitch candidates from pathfinder()
#' @keywords internal
costPerPath = function(path,
                       pitchCands,
                       pitchCert,
                       certWeight,
                       pitchCenterGravity,
                       manual = NULL) {
  # if there is nothing to wiggle, generatePath() returns NA and we want
  # annealing to terminate quickly, so we return very high cost
  if (is.na(path[1])) return(1e10)

  # if the path fails to pass through any of the manual pitch values,
  # also return immediately with very high cost
  if (!is.null(manual)) {
    # the last cand in every column is the manual one, so the path
    # should always go through the last cand in the manual frames
    if (any(path[manual$frame] != nrow(pitchCands))) {
      return(1e10)
    }
  }

  # get the cost of transition from the current point to each of the pitch
  # candidates in the next frame
  len = length(path)
  cost_pitchJump = sapply(seq_len(len - 1), function(x) {
    costJumps(pitchCands[path[x], x], pitchCands[path[x + 1], x + 1])
  })
  cost_pitchJump = mean(cost_pitchJump, na.rm = TRUE)

  # get the cost of deviating from pitchCenterGravity
  cost_cert = sapply(seq_len(len), function(x) {
    abs(pitchCands[path[x], x] - pitchCenterGravity[x])
  })
  cost_cert = mean(cost_cert, na.rm = TRUE)

  # get a weighted average of transition costs associated with the certainty
  # of each estimate vs. the magnitude of pitch jumps
  costs = certWeight * cost_cert + (1 - certWeight) * cost_pitchJump
  costs
}


#' Generate path
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contours called by
#' \code{\link{pathfinding_slow}}. Generates proposals for new paths through
#' pitch candidates. It gives up and returns NA after 100 attempts, which stops
#' annealing - so the adaptation of pitch contour doesn't happen
#' @param path currently evaluated path
#' @inheritParams pathfinder
#' @param ... nothing really, but otherwise optim() complains
#' @keywords internal
generatePath = function(path, pitchCands, ...) {
  i = 1
  while (i < 100) {
    point_to_wiggle = sample(seq_along(path), 1)
    idx = which(!is.na(pitchCands[, point_to_wiggle])) [-path[point_to_wiggle]]
    if (length(idx) > 0 & is.numeric(idx)) {
      path[point_to_wiggle] = sample(idx, size = 1)
      return(path)
    }
    i = i + 1
  }
  NA
}


#' Snake
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contour. Wiggles a snake
#' along the gradient of internal + external forces. NB: if the snake is run,
#' the final contour may deviate from the actually measured pitch candidates!
#' @param pitch numeric vector representing our best guess at pitch contour,
#'   which we are now attempting to improve by minimizing its elastic tension
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#'   candidates per fft frame weighted by our certainty in each of these
#'   candidates
#' @return Returns optimized pitch contour (numeric vector of the same length as
#'   \code{pitch}).
#' @keywords internal
snake = function(pitch,
                 pitchCands,
                 pitchCert,
                 certWeight,
                 pitchCenterGravity,
                 snakeStep = 0.05,
                 snakePlot = FALSE) {
  ran = diff(range(pitchCands, na.rm = TRUE)) # range of pitch
  maxIter = floor(ran / snakeStep * 2)  # just heuristic, no theory behind this

  # plot for debugging or esthetic appreciation
  if (snakePlot) {
    # plot all pitch candidates and the initial path
    plot(seq(1, ncol(pitchCands)), pitch,
         type = 'n', xlab = 'FFT frame', ylab = 'log(pitch)',
         ylim = c(
           range(pitchCands, na.rm = TRUE)[1] - .3 * ran,
           range(pitchCands, na.rm = TRUE)[2] + .3 * ran
         )
    )
    for (r in seq_len(nrow(pitchCands))) {
      points(seq(1, ncol(pitchCands)),
             pitchCands[r, ],
             cex = as.numeric(pitchCert[r, ]) * 2)
    }
    lines(seq(1, ncol(pitchCands)), pitch)
  }

  # optimization algorithm follows
  i = 1
  force_old = 1e10  # Inf causes NaN in force_delta
  while (i < maxIter) {
    force = forcePerPath(pitch = pitch,
                         pitchCands = pitchCands,
                         pitchCert = pitchCert,
                         pitchCenterGravity = pitchCenterGravity,
                         certWeight = certWeight)
    force_new = mean(abs(force))
    force_delta = (force_old - force_new) / force_old
    force_old = force_new
    if (is.na(force_delta) | force_delta < snakeStep) break
    # wiggle the snake along the gradient of the total force acting on it
    # (elastic + attraction of high-certainty pitch candidates)
    pitch = pitch + snakeStep * force
    if (snakePlot) {
      lines(seq(1, length(pitch)), pitch,
            type = 'l', col = 'green', lty = 4)
    }
    i = i + 1
  }

  if (snakePlot) {
    lines(seq(1, length(pitch)), pitch,
          type = 'l', col = 'blue', lwd = 3)
  }
  pitch
}


#' Force per path
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contour. Returns the
#' total force acting on a snake (sum of internal and external gradients, i.e.
#' of the elastic force trying to straighten the snake [internal] and of the
#' force pushing the snake towards the most certain pitch estimates [external])
#' @inheritParams snake
#' @inheritParams pathfinder
#' @inheritParams analyze
#' @param pitchCenterGravity numeric vector giving the mean of all pitch
#'   candidates per fft frame weighted by our certainty in each of these
#'   candidates
#' @return Returns a numeric vector of the same length as \code{pitch} that
#'   gives the total force acting on the snake at each point.
#' @keywords internal
forcePerPath = function(pitch,
                        pitchCands,
                        pitchCert,
                        pitchCenterGravity,
                        certWeight) {
  ran = diff(range(pitchCands, na.rm = TRUE))
  # external_force = -(pitch_path - pitchCenterGravity) / ran
  external_force = pitch # just a quick way to initialize a vector of the right length
  for (i in seq_len(ncol(pitchCands))) {
    cands = na.omit(pitchCands[, i])
    certs = na.omit(pitchCert[, i])
    deltas = 1 / exp((cands - pitch[i]) ^ 2)
    forces = certs * deltas
    forces = ifelse(cands > pitch[i], forces, -forces)
    external_force[i] = sum(forces)
  }
  # external_force is the "external" force - the attraction of high-certainty
  # pitch candidates

  internal_force = -findGrad(pitch)
  # internal_force is the elastic force trying to make the curve smooth

  total_force = certWeight * external_force + (1 - certWeight) * internal_force
  # weighted average of internal and external forces

  total_force
}


#' Find gradient
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contour. Returns
#' the elastic force acting on a snake. See \code{\link{snake}}.
#' @param path numeric vector
#' @param interpol the number of points to interpolate beyond each end of the path
#' @return Returns a vector of the same length as input path giving its 4th derivative.
#' @keywords internal
findGrad = function(path, interpol = 3) {
  # extrapolate 2 values before the first one and two after the last one based
  # on /interpol/ number of points in case the path is shorter than the
  # specified interpol:
  len = length(path)
  interpol = ifelse(interpol > len, len, interpol)
  if (interpol == 1) {
    path = c(rep(.subset2(path, 1), 2), path, rep(.subset2(path, len), 2))
  } else {
    slopeLeft = suppressWarnings(summary(lm(
      path[seq_len(interpol)] ~ seq(1, interpol)
    ))$coef[2, 1])
    minus12 = .subset2(path, 1) - c(1, 2) * slopeLeft
    slopeRight = suppressWarnings(summary(lm(
      path[(len - interpol + 1):len] ~ seq(1, interpol)
    ))$coef[2, 1])
    plus12 = .subset2(path, len) + c(1, 2) * slopeRight
    path = c (.subset2(minus12, 2), .subset2(minus12, 1),
              path, .subset2(plus12, 1), .subset2(plus12, 2))
  }

  # take the 4th derivative of the path with extrapolated values
  # (so that we get d4f over the entire length of the original path)
  grad = rep(0, len)
  for (i in 3:(len - 2)) {  # approximation
    grad[i] = .subset2(path, i - 2) - 4 * .subset2(path, i - 1) +
      6 * .subset2(path, i) - 4 * .subset2(path, i + 1) + .subset2(path, i + 2)
  }
  grad = grad[3:(len - 2)]
  grad
}


#' Median smoothing
#'
#' Internal soundgen function.
#'
#' Internal helper function for smoothing pitch contours or other contours. Only
#' outliers are modified, so it's not like smoothing with a kernel. NB: the
#' expected input is pitch, so deviance is calculated on a log-scale.
#' @param df dataframe (each column is processed separately, so multiple
#'   contours can be fed into this function at once to speed things up)
#' @param smoothing_ww width of smoothing window (points)
#' @param smoothingThres tolerated deviance from moving median (semitones)
#' @param inviolable a vector of indices of the rows of df that should not be
#'   modified (meant for manual pitch values)
#' @return Returns a dataframe of the same dimensions as df.
#' @keywords internal
#' @examples
#' df = data.frame(a = rnorm(40, mean = 100, sd = 20),
#'                 b = rnorm(40, mean = 100, sd = 10))
#' df1 = soundgen:::medianSmoother(df, smoothing_ww = 5,
#'       smoothingThres = 1, inviolable = 1:10)
#' plot(df[, 2], type='b')
#' lines(df1[, 2], type='b', col='blue', pch=3)
medianSmoother = function(df,
                          smoothing_ww,
                          smoothingThres,
                          inviolable = NULL) {
  temp = df # to calculate median_over_window for original values
  hw = floor(smoothing_ww / 2) # smooth over plus-minus half the smoothing_ww
  for (i in seq_len(nrow(df))) {
    window = c(max(i - hw, 1), min(i + hw, nrow(df))) # smoothing window
    # to be conservative, treat NAs as repetitions of value i
    winVal = as.matrix(temp[(window[1]:window[2]), ])
    for (c in seq_len(ncol(winVal))) {
      idx_na = which(is.na(winVal[, c]))
      winVal[idx_na, c] = df[i, c]
    }
    median_over_window = apply(winVal, 2, median, na.rm = TRUE)
    # NB: use either temp or df, for original or smoothed values to be used
    # for calculating median_over_window

    if (!any(inviolable == i)) {
      if (smoothingThres > 0) {
        # difference from median pitch etc over window, in semitones
        deviance = 12 * log2(as.numeric(df[i, ]) / median_over_window)
        cond = which(abs(deviance) > smoothingThres)
        df[i, cond] = median_over_window[cond]
      } else {
        df[i, ] = median_over_window
      }
    }
  }
  df
}


#' Find voiced segments
#'
#' Internal soundgen function.
#'
#' Internal helper function for postprocessing of pitch contours. Merges voiced
#' segments at least \code{shortestSyl} ms long and separated by less than
#' \code{shortestPause} ms. Called by \code{\link{analyze}}
#' @param pitchCands matrix of possible pitch values per column. One column is
#'   one fft frame, one row is one pitch candidate
#' @inheritParams analyze
#' @inheritParams spectrogram
#' @param minVoicedCands a frame is considered to be voiced if at least this
#'   many pitch candidates are not NA. Defaults to 2: since dom is usually
#'   defined, in practice this means that we also want at least one other pitch
#'   candidate (autocor, cep or BaNa)
#' @param pitchMethods methods of pitch tracking in analyze()
#' @param manualV index of frames that should definitely be voiced (manual
#'   candidates)
#' @param manualTryToV index of frames that should be treated as voiced as long
#'   as they have any candidates at all (even <minVoicedCands)
#' @param manualUnv index of frames forced to be unvoiced
#' @return Returns a dataframe specifying where each voiced segment starts and
#'   ends (in fft frames, not ms!)
#' @keywords internal
#' @examples
#' pitch = c(rep(NA, 5), rnorm(15, 300, 15), rep(NA, 7), rnorm(10, 400, 10), rep(NA, 6))
#' plot(pitch, type = 'b')
#' soundgen:::findVoicedSegments(
#'   pitchCands = matrix(pitch, nr = 1),
#'   shortestSyl = 20,
#'   shortestPause = 60,
#'   step = 10,
#'   minVoicedCands = 1,
#'   pitchMethods = 'blabla',
#'   manualV = NULL,
#'   manualTryToV = NULL,
#'   manualUnv = NULL
#' )
findVoicedSegments = function(pitchCands,
                              shortestSyl,
                              shortestPause,
                              step,
                              minVoicedCands,
                              pitchMethods,
                              manualV = NULL,
                              manualTryToV = NULL,
                              manualUnv = NULL) {
  resetMVC = FALSE
  messageMVC = FALSE
  if (is.null(minVoicedCands)) {
    resetMVC = TRUE
  }
  if (is.numeric(minVoicedCands)) {
    if (minVoicedCands < 1 | minVoicedCands > length(pitchMethods)) {
      resetMVC = TRUE
      messageMVC = TRUE
    }
  }
  if (resetMVC) {
    if ('dom' %in% pitchMethods & length(pitchMethods) > 1) {
      # since dom is usually defined, we want at least one more pitch candidate
      # (unless dom is the ONLY method that the user wants for pitch tracking)
      minVoicedCands = 2
    } else {
      minVoicedCands = 1
    }
    if (messageMVC) {
      message(paste0('minVoicedCands must be between 1 and length(pitchMethods);',
                     ' resetting to ', minVoicedCands))
    }
  }
  putativelyVoiced = apply(pitchCands, 2, function(x)
    sum(!is.na(x)) >= minVoicedCands)
  if (length(manualV) > 0) putativelyVoiced[manualV] = TRUE
  if (length(manualTryToV)) {
    # voice as forced by manualTryToV, as long as those frames have some cand-s
    anyCands = which(apply(pitchCands, 2, function(x) any(!is.na(x))))
    putativelyVoiced[manualTryToV[manualTryToV %in% anyCands]] = TRUE
  }
  if (length(manualUnv) > 0) putativelyVoiced[manualUnv] = FALSE
  lenV = length(putativelyVoiced)

  # the smallest number of consecutive non-NA pitch values that constitute a
  # voiced segment; but at least 1
  nRequired = max(1, ceiling(shortestSyl / step))
  # the greatest number of NA values that we tolerate before we say a new voiced
  # syllable begins
  toleratedGap = floor(shortestPause / step)

  # find and save separately all voiced segments
  segmentStart = numeric()
  segmentEnd = numeric()
  i = 1
  while (i < (lenV - nRequired + 1)) {
    # find beginning
    while (i < (lenV - nRequired + 1)) {
      if (sum(putativelyVoiced[i:(i + nRequired - 1)]) == nRequired |
          any(manualV == i)) {  # start a new syllable if we hit a manually voiced frame
        segmentStart = c(segmentStart, i)
        break
      }
      i = i + 1
    }
    # find end
    if (length(segmentEnd) < length(segmentStart)) {
      while (i < (lenV - toleratedGap + 1)) {
        if (sum(putativelyVoiced[i:(i + toleratedGap)]) == 0 |
            any(manualUnv == i)) {  # interrupt the syllable if we hit a manuaUnv frame
          segmentEnd = c(segmentEnd, i - 1)
          i = i - 1
          break
        }
        i = i + 1
      }
      if (length(segmentEnd) < length(segmentStart)) {
        # if the end is not found, take the last voiced value
        # (or terminate at the first manually unvoiced value after segmentStart, if any)
        segmentEnd = c(segmentEnd, min(
          manualUnv[which(manualUnv > tail(segmentStart, 1))] - 1,
          tail(which(putativelyVoiced), 1)
        ))
        break
      }
    }
    i = i + 1
  }
  data.frame(segmentStart = segmentStart, segmentEnd = segmentEnd)
}


#' Plot pitch candidates
#'
#' Internal soundgen function.
#'
#' Plots pitch candidates or adds them to a spectrogram.
#' @param pitchCands,pitchCert,pitchSource matrices of pitch candidates, their
#'   certainty, and pitch tracking method used as generated internally by
#'   analyze(); columns = STFT frames, rows = candidates
#' @param pitch best guess at pitch contour; length = ncol(pitchCands)
#' @param candPlot,pitchPlot lists of graphical settings for plotting candidates
#'   and pitch contour, respectively
#' @param extraContour another contour to add to the plot, such as harmHeight, Hz
#' @param addToExistingPlot if TRUE, assumes that a spectrogram is already
#'   plotted; if FALSE, sets up a new plot
#' @param showLegend if TRUE, shows a legend
#' @param y_Hz if TRUE, plot in Hz, otherwise in kHz
#' @param ... other graphical parameters used for creating a new plot if
#'   addToExistingPlot = FALSE
#' @keywords internal
#' @examples
#' s = soundgen()
#' a = soundgen:::.analyze(soundgen:::readAudio(s, samplingRate = 16000),
#'  windowLength = 25, step = 25,
#'  returnPitchCands = TRUE, plot = FALSE)
#' spectrogram(s, 16000, windowLength = 25, step = 5, ylim = c(0, 1), osc = FALSE)
#' soundgen:::addPitchCands(
#'   pitchCands = a$pitchCands$freq,
#'   pitchCert = a$pitchCands$cert,
#'   pitchSource = a$pitchCands$source,
#'   pitch = a$result$pitch,
#'   timestamps = a$result$time / 1000)
addPitchCands = function(pitchCands,
                         pitchCert,
                         pitchSource,
                         pitch,
                         timestamps = NULL,
                         candPlot = list(),
                         pitchPlot = list(),
                         extraContour = NULL,
                         extraContour_pars = list(),
                         priorMean = NULL,
                         priorSD = NULL,
                         pitchFloor = NULL,
                         pitchCeiling = NULL,
                         addToExistingPlot = TRUE,
                         showLegend = TRUE,
                         y_Hz = FALSE,
                         yScale = c('orig', 'bark', 'mel')[1],
                         ...) {
  if (is.null(pitchCands) & is.null(pitch)) invisible()
  if (length(pitchCands) < 1 & length(pitch) < 1) invisible()
  if (is.null(timestamps) & !is.null(pitchCands)) {
    timestamps = as.numeric(colnames(pitchCands))
  }
  if (is.null(pitchPlot$showPrior)) {
    showPrior = TRUE
  } else {
    showPrior = pitchPlot$showPrior
  }
  if (showPrior & is.numeric(priorMean) & is.numeric(priorSD)) {
    prior = getPrior(
      priorMean = priorMean,
      priorSD = priorSD,
      pitchFloor = pitchFloor,
      pitchCeiling = pitchCeiling,
      len = 100,
      plot = FALSE
    )
  } else {
    prior = NULL
  }
  pitchPlot = pitchPlot[names(pitchPlot) != 'showPrior']
  yScaleCoef = ifelse(y_Hz | yScale %in% c('bark', 'mel', 'ERB'), 1, 1/1000)
  if (yScale == 'bark') {
    # NB: tuneR::hz2bark can't handle NAs
    pitchCands = 6 * asinh(pitchCands / 600)
    pitch = 6 * asinh(pitch / 600)
    if (!is.null(prior)) prior$freq = 6 * asinh(prior$freq / 600)
    if (!is.null(extraContour)) extraContour = 6 * asinh(extraContour / 600)
  } else if (yScale == 'mel') {
    pitchCands = hz2mel(pitchCands)
    pitch = hz2mel(pitch)
    if (!is.null(prior)) prior$freq = hz2mel(prior$freq)
    if (!is.null(extraContour)) extraContour = hz2mel(extraContour)
  }

  # If addToExistingPlot is FALSE, we first have to set up an empty plot
  if (addToExistingPlot == FALSE) {
    arguments = list(...)  # save ... arguments as a list
    m = max(pitchCands, na.rm = TRUE) * yScaleCoef  # for ylim on the empty plot
    if (is.na(m)) m = 5  # samplingRate / 2 * yScaleCoef
    arguments$ylim = c(0, m)
    do.call(plot,   # need do.call, otherwise can't pass the modified ylim
            args = c(list(x = timestamps,
                          y = rep(0, ncol(pitchCands)),
                          type = 'n'),
                     arguments))
  }

  # Prepare plot pars - combine user-defined with defaults
  plotPars = defaults_analyze_pitchCand  # see presets.R
  candPlot$final = pitchPlot

  for (i in seq_along(candPlot)) {
    temp = candPlot[[i]]
    if (length(temp) > 0) {
      method = names(candPlot)[i]
      idx = which(plotPars$method == method)
      if (length(idx) > 0) {
        for (j in seq_along(temp)) {
          property = names(temp[j])[1]
          if (!property %in% colnames(plotPars)) {
            # set to default, otherwise other rows become NA
            if (property == 'cex') {
              plotPars[, property] = 2
            } else if (property == 'lty') {
              plotPars[, property] = 1  # par(lty) gives "solid" - a character
            } else {
              plotPars[, property] = par(property)
            }
          }
          plotPars[idx, property] = temp[j]
        }
      }
    }
  }

  # Add pitch candidates to the plot
  pitchMethods = unique(na.omit(as.character(pitchSource)))
  if (length(pitchMethods) > 0) {
    for (r in seq_len(nrow(pitchCands))) {
      method_r = as.character(pitchSource[r, ])
      method_r[is.na(method_r)] = 'def'
      pars_method = as.list(plotPars[match(method_r, plotPars$method), 2:ncol(plotPars)])
      pars_method$cex = pars_method$cex *
        plotPars$cex[plotPars$method == 'final'] *
        pitchCert[r, ]
      # for manual, certainty could be Inf, so we reset to 2
      pars_method$cex[which(!is.na(pars_method$cex) &
                              pars_method$cex == Inf)] = 2
      # pars_method$cex[is.na(pars_method$cex)] = 0
      do.call(points, c(pars_method,
                        list(x = timestamps,
                             y = pitchCands[r, ] * yScaleCoef)))
    }
  } else {
    showLegend = FALSE
  }

  # Add the final pitch contour to the plot
  if (any(is.finite(pitch))) {
    pars_pitchContour = as.list(plotPars[plotPars$method == 'final',
                                         2:ncol(plotPars)])
    # first points, in case there are isolated, unconnected points surrounded by
    # NAs (not plotted with type = 'l')
    do.call('points', c(list(
      x = timestamps,
      y = pitch * yScaleCoef
    ), pars_pitchContour))
    # now join pitch contour with a line
    do.call('points', c(list(
      x = timestamps,
      y = pitch * yScaleCoef,
      type = 'l'
    ), pars_pitchContour))
  }

  # Add an extra contour such as harmHeight
  if (!is.null(extraContour)) {
    if (any(!is.na(extraContour)) & length(timestamps) > 0) {
      if (is.null(extraContour_pars$type)) extraContour_pars$type = 'l'
      if (extraContour_pars$type != 'n') {
        if (is.null(extraContour_pars$lty)) extraContour_pars$lty = 2
        if (is.null(extraContour_pars$lwd)) extraContour_pars$lwd = 2
        if (is.null(extraContour_pars$col)) extraContour_pars$col = 'red'
        do.call(points, c(list(x = timestamps, y = extraContour * yScaleCoef),
                          extraContour_pars))
      }
    }
  }

  # Show prior
  if (!is.null(prior)) {
    ran_x_5 = (tail(timestamps, 1) - timestamps[1]) * .075   # 7.5% of plot width
    points(x = prior$prob * ran_x_5,
           y = prior$freq * yScaleCoef, type = 'l', lty = 2)
    text(x = ran_x_5,
         y = priorMean * yScaleCoef,
         pos = 2, labels = 'Prior', cex = 0.65, offset = 0.25)
  }
  text(x = 0,
       y = pitchFloor * yScaleCoef,
       pos = 4, labels = 'floor', cex = 0.65, offset = 0)
  text(x = 0,
       y = pitchCeiling * yScaleCoef,
       pos = 4, labels = 'ceiling', cex = 0.65, offset = 0)

  # Add a legend
  if (showLegend) {
    pl = plotPars[plotPars$method %in% c(pitchMethods, 'final'), ]
    legend("topright",
           legend = pl$method,
           pch = pl$pch,
           lty = ifelse(pl$method == 'final', pl$lty, NA),
           lwd = pl$lwd,
           col = pl$col,
           bg = "white")
  }
  invisible()
}


#' Interpolate NAs
#'
#' Internal soundgen function
#'
#' Takes a numeric vector and fills in the NAs by linear
#' interpolation in the middle and constant or linear interpolation at the ends.
#' @param x numeric vector
#' @param idx_na which(is.na(x))
#' @param nPoints the number of points to use for interpolating leading and
#'   trailing NAs: 1 = constant interpolation, 2 = use the first two non-NAs at
#'   the beginning and the last two non-NAs at the end, etc.
#' @return Returns the same numeric vector with NAs filled in by interpolation.
#' @keywords internal
#' @examples
#' soundgen:::intplNA(c(NA, 405, 441, 460, NA, NA, NA, 480, 490, NA, NA))
#' soundgen:::intplNA(c(NA, 405, 441, 460, NA, NA, NA, 480, 490, NA, NA), nPoints = 3)
intplNA = function(x, idx_na = NULL, nPoints = 1) {
  len = length(x)
  if (is.null(idx_na)) idx_na = which(is.na(x))
  n_na = length(idx_na)
  if (n_na == len | n_na == 0) return(x)
  idx_notNA = seq_len(len) [-idx_na]
  len_notNA = length(idx_notNA)

  # fill in NAs in the middle by linear interpolation
  if (n_na > 0) {
    idx_center = .subset2(idx_notNA, 1) : .subset2(idx_notNA, len_notNA)
    if (length(idx_center) > 1) {
      xc = approx(x[idx_center], n = length(idx_center), na.rm = TRUE)$y
      if (!inherits(xc, 'try-error')) x[idx_center] = xc
    }
    first_nonNA = .subset2(idx_notNA, 1)
    last_nonNA = .subset2(idx_notNA, len_notNA)

    # fill in NAs at the ends
    if (nPoints == 1) {
      # constant interpolation
      if (.subset2(idx_notNA, 1) > 1) {
        x[seq_len(.subset2(idx_notNA, 1) - 1)] = x[first_nonNA]
      }

      if (last_nonNA < len) {
        x[(last_nonNA + 1):len] = .subset2(x, last_nonNA)
      }
    } else {
      # linear interpolation of NAs at the beg & end
      if (first_nonNA > 1) {
        a = first_nonNA : min(len, (first_nonNA + nPoints - 1))
        b = x[a]
        mod = lm(b ~ a)
        x[seq_len(first_nonNA - 1)] = predict(
          mod, newdata = list(a = seq_len(first_nonNA - 1)))
      }
      if (last_nonNA < len) {
        a = max(1, (last_nonNA - nPoints + 1)) : last_nonNA
        b = x[a]
        mod = lm(b ~ a)
        x[(last_nonNA + 1):len] = predict(mod, newdata = list(a = (last_nonNA + 1):len))
      }
    }
  }
  x
}
Any scripts or data that you put into this service are public.
soundgen documentation built on April 4, 2025, 3:44 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
soundgen
Sound Synthesis and Acoustic Analysis

R/pitch_postprocessing.R
In soundgen: Sound Synthesis and Acoustic Analysis

Defines functions intplNA addPitchCands findVoicedSegments medianSmoother findGrad forcePerPath snake generatePath costPerPath pathfinding_slow costJumps pathfinding_fast interpolate pathfinder

Documented in addPitchCands costJumps costPerPath findGrad findVoicedSegments forcePerPath generatePath interpolate intplNA medianSmoother pathfinder pathfinding_fast pathfinding_slow snake

Try the soundgen package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

soundgen Sound Synthesis and Acoustic Analysis

R/pitch_postprocessing.R In soundgen: Sound Synthesis and Acoustic Analysis

Defines functions intplNA addPitchCands findVoicedSegments medianSmoother findGrad forcePerPath snake generatePath costPerPath pathfinding_slow costJumps pathfinding_fast interpolate pathfinder

Documented in addPitchCands costJumps costPerPath findGrad findVoicedSegments forcePerPath generatePath interpolate intplNA medianSmoother pathfinder pathfinding_fast pathfinding_slow snake

Try the soundgen package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

soundgen
Sound Synthesis and Acoustic Analysis

R/pitch_postprocessing.R
In soundgen: Sound Synthesis and Acoustic Analysis