R/assess.doneness.R
In flightcallr: Classify Night Flight Calls Based on Acoustic Measurements

Documented in assess.doneness

assess.doneness = function(
### Do we have a big enough call-vs-noise sample size to be able to make a
### model which will save us labor overall?
bootstrap
### The bootstrap call-vs-noise data set. Features from generate.seewave.measures() must be present, as well as a Call_vs_Noise column
### filled in with "Call" and "Noise".
) {
  seewave.measures = c("Rugosity", "Crest_Factor", "Temporal_Entropy", "Shannon_Entropy", "Shannon_Entropy_Bandlimited", "Spectral_Flatness_Measure", 
                       "Spectral_Flatness_Measure_Bandlimited", "Spectrum_Roughness", "Spectrum_Roughness_Bandlimited", "Autocorrelation_Mean", 
                       "Autocorrelation_Median", "Autocorrelation_Standard_Error", "Dominant_Frequency_Mean", "Dominant_Frequency_Standard_Error", 
                       "Specprop_Mean", "Specprop_SD", "Specprop_SEM", "Specprop_Median", "Specprop_Mode", "Specprop_Q25", "Specprop_Q75", 
                       "Specprop_IQR", "Specprop_Cent", "Specprop_Skewness", "Specprop_Kurtosis", "Specprop_SFM", "Specprop_SH")
  bootstrap = bootstrap[, !names(bootstrap) %in% c("Detector_numeric")]  # Detector_numeric column is annoying duplication of Detector
  previous.level.rejected = T
  percentiles = sort(unique(bootstrap$Random.Percent))
  perf = list()
  fp.left = c()
  my.fpr = list()
  op = c()
  # for each bootstrap percent
  for (i in percentiles) {
    # assess how many calls are in that percent
    num.calls = nrow(bootstrap[(bootstrap$Random.Percent <= i & bootstrap$Call_vs_Noise == "Call"), ])
    num.noise = nrow(bootstrap[(bootstrap$Random.Percent <= i & bootstrap$Call_vs_Noise == "Noise"), ])
    if (num.calls + num.noise != nrow(bootstrap[bootstrap$Random.Percent <=i, ])) stop("Assumption Violated")
    cat("i is", i, " and num.calls is", num.calls, "\n")
    # if it's fewer than, say, 10, call it a bust and move on to the next percent (because a cross-validation run might not receive
    # any calls!)
    if (num.calls < 13) { # Lucky 13!
      # I DON'T THINK I NEED TO DO ANYTHING WITH PERF, OP, or ACTUAL.FP.LEFT
      fp.left = c(fp.left, num.noise)
      my.fpr = c(my.fpr, NA)
      next
    }
    # perform tenfold cross-validation on the portion with random.percent <= your percent. This is to get an idea of the expected FPRs and such.
    # Calculate the numbers expected to remain and make some decisions about whether it's a good idea to stop yet.
    my.perf = nfold.xval(bootstrap[bootstrap$Random.Percent <= i, ], fold=10, annotation = paste(i, "% Bootstrap Sample", sep=""))[[1]]
    ##seealso<< \code{\link{nfold.xval}}
    perf = rejigger.perf(my.perf)
    op.2 = calculate.operating.parameters(perf, method="confidence.range")$op
    op.3 = calculate.operating.parameters(perf, method="frequency")$op
    op = ifelse(op.2 < op.3, op.2, op.3)
    fpr = list(perf[perf$alpha.values == op, "x.values"])
    my.fpr = c(my.fpr, fpr)
    windows()
    plot(c(1, i), c(0, 1), type="n", xlab="Bootstrap Percent", ylab="FPR", main="Learning Curve")
    for (j in 1:i) {
      points(rep(j, length(my.fpr[[j]])), my.fpr[[j]])
    }
    names(my.fpr) = 1:i
    windows()
    # TRY A lattice BWPLOT
    # TRY A loess FIT!!
    boxplot(my.fpr, main="Learning Curve", xlab="Bootstrap Percent", ylab ="FPR")
  }
    
### Currently no return value; the doneness must be assessed by visual 
### evaluation of the plots generated by this function.
}