R/sigtests.R
In Dimodal: Spacing Tests for Multi-Modality

Documented in Diexcurht.test Diflat.critval Diflat.test Dinrun.test Dipeak.critval Dipeak.test Dipermht.test Dirunlen.test

# sigtests.R -
# Feature significance tests.
#
# c 2024-2026 Greg Kreider, Primordial Machine Vision Systems, Inc.

## To Do:
# - 
#

#### Public Interface

# Determine the quantile/significance level of the local maximum with height(s)
# ht in a sample with n data points and after low-pass filtering the spacing
# with the kernel type filter of size flp, which may be the total window size
# or a fraction of the data.  lower.tail determines whether the probability
# of the null distribution is less than/equal (TRUE) or greater than ht.
# Returns a list of class 'Ditest' with elements:
#    $method            descriptive text
#    $statfn            name (text) of function used to evaluate statistic
#    $statistic         what is tested, the adjusted standardized peak height
#    $statname          text name of the statistic
#    $parameter         inverse Gaussian/Wald parameters corrht, mu, lambda
#    $p.value           probability of height
#    $alternative       direction of test
#    $model             model parameters ht, n, flp, filter from arguments
# The statistic and p.value will have the length of the ht argument.
Dipeak.test <- function(ht, n, flp, filter, lower.tail=TRUE) {

  n <- n[1L]
  flp <- flp[1L]
  filter <- filter[1L]
  
  check.modelargs(n, flp, lower.tail)
  
  if (1 <= flp) {
    flp <- flp / n
  }

  # NULL ht returns empty vector.  Other invalid values to NA, coercing the
  # vector out of its original type in the process.
  if (0 < length(ht)) {
    ht[!is.numeric(ht)] <- NA_real_
    ht <- as.numeric(ht)
  }

  frat <- peak.filterratio(filter)

  corr <- peak.correction(flp)

  x <- 10^(((ht / frat) - corr["badj"]) / corr["madj"])
  names(x) <- NULL

  mu <- peak.qwald.param(1, n, flp)
  lambda <- peak.qwald.param(2, n, flp)
  p <- statmod::pinvgauss(x, mu, lambda, lower.tail=lower.tail)

  p[is.na(ht)] <- NA_real_
  p[is.nan(ht)] <- NaN

  res <- list(method="low-pass spacing height model",
              statfn="pinvgauss", statistic=ht, statname="ht",
              parameter=list(corrht=x, mu=mu, lambda=lambda), p.value=p,
              alternative=ifelse(lower.tail, "less", "greater"),
              model=c(n=n, flp=flp, filter=filter))
  class(res) <- "Ditest"
  res
}

# Return the local maxima height(s) at significance level(s) pval in a sample
# with n data points and after low-pass filtering with kernel filter of size
# flp, which may be the vector length or a fraction of the data.  Invalid pval
# evaluate to NaN.
# NOTE: when building and verifying models based on quantiles, use pval=1-q.
Dipeak.critval <- function(pval, n, flp, filter) {

  n <- n[1L]
  flp <- flp[1L]
  filter <- filter[1L]

  check.modelargs(n, flp, TRUE)
  
  if (1 <= flp) {
    flp <- flp / n
  }

  if (0 < length(pval)) {
    pval[!is.numeric(pval)] <- NA_real_
    pval <- as.numeric(pval)
  }
  # This is the behavior of the quantile functions.
  pgood <- (pval >= 0) & (pval <= 1)
  pval[!pgood] <- NaN

  frat <- peak.filterratio(filter)

  # Model of the Weibull scale=2, shape=4 null distribution.
  mu <- peak.qwald.param(1, n, flp)
  lambda <- peak.qwald.param(2, n, flp)
  htpred <- statmod::qinvgauss(1-pval, mu, lambda)

  # Could also modify prediction for different distributions.
  #   beta    = 5.76 - (4.62 * htpred)
  #   normal  = 0.93 * htpred
  #   gumbel, gamma, chisq, logistic  = -1.40 + (2.25 * htpred)
  corr <- peak.correction(flp)
  cv <- frat * (corr["badj"] + (corr["madj"] * log10(htpred)))
  names(cv) <- NULL
  cv
}

# Return the quantile/significance level of a flat of length(s) len in a
# sample of n data points drawn from distribution basedist, then low-pass
# filtered with kernel filter of size flp (may be fractional or the actual
# size).  lower.tail determines whether the probability of the null
# distribution is less than/equal (TRUE) or greater than (FALSE) the length.
# Returns a list of class 'Ditest' with elements
#    $method             descriptive text
#    $statfn             name (text) of function used to evaluate statistic
#    $statistic          what is tested, the flat length
#    $statname           text name of the statistic
#    $p.value            probability of length in model
#    $alternative        direction of test
#    $model              model parameters n, flp, filter, basedist from args
# The statistic and p.value will have the length of the len argument.  The
# p-value will be NaN if there's a problem setting up the test, otherwise
# a value between 0 and 1.
Diflat.test <- function(len, n, flp, filter, basedist, lower.tail=TRUE) {

  n <- n[1L]
  flp <- flp[1L]
  filter <- filter[1]
  basedist <- basedist[1L]

  check.modelargs(n, flp, lower.tail)

  if (1 <= flp) {
    flp <- flp / n
  }

  if (0 < length(len)) {
    len[!is.numeric(len)] <- NA_real_
    len <- as.numeric(len)
  }

  pcf <- flat.pcoef(n, flp, filter, basedist)

  optfn <- function(p, l, pcf) {
    sum( c(1, p, p^2, log(p/(1-p))) * pcf ) - l
  }
  
  # Needed for optimize, not uniroot.
  sefn <- function(p, l, pcf) {
    (sum( c(1, p, p^2, log(p/(1-p))) * pcf ) - l)^2
  }

  pmin <- flat.pmin(pcf)
  if (is.na(pmin)) {
    # logit blows up at 0 and 1, so offset a little for finite boundaries.
    prng <- c(.Machine$double.eps, 1-.Machine$double.eps)
  } else {
    # We've built the model only for q >= 0.9.  Assume a linear drop below
    # this, although it doesn't match the actual behavior.
    prng <- c(pmin, 1-.Machine$double.eps)
  }
  lmin <- optfn(prng[1L], 0, pcf)
  lmax <- optfn(prng[2L], 0, pcf)

  p <- sapply(len,
              function(l) {
                if (is.infinite(l)) {
                  ifelse(l < 0, 0, 1)
                } else if (!is.finite(l)) {
                  NaN
                } else if (l < lmin) {
                  pmin * (l / lmin)
                } else if (lmax < l) {
                  1.0
                } else {
                  pval <- safe.uniroot(optfn, prng, l, pcf, maxiter=2500,
                                       tol=.Machine$double.eps)$root
                  if (is.na(pval)) {
                    pval <- optimize(sefn, prng, l, pcf,
                                     lower=prng[1], upper=prng[2],
                                     tol=.Machine$double.eps)$minimum
                  }
                  pval
                }
              })

  if (!lower.tail && (0 < length(p))) {
    p <- 1 - p
  }
  
  res <- list(method="low-pass spacing height model",
              statfn="Diflat.test", statistic=len, statname="len",
              p.value=p,
              model=c(n=n, flp=flp, filter=filter, basedist=basedist),
              alternative=ifelse(lower.tail, "less", "greater"))
              
  class(res) <- "Ditest"
  res
}

# Return the flat length at significance level pval for a draw of size n from
# distribution basedist and smoothed by a low-pass kernel of type filter and
# size flp (may be fractional or the actual size).  Invalid p, or p values
# that are outside the valid region of our model, evaluate to NaN.
# NOTE: when building and verifying models based on quantiles, use pval=1-q.
Diflat.critval <- function(pval, n, flp, filter, basedist) {

  n <- n[1L]
  flp <- flp[1L]
  filter <- filter[1L]
  basedist <- basedist[1L]

  check.modelargs(n, flp, TRUE)

  if (1 <= flp) {
    flp <- flp / n
  }

  if (0 < length(pval)) {
    pval[!is.numeric(pval)] <- NA_real_
    pval <- as.numeric(pval)
  }
  pgood <- (pval >= 0) & (pval <= 1)
  pval[!pgood] <- NaN

  pcf <- flat.pcoef(n, flp, filter, basedist)
  pmin <- flat.pmin(pcf)
  if (is.na(pmin)) {
    pmin <- 0
  }

  # Internally we work with quantiles.
  p <- 1 - pval

  pmat <- matrix(c(rep(1, length(p)), p, p^2, log(p/(1-p))),
                 ncol=4, byrow=FALSE)

  # Replace the model with a linear drop from lmin to 0 over pmin.
  lmin <- (c(1, pmin,  pmin^2, log(pmin/(1-pmin))) %*% pcf)[1]
  # Remove NA values from the selection.
  ltmin <- p < pmin
  ltmin[is.na(ltmin)] <- FALSE
  pmat[ltmin,c(1,3,4)] <- 0
  pmat[ltmin,2] <- p[ltmin] * lmin / (pmin * pcf[2])

  cv <- as.vector( pmat %*% pcf)
  cv[(cv < 0) | !is.finite(cv)] <- NaN
  cv
}

# Perform the Kaplansky-Riordan test on the number of runs in the data vector
# x, using the find.runs detector to count runs whose values are within feps
# of their average (ignored for character, logical, or integer values).
# Runs are tested within each interval between start point(s) stID and
# endpoint(s) endID (incl.).  NA and NaN indices generate NA test values,
# real values are rounded to integers, and non-numeric arguments raise
# errors.  lower.tail if TRUE the probability is that the run count is not
# more than the observed.  Returns a list with class 'Ditest' with elements:
#    $method             descriptive text
#    $statfn             name (text) of function used to evaluate statistic
#    $statistic          what is tested, the number of runs
#    $statname           text name of statistic
#    $parameter          Gaussian distribution parameters Erun, Vrun; feps
#    $p.value            probability of number of runs is <= actual
#    $alternative        direction of test
# The parameters are used to standardize the statistic for the test, with
# Erun being the expected number of runs and Vrun the variance in the count.
# The statistic and p.value will have the same length, one per start/end
# segment, which may be 0 if stID and endID are empty.
Dinrun.test <- function(x, stID, endID, feps, lower.tail=TRUE) {

  check.endargs(stID, endID, length(x))

  if (0 == length(stID)) {
    stats <- matrix(NA_real_, nrow=4, ncol=0)
  } else {
    stID <- round(stID)
    endID <- round(endID)

    # A straightforward implementation of the equations in Example 1 of
    # Section 6 of Kaplansky and Riordan.

    stats <- sapply(seq_along(stID),
                    function(s) {
                      if (is.na(stID[s]) || is.nan(stID[s]) ||
                          is.na(endID[s]) || is.nan(endID[s])) {
                         return(c(NA_real_, NA_real_, NA_real_, NA_real_))
                      }

                      sel <- x[stID[s]:endID[s]]
                      runs <- find.runs(sel, feps)

                      n <- table(sel)
                      # Something changed between R 4.0.1 and 4.4.1 where
                      # n became integers that would overflow on large sets.
                      # Making real fixes, and also wipes the element names.
                      n <- as.double(n)
                      nuniq <- length(n)

                      a1 <- sum(n)
                      if (2 == nuniq) {
                        # Actually the Wald/Wolfowitz test.
                        a2 <- n[1] * n[2]
                        a3 <- 0
                      } else if (3 == nuniq) {
                        a2 <- (n[1]*n[2]) + (n[1]*n[3]) + (n[2]*n[3])
                        a3 <- n[1] * n[2] * n[3]
                      } else {
                        a2 <- 0
                        a3 <- 0
                        for (i in 1:(nuniq-1)) {
                          for (j in (i+1):nuniq) {
                            aij <- n[i] * n[j]
                            a2 <- a2 + aij
                            if ((i < (nuniq - 1)) && (j < nuniq)) {
                              for (k in (j+1):nuniq) {
                                a3 <- a3 + (aij * n[k])
                              }
                            }
                          }
                        }
                      }

                      if (nuniq < 2) {
                        erun <- 0
                        vrun <- 0
                      } else {
                        erun <- 1 + ((2 * a2) / a1)
                        vrun <- (((2*a2) * ((2*a2) - a1)) -
                                 (6*a1*a3)) / (a1 * a1 * (a1-1))
                      }

                      # Per Wald/Wolfowitz (last paragraph Sec. 5),
                      # p is probability of nrun or fewer runs.
                      p <- pnorm(runs$stats$nrun, erun, sqrt(vrun),
                                 lower.tail=lower.tail)
                      c(runs$stats$nrun, erun, vrun, p)
                    })
  }

  res <- list(method="Kaplanksy-Riordan test on number of runs",
              statfn="pnorm", statistic=stats[1,], statname="nrun",
              parameter=list(Erun=stats[2,], Vrun=stats[3,], feps=feps),
              p.value=stats[4,],
              alternative=ifelse(lower.tail, "less", "greater"))
  class(res) <- "Ditest"
  res
}

# Determine the probability of the longest run   in the data vector x, using
# the find.runs detector to count runs whose values are within feps of the
# average (ignored for character, logical, or integer values).  Runs are
# checked within each each interval between start point(s) stID and endpoint(s)
# endID (incl.).  NA and NaN indices generate NA test values, reals are
# rounded to integers, and non-numeric or OOB endpoints raise erros.  Returns
# a list of class 'Ditest' with elements
#    $method             descriptive text
#    $statfn             name of function used to evaluate statistic
#    $statistic          what is tested, the maximum run length
#    $statname           text name of statistic
#    $parameter          test function parameter featlen (endID-stID+1)
#    $p.value            probability of having a run of the given length
#    $tmat               Markov chain transition matrix
#    $smat               Markov chain stationary state
# The statistic, parameter, and p.value will have the same length, one per
# start/end segment.
Dirunlen.test <- function(x, stID, endID, feps) {

  check.endargs(stID, endID, length(x))

  if (0 == length(x)) {
    runlen <- numeric(0)
    featlen <- endID - stID + 1
    p <- numeric(0)
    tmat <- NULL
    smat <- NULL
  } else {
    tmat <- mkv.transition(x)
    smat <- mkv.stationary(tmat)

    if (0 == length(stID)) {
      runlen <- numeric(0)
      featlen <- endID - stID + 1
    } else {
      rl <- sapply(seq_along(stID),
                   function(s) {
                     if (is.na(stID[s]) || is.nan(stID[s]) ||
                         is.na(endID[s]) || is.nan(endID[s])) {
                        c(NA_real_, NA_real_)
                     } else {
                       sel <- x[stID[s]:endID[s]]
                       c(find.runs(sel, feps)$stats$maxrun,
                         sum(!is.na(sel) & !is.nan(sel)))
                     }
                   })
      runlen <- rl[1,]
      featlen <- rl[2,]
    }

    if (0 == length(stID)) {
      p <- numeric(0)
    } else  {
      p1 <- sapply(seq_along(stID),
                   function(s) {
                     test.markov.runlen(runlen[s], featlen[s], tmat, smat)
                   })

      p2 <- sapply(seq_along(stID),
                   function(s) {
                     test.markov.runlen(runlen[s]-1, featlen[s], tmat, smat)
                   })

      p <- p1 - p2
    }
  }
  
  res <- list(method="Markov chain model of longest run",
              statfn="test.markov.runlen", statistic=runlen, statname="runlen",
              parameter=list(featlen=featlen), p.value=p, tmat=tmat, wt=smat)
  class(res) <- "Ditest"
  res
}

# Build a distribution of heights by nexcur draws from data xbase, each draw
# of ndraw points, reconstruct the signal and height depending on whether
# is.peak is TRUE or FALSE for flats, and determine the probability of each
# height ht.  ht and ndraw may be vectors and must have the same length.  If
# lower.tail is TRUE count the number of simulations whose reconstructed
# height is smaller than ht, if FALSE larger; in other words use TRUE for
# flats and FALSE for peaks.  seed if a positive integer is used to set up
# the RNG before the draws.  Returns a list of class 'Ditest' with elements
#    $method             descriptive text
#    $statfn             name (text) of function used to evaluate statistic
#    $statistic          what is tested, the number of runs
#    $statname           text name of statistic
#    $parameter          function arguments stID, endID, permute, nexcur, seed
#    $p.value            probability of number of runs
#    $alternative        direction of test
#    $critval            critical values at quantiles 0.10, 0.05, 0.01, 0.005
#                          (or 1-q if lower.tail TRUE)
#    $xbase              base of samples
Diexcurht.test <- function(ht, ndraw, xbase, nexcur, is.peak, lower.tail=TRUE,
                           seed=0) {

  nexcur <- round(nexcur[1])
  check.htargs(ht, ndraw, xbase, nexcur, lower.tail)

  if (0 < seed) {
    set.seed(seed)
  }

  if (lower.tail) {
    cvID <- round(c(0.005, 0.01, 0.05, 0.10) * nexcur)
    cvnames <- c("cv005", "cv01", "cv05", "cv10")
  } else {
    cvID <- round(c(0.90, 0.95, 0.99, 0.995) * nexcur)
    cvnames <- c("cv90", "cv95", "cv99", "cv995")
  }
  cvID <- pmax(cvID, 1)
  resnames <- c("ht", "pval", cvnames)

  if ((0 == length(xbase)) || (0 == length(ht))) {
    pe <- matrix(NA_real_, nrow=length(ht), ncol=length(resnames),
                 dimnames=list(NULL, resnames))
  } else {
    pe <- sapply(seq_along(ndraw),
                 function(i) {
                   # Minimum draw size to ensure a peak goes to either side.
                   if (!is.finite(ht[i]) || 
                       !is.finite(ndraw[i]) || (ndraw[i] < 3)) {
                     p <- c(ht[i], NA_real_, rep(NA_real_, length(cvID)))
                   } else {
                     excht <- .Call("C_excursion", xbase, nexcur,ndraw[i],
                                    is.peak, TRUE, PACKAGE="Dimodal")
                     pval <- .Call("C_htprob", excht, ht[i], lower.tail,
                                   PACKAGE="Dimodal")
                     p <- c(ht[i], pval, excht[cvID])
                   }
                   names(p) <- resnames
                   p
                 })
    pe <- t(pe)
  }

  res <- list(method="excursion height test",
              statfn="Diexcurht.test", statistic=pe[,"ht"],
              statname="ht",
              parameter=list(ndraw=ndraw, nexcur=nexcur, seed=seed),
              p.value=pe[,"pval"],
              alternatives=ifelse(lower.tail, "less", "greater"),
              critval=pe[,-c(1,2), drop=FALSE], xbase=xbase)
  names(res$statistic) <- NULL
  names(res$p.value) <- NULL
  class(res) <- "Ditest"
  res
}

# Carry out nperm permutations of the values in signed difference vector
# xbase and collect the height (difference of the resulting range) for each.
# We forbid the permutation to have adjacent same-sign symbols.  The
# probability of height(s) ht against this distribution are the number of
# permutations smaller than ht if lower.tail is TRUE, or larger if FALSE.
# seed if a positive integer is used to set the RNG before the draws.
# Returns a list of class 'Ditest' with elements
#    $method             descriptive text
#    $statfn             name (text) of function used to evaluate statistic
#    $statistic          what is tested, the number of runs
#    $statname           text name of statistic
#    $parameter          function arguments stID, endID, permute, nperm, seed
#    $p.value            probability of number of runs
#    $alternative        direction of test
#    $critval            critical values at quantiles 0.10, 0.05, 0.01, 0.005
#                          (or 1-q if lower.tail TRUE)
#    $xbase              base of samples
# Statistic, p.value, critval will be NA if there are two or fewer xbase points
# or no lengths.
Dipermht.test <- function(ht, xbase, nperm, lower.tail=TRUE, seed=0) {

  nperm <- round(nperm[1])
  check.htargs(ht, FALSE, xbase, nperm, lower.tail)

  if (lower.tail) {
    qcv <- c(0.005, 0.01, 0.05, 0.10)
    cvnames <- c("cv005", "cv01", "cv05", "cv10")
  } else {
    qcv <- c(0.90, 0.95, 0.99, 0.995)
    cvnames <- c("cv90", "cv95", "cv99", "cv995")
  }

  if (0 < seed) {
    set.seed(seed)
  }

  # Minimum base size to ensure a peak goes up and down.
  # Does it make sense to impose a stronger minimum, because the resolution
  # will be too low for only a few runs, ie. 5! = 120 ~ 1%?
  if ((length(xbase) < 2) || (0 == length(ht))) {
    cval <- numeric(0)
    pe <- matrix(NA_real_, nrow=length(ht), ncol=2,
                 dimnames=list(NULL, c("ht", "pval")))
  } else {
    xgrp <- sign(xbase)
    # The alternating requirement knocks out a large fraction of the possible
    # permutations.
    if (lfactorial(length(xbase)) < (log(nperm) - log(0.05))) {
      permht <- apply(permute.all.alt(xbase), 1,
                      function(r) {
                        cp <- cumsum(r)
                        max(cp) - min(0, cp[length(cp)]) })
    } else {
      gspec <- setup.permute(xgrp)
      permht <- replicate(nperm, alt.permute(xbase, gspec))
    }
    permht <- sort(permht)

    cval <- permht[pmax(round(qcv * length(permht)), 1)]
    names(cval) <- cvnames

    pval <- sapply(ht,
                   function(h) {
                     if (is.na(h) || is.nan(h)) {
                       NA_real_
                     } else {
                       .Call("C_htprob", permht, h, lower.tail,
                             PACKAGE="Dimodal")
                     }
                   })
    pe <- cbind(ht=ht, pval=pval)
  }

  res <- list(method="run height permutation test",
              statfn="Dipermht.test", statistic=pe[,"ht"],
              statname="ht",
              parameter=list(nperm=nperm, seed=seed),
              p.value=pe[,"pval"],
              alternatives=ifelse(lower.tail, "less", "greater"),
              critval=cval, xbase=xbase)
  names(res$statistic) <- NULL
  names(res$p.value) <- NULL
  class(res) <- "Ditest"
  res
}


#### Implementation Details

# Validity check on the draw and window size arguments or lower.tail flag to
# any of the model test/critical value functions.  Bad values raise an error.
# filter and basedist have defaults.
check.modelargs <- function(n, flp, lower.tail) {

  # This is consistent with R's behavior for pnorm - non-numeric arguments
  # raise errors, logical are coerced to integers.
  if ((0 == length(n)) || !is.finite(n) ||
      (!is.numeric(n) && !is.logical(n))) {
    stop("non-numeric argument n", call.=FALSE)
  }
  if ((0 == length(flp)) || !is.finite(flp) ||
      (!is.numeric(flp) && !is.logical(flp))) {
    stop("non-numeric argument flp", call.=FALSE)
  }
  if (n < 1) {
    stop("argument n must be at least 1", call.=FALSE)
  }
  # This fails if the filter size was given absolutely and is more than
  # half the data.  You probably want to pass the fractional size.
  if ((flp <= 0) || (n < flp)) {
    stop("argument flp must be positive and less than n", call.=FALSE)
  }

  if ((0 == length(lower.tail)) || !is.logical(lower.tail)) {
    stop("non-logical argument lower.tail", call.=FALSE)
  }

  if (((flp < 1.0) && ((flp < 0.05) || (0.30 < flp))) ||
      ((n < 60) || (500 < n))) {
    warning("models used outside recommended range (flp: 0.05-0.30, n 60-500)")
  }
}

# Validity checks on the start indices stID and endpoints endID of intervals
# in a vector of length nx.  Bad values (not numeric, not within vector,
# endID < stID) raise an error.  NA and NaN values are allowed.
check.endargs <- function(stID, endID, nx) {

  if (length(stID) != length(endID)) {
    stop("start and endpoint vectors do not have same length")
  }

  if (any(is.na(stID) | is.nan(stID) | is.na(endID) | is.nan(endID))) {
    return()
  }

  if (0 < length(stID)) {
    if (any(!is.numeric(stID))) {
      stop("not all start points numeric")
    }   
    if (any((stID < 1) | (nx < stID))) {
      stop("some start points lie outside data")
    }
  }

  if (0 < length(endID)) {
    if (any(!is.numeric(endID))) {
      stop("not all endpoints numeric")
    }
    if (any((endID < 1) | (nx < endID))) {
      stop("some endpoints lie outside data")
    }
  }

  if (any(endID < stID)) {
    stop("have endpoints before start points")
  }
}

# Validity checks on the Diexcurht.test and Dipermht.test arguments.  Pass
# FALSE for ndraw for the permutation tests where it is not used.  Bad values
# (empty/non-finite/non-numeric xdraw, negative nperm, ) raise an error.
# ht and ndraw may have NA, NaN values or be an empty vector.
check.htargs <- function(ht, ndraw, xbase, nperm, lower.tail) {

  if ((0 < length(xbase)) &&
      (any(!is.integer(xbase) & !is.numeric(xbase)) ||
       any(!is.finite(xbase)))) {
    stop("xbase set is empty or has non-numeric or non-finite values")
  }

  if ((!is.numeric(nperm) && !is.integer(nperm)) || !is.finite(nperm) ||
      (nperm <= 0)) {
    stop("nperm must be positive")
  }

  if ((1 != length(ndraw)) || !is.logical(ndraw) || (FALSE != ndraw)) {
    if (length(ndraw) != length(ht)) {
      stop("draw sizes do not match heights")
    }

    if ((0 < length(ndraw)) && any(!is.integer(ndraw) & !is.numeric(ndraw))) {
      stop("non-numeric draw sizes")
    }
    if (0 < length(ndraw)) {
      valid <- is.finite(ndraw)
      if (0 < sum(valid)) {
        if (any(ndraw[valid] > length(xbase), na.rm=TRUE)) {
          stop("excursion test samples more points than available")
        }
        if (any(ndraw[valid] < 0)) {
          stop("draw sizes cannot be negative")
        }
      }
    }
  }

  if ((0 < length(ht)) && any(!is.integer(ht) & !is.numeric(ht))) {
    stop("non-numeric heights")
  }
  if (0 < length(ht)) {
    valid <- is.finite(ht)
    if (0 < sum(valid)) {
      if (any(ht[valid] < 0)) {
        stop("feature heights cannot be negative")
      }
    }
  }

  if ((0 == length(lower.tail)) || !is.logical(lower.tail)) {
    stop("non-logical argument lower.tail", call.=FALSE)
  }
}

## Height Models

# Model the correction of the local maxima height relative the FIR standard
# deviation, htsd, for the named filter.
peak.filterratio <- function(filter) {

  # There's a small variation in the correction for the window size and
  # sample draw - not modeling, 10% for n=100/500, win=0.10/0.20 combinations
  # for the first three, 25% for gauss and blackman, 40% for the last two.
  # Also a small variation in quantile.  We're picking the ratio to the Kaiser
  # filter at q=0.999; other quantiles will give larger ratios which
  # under-predicts the ht/sd.
  if ((0 == length(filter)) || is.na(filter) || !is.character(filter)) {
    warning("filter not a character string, using Kaiser")
    frat <- 1.0
  } else if (("triangular" == filter) || ("bartlett" == filter)) {
    frat <- 1.086
  } else if ("hamming" == filter) {
    frat <- 1.122
  } else if ("hanning" == filter) {
    frat <- 1.155
  } else if (("gaussian" == filter) || ("normal" == filter)) {
    frat <- 1.239
  } else if ("blackman" == filter) {
    frat <- 1.248
  } else {
    if ("kaiser" != filter) {
      warning("unsupported filter for peak test, using Kaiser")
    }
    frat <- 1.0
  }

  frat
}

# Calculate the Wald/inverse gaussian parameters param (1 for the mu/mean,
# 2 for the lambda/shape; or by name) for the sample draw size n and
# FIR window width flp (as a fraction of n).
peak.qwald.param <- function(param, n, flp) {

  # Modeled at flp=0.15 only.
  #  p1 <- 3.8356 - (1.1459 * log10(n))
  #  p2 <- 4.762

  # Our model for either parameter is the same: linear against the log of
  # the draw size, where the slope and intercept are linear with the width.
  if ((1 == param) || ("mu" == param) || ("mean" == param)) {
    b <-  5.8158 + (2.4152 * log10(flp))
    m <- -1.9704 - (1.0131 * log10(flp))
  } else if ((2 == param) || ("lambda" == param) || ("shape" == param)) {
    b <- -2.0204 + (49.7357 * flp)
    m <-  2.6034 - (19.5195 * flp)
  }

  # Clip invalid (negative) values to 0.  Returning NA will not work
  # with pinvgauss, if the values also contain NAs.
  max(0, b + (m * log10(n)))
}

# Correction of height quantile in peak model, which depends on the
# fraction filter kernel size flp.  Returns a pair with elements $badj, $madj,
# the intercept and slope of the (linear) correction.
peak.correction <- function(flp) {

  c(badj=-0.2305 + (11.8716 * flp) - (46.9360 * (flp^2)) + (85.0096 * (flp^3)),
    madj=4.2412 - (7.2054 * flp) + (0.1547 / flp))
}

## Length Models

# Get the flat length model coefficients for a parametric draw of size n from
# distribution basedist and smoothed by an FIR kernel filter of fractional
# width flp (must be < 1).  Returns a 4 element vector that must be multiplied
# by the p terms (1, p, p^2, log(p/(1-p))) and summed to get the final
# quantile.
flat.pcoef <- function(n, flp, filter, basedist) {

  if ((0 == length(basedist)) || is.na(basedist) || !is.character(basedist)) {
    warning("basedist not a character string, using logistic")
    pm <- pcoef.length.logistic(filter)
  } else if ("weibull" == basedist) {
    pm <- pcoef.length.weibull(filter)
  } else if (("normal" == basedist) || ("gaussian" == basedist)) {
    pm <- pcoef.length.normal(filter)
  } else if ("gumbel" == basedist) {
    pm <- pcoef.length.gumbel(filter)
  } else {
    if ("logistic" != basedist) {
      warning(paste0("unsupported base distribution ", basedist,
                     ", using logistic"))
    }
    pm <- pcoef.length.logistic(filter)
  }

  nw <- c(1, n, n^2, flp, flp*n, flp*(n^2))

  pcf <- matrix(pm, nrow=4, byrow=TRUE)
  # Sanity check.
  if (ncol(pcf) != length(nw)) {
    stop("internal error - pcoef matrix does not conform to n/flp combinations")
  }

  # Collapse the model by column, leaving a 4 element vector to be multiplied
  # by the p terms.
  pcf %*% nw
}

# Return a vector with the flat length model coefficients based on a logistic
# draw smoothed by FIR kernel filter.  Columns are combinations of draw size
# and fractional kernel window size terms.  Can be cast into a 4x6 matrix by
# row.
pcoef.length.logistic <- function(filter) {

  if ((0 == length(filter)) || is.na(filter) || !is.character(filter)) {
    warning("filter not a character string, using Kaiser")
    filter <- "kaiser"
  }
  if ("hanning" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  895.07,  -6.4915,  0.050202,   -2914.0,   39.755,  -0.12859,
      -1945.1,   14.621,  -0.10957,     6365.8,  -87.586,   0.28017,
       1068.6,   -8.4033,  0.060307,   -3524.0,   49.338,  -0.15443,
      -0.80887, 0.078324, -6.8829e-5,   3.8649, -0.16948,   1.9127e-4)
  } else if ("hamming" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c( -1.3561,  -2.6245,   0.055274,   -1175.4,   41.627,  -0.15507,
      -45.672,    6.7682,  -0.12187,     2713.6,  -93.135,   0.34047,
       61.9459,  -4.4633,   0.067877,   -1604.3,   53.304,  -0.18934,
       -1.0098, 0.086396,  -9.7872e-5,  4.5358,  -0.19984,   2.8048e-4)
  } else if (("triangular" == filter) || ("bartlett" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c( -731.78,   20.902,   9.4621e-3,  1129.6,  -29.898, -0.025898,
       1552.6,   -44.270, -0.023055,   -2347.6,   62.174,  0.062428,
       -814.69,   23.292,  0.014510,    1185.1,  -31.308, -0.039256,
      -0.11633, 0.071427, -7.8624e-5,   1.9172, -0.15436,  2.168e-4)
  } else if (("gaussian" == filter) || ("normal" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  2248.3,  -20.929,  0.058887,   -7656.4,   82.141,  -0.15955,
       -4859.7,   45.630, -0.12765,    16555,   -178.30,    0.34485,
        2639.1,  -25.057,  0.069560,   -8993.5,   97.708,  -0.18757,
      -0.83352, 0.074709, -4.7317e-5,   3.5018, -0.15093,   1.2987e-4)
  } else if ("blackman" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  1616.7,  -18.638,  0.051899,   -5458.4,   73.494,   -0.13673,
       -3490.0,   40.584, -0.11238,    11782.1, -159.31,     0.29516,
        1894.7,  -22.256,  0.061167,   -6397.7,   87.202,   -0.16035,
      -0.41984, 0.067871, -3.3585e-05,  2.0423,   -0.12768,  8.6115e-5)
  } else {
    if ("kaiser" != filter) {
      warning("unsupported filter ", filter, ", using kaiser")
    }
    #   const         n        n^2        flp     flp*n     flp*n^2
    c(-2389.1,   39.048, -0.025085,   5.9153e+3,  -72.984,  0.047613,
       5122.2,  -83.133,  0.050001,  -1.2677e+4,  154.64,  -0.094133,
      -2739.2,   44.077, -0.023815,   6.7609e+3,  -80.650,  0.042964,
      0.43973, 0.069628, -9.5534e-5,    0.41845, -0.15918,  2.807e-4)
  }
}

# Return a vector with the flat length model coefficients based on a Weibull
# draw smoothed by FIR kernel filter.  Can be cast into a 4x6 matrix by row.
pcoef.length.weibull <- function(filter) {

  if ((0 == length(filter)) || is.na(filter) || !is.character(filter)) {
    warning("filter not a character string, using Kaiser")
    filter <- "kaiser"
  }
  if ("hanning" == filter) {
    #   const         n        n^2        flp     flp*n      flp*n^2
    c( -4.9840,  -3.2583,   1.8028e-3,  330.12,   19.067,    0.033660,
       17.469,    7.1345,  -3.8490e-3, -711.49,  -41.233,   -0.073340,
       -6.5314,  -3.9661,   2.1845e-3,  360.79,   22.817,    0.039462,
       0.72389, 0.026799,   2.1729e-5, -2.1478,  8.0694e-3, -8.6371e-5)
  } else if ("hamming" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  86.658,  -6.2163,   9.3797e-3,  -700.22,    37.300,   9.2935e-3,
      -175.64,   13.469,   -0.020101,    1489.3,   -80.287,  -0.021620,
        93.357,  -7.3396,   0.010898,    -804.34,   43.671,   0.011922,
       0.58325, 0.027359,   2.5845e-5,  -2.0106,   0.00681,  -9.7996e-5)
  } else if (("triangular" == filter) || ("bartlett" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  133.22,  -6.5424,  0.012183,   -570.49,    36.029,  -7.5517e-4,
       -281.46,  14.191,  -0.026233,   1223.8,    -77.590,   5.4783e-4,
        154.47,  -7.7411,  0.014236,   -669.89,    42.186,   2.2254e-5,
       0.36176, 0.034808,  1.585e-5,   -1.3107, -0.015858,  -8.0323e-5)
  } else if (("gaussian" == filter) || ("normal" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  81.895,  -5.3860,  4.3516e-3,  -16.150,    21.258,   0.014945,
      -160.95,   11.623,  -9.2230e-3,   24.492,   -45.817,  -0.032756,
        83.962,  -6.3206,  4.9779e-3,  -22.006,    25.059,   0.017657,
        1.3213, 0.019862,  2.0665e-5,   -3.9419, 0.026294,  -6.4009e-5)
  } else if ("blackman" == filter) {
    #    const         n        n^2        flp     flp*n    flp*n^2
    c(  -254.86,  -2.3780,  5.5634e-5,  1055.6,    12.453,   0.026062,
         551.58,   5.2308, -1.2819e-4, -2256.2,   -26.961,  -0.056475,
        -292.19,  -2.9261,  1.6688e-4,  1189.1,    14.961,   0.030318,
         1.1371, 0.019393,  1.9808e-5, -3.5074,  0.030359,  -6.5452e-5)
  } else {
    if ("kaiser" != filter) {
      warning("unsupported filter ", filter, ", using kaiser")
    }
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  185.83,  -8.9368,  0.024456,   -1864.9,    61.922,  -0.063500,
       -392.38,  19.341,  -0.052597,    3978.0,  -132.97,    0.13362,
        211.76, -10.507,   0.028442,   -2133.5,    71.896,  -0.070919,
      0.010027, 0.038499,  1.4777e-5,  0.26152, -0.035014,  -6.5996e-5)
  }
}

# Return a vector with the flat length model coefficients based on normal
# draws smoothed by FIR kernel filter.  Can be cast into a 4x6 matrix by row.
pcoef.length.normal <- function(filter) {

  if ((0 == length(filter)) || is.na(filter) || !is.character(filter)) {
    warning("filter not a character string, using Kaiser")
    filter <- "kaiser"
  }
  if ("hanning" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  209.32,  -7.3078,  0.011596,   -711.87,    36.242,  -2.1480e-3,
       -448.08,  15.925,  -0.025068,   1529.6,    -78.318,   3.8716e-3,
        246.65,  -8.7548,  0.013716,   -846.01,    42.923,  -2.2864e-3,
       0.59752, 0.036907,  1.4542e-5,   -1.604, -0.026236,  -6.7247e-5)
  } else if ("hamming" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  307.04,  -10.703,  0.022635,   -1405.5,    54.554,  -0.032498,
       -658.02,   23.252, -0.048878,    3028.0,  -117.83,    0.068855,
        356.80,  -12.685,  0.026553,   -1644.8,    64.186,  -0.037172,
       0.54728, 0.036049,  2.1316e-5,  -1.7339, -0.024355,  -8.694e-5)
  } else if (("triangular" == filter) || ("bartlett" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  443.26,  -10.033,  0.025407,    -1897.6,    55.841, -0.056668,
       -946.30,   21.779, -0.054899,     4064.4,  -120.38,   0.121160,
        510.96,  -11.885,  0.029831,    -2192.6,    65.403, -0.065230,
       0.17459, 0.046524,  4.2899e-6,  -0.34232, -0.059278, -4.144e-5)
  } else if (("gaussian" == filter) || ("normal" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  262.78,  -7.8587,  8.9202e-3,  -986.59,     33.510,  -4.2118e-3,
       -550.32,  16.984,  -0.019065,   2093.2,     -72.079,   7.9891e-3,
        292.97,  -9.2384,  0.010298,  -1123.7,      39.192,  -4.0579e-3,
        1.6729, 0.025024,  2.2439e-5,  -4.9313,   8.3653e-3, -7.3441e-5)
  } else if ("blackman" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  74.431,  -6.1651,  6.0550e-3,   7.4568,     24.212,  0.011533,
      -145.40,   13.308,  -0.012853,  -34.403,    -52.049,  -0.025881,
        76.036,  -7.2398,  6.9213e-3,  12.338,     28.388,   0.014190,
        1.2774, 0.024892,  2.1922e-5,  -3.7843,  0.011591,  -7.6688e-5)
  } else {
    if ("kaiser" != filter) {
      warning("unsupported filter ", filter, ", using kaiser")
    }
    #   const         n        n^2        flp     flp*n    flp*n^2
    c ( 741.06,  -17.064,  0.053711,   -3807.4,    93.135,  -0.16180,
      -1593.3,    36.962, -0.11593,     8194.4,   -200.65,   0.34606,
        861.61,  -20.087,  0.062820,   -4425.2,    108.73,  -0.18599,
      -0.56429, 0.055043, -1.137e-5,    2.2923, -0.089751,   8.0502e-6)
  }
}

# Return a vector with the flat length model coefficients based on a Gumbel
# draw smoothed by FIR knerel filter.  Can be cast into a 4x6 matrix by row.
pcoef.length.gumbel <- function(filter) {

  if ((0 == length(filter)) || is.na(filter) || !is.character(filter)) {
    warning("filter not a character string, using Kaiser")
    filter <- "kaiser"
  }
  if ("hanning" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c( -1598.0,   47.873, -0.032367,    4161.1,  -106.21,  0.073324,
        3410.9, -102.175,  0.065916,   -8822.6,   225.65, -0.147592,
       -1804.3,   54.258, -0.032486,    4613.0,  -118.40,  0.071224,
      -0.23631, 0.075773, -9.6954e-5,   2.6738, -0.16307,  2.5834e-4 )
  } else if ("hamming" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c( -4048.0,   77.083,  -0.10121,      11980,  -197.31,   0.27878,
        8685.7, -164.802,   0.21272,     -25613,   420.27,  -0.58524,
       -4646.0,   87.778,  -0.11039,      13626,  -221.98,   0.30297,
        1.0524,  0.06882,  -1.0146e-4, -0.56980, -0.16052,   2.9109e-4 )
  } else if (("triangular" == filter) || ("bartlett" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c( -2278.6,   74.222,  -0.11808,    6609.5,   -188.44,   0.32789,
        4964.3, -160.127,   0.25141,  -14335.1,    405.65,  -0.69687,
       -2698.3,   86.257,  -0.13276,    7743.4,   -217.24,   0.36735,
       1.6654, 0.044069,  -5.5019e-5, -2.9821, -0.079026,   1.3708e-4 )
  } else if (("gaussian" == filter) || ("normal" == filter)) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  18.961, -0.26832,   0.066611,   -1454.5,   39.102,  -0.21431,
       -79.873,  1.68360,  -0.146120,    3278.1,  -87.435,   0.46803,
        80.357, -1.79288,   0.080824,   -1900.2,   50.104,  -0.25722,
       -1.2499, 0.095639,  -1.0463e-4,   4.8565, -0.20499,   2.6783e-4 )
  } else if ("blackman" == filter) {
    #   const         n        n^2        flp     flp*n    flp*n^2
    c(  148.96,   13.913,   0.038246,   -1735.7,  -6.8326,  -0.12234,
       -381.39,  -28.733,  -0.085554,    3944.8,  11.1196,   0.27155,
        256.95,   14.512,   0.048553,   -2302.8,  -2.6911,  -0.15256,
       -1.4974, 0.093077,  -1.0534e-4,   6.0483,  -0.20031,  2.74553e-4 )
  } else {
    if ("kaiser" != filter) {
      warning("unsupported filter ", filter, ", using kaiser")
    }
    #   const         n        n^2        flp     flp*n    flp*n^2
    c( -2286.5,   77.418,  -0.14717,    7699.7,   -209.25,   0.42424,
        5091.1, -168.237,   0.31626,  -17056.9,    454.52,  -0.91247,
       -2834.9,   91.323,  -0.16862,    9425.3,   -245.53,   0.48656,
       3.0400, 0.031533,  -4.2934e-5, -6.9384, -0.056694,   1.24383e-4 )
  }

}

# Find the location of the minimum of our flat model.  pcf is the matrix
# returned from flat.pcoef.  Return NA if there is no minimum, or the largest
# value below 1 if there are multiple zeroes.
flat.pmin <- function(pcf) {

  # Our model is
  #  pcf[1] * (p * pcf[2]) + (p^2 * pcf[3]) * (log(p/(1-p)) * pcf[4])
  # and the gradient is
  #  pcf[2] + (2 * p * pcf[3]) + ((1/p) * (1/(1-p)) * pcf[4])
  # Extrema are located where gradient == 0, which is a cubic equation and
  # can be solved directly rather than by a root finder.  The solution is
  # based on the Wikipedia cubic equation page.

  if ((0 == pcf[2]) && (0 == pcf[3])) {
    exp(-pcf[1]/pcf[4]) / (1.0 + exp(-pcf[1]/pcf[4]))
  } else if (0 == pcf[3]) {
    (1.0 + sqrt(1.0 + (4.0 * pcf[4] / pcf[2]))) / 2.0
  } else {
    B <- pcf[2] / (2.0 * pcf[3])
    D <- pcf[4] / (2.0 * pcf[3])
    E <- -((B^2) + B + 1.0) / 3.0
    F <- ((2.0*(B^3)) + (3.0*(B^2)) - (3.0*B) - (27.0*D) - 2.0) / 27.0
    delta <- ((E^3) / 27.0) + ((F^2) / 4.0)
    # Treat values close to zero as matching
    eps <- 1.0e-10
    if (eps < delta) {
      # Other two roots are imaginary.
      t0 <- cbrt((-F/2.0) * sqrt(delta)) + cbrt((-F/2.0) - sqrt(delta))
    } else if (delta < -eps) {
      phis <- (acos(((3 * F) / (2 * E)) * sqrt(-3/E)) - (2 * (0:2) * pi)) / 3
      t0 <- 2.0 * sqrt(-E/3.0) * cos(phis)
    } else {
      t0 <- c((3.0 * F) / E, (-3.0 * F) / (2.0 * E))
    }
  }

  # Shift back to p.
  t0 <- t0 - ((B - 1.0) / 3.0)
  t0 <- t0[is.finite(t0)]
  if ((0 == length(t0)) || (0 == sum((0 <= t0) & (t0 < 1)))) {
    NA_real_
  } else {
    max(t0[t0<1])
  }
}

# Wrapper around uniroot that returns error.value for $root instead of stopping
# if there's a problem finding a root.  Arguments passed through to uniroot.
safe.uniroot <- function(..., error.value=NA_real_) {
  dummy <- list(root=error.value, f.root=NULL, iter=NA, init.it=NA,
                estim.prec=NA)
  
  tryCatch(uniroot(...),
           error=function(e) { dummy }, warning=function(w) { dummy })
}

# Return the cube root of each x, which may be negative, unlike x^(1/3).
cbrt <- function(x) {

  sign(x) * (abs(x)^(1/3))
}

## Markov Chains

# Build the transition matrix for a first order Markov chain along data x
# for the unique values lvls, which can be a superset of what is present in
# x (in case some values are missing in a subset).  If rel is TRUE normalize
# each row to sum to 1, except if there are no transitions to the final
# state along it, where the sum will be 0, otherwise return the raw
# transition counts.  The transition matrix is #lvls^r x #lvls.
mkv.transition <- function(x, lvls=sort(unique(x), na.last=NA), rel=TRUE) {

  x <- x[is.finite(x)]

  nlvl <- length(lvls)
  
  # Guarantee that each possible combination is counted at least once.
  # The vectors are length nlvl^2.
  pairst <- sapply(lvls, rep, nlvl, simplify="vector")
  pairend <- sapply(1:nlvl, function(i) { lvls }, simplify="vector")
  tmat <- table(start=c(x[-length(x)], NA, pairst),
                  end=c(x[-1L], NA, pairend))
  # The pairs mean tmat is nlvl x nlvl.  Values in x not in lvls increase
  # the size.
  if ((nlvl != nrow(tmat)) || (nlvl != ncol(tmat))) {
    stop("data contains unrecognized levels")
  }
  tmat <- tmat - 1

  if (rel) {
    tmat <- tmat / rowSums(tmat)
    tmat[!is.finite(tmat)] <- 0
  }

  tmat
}

# Return the steady-state state vector for Markov transition matrix tmat.
# If none is found, return a vector of NAs.  
mkv.stationary <- function(tmat) {

  ttmat <- t(tmat)
  ev <- eigen(ttmat)

  # Stationary state is not valid if largest left eigenvalue is not 1.
  if (1e-4 < abs(Mod(ev$values[1]) - 1)) {
    dummy <- rep(NA_real_, ncol(tmat))
    names(dummy) <- colnames(tmat)
    dummy
  } else {
    smat <- ttmat %*% ev$vectors
    scl <- Mod(colSums(smat))

    # Or if we cannot normalize.  In case of tied eigenvalues we need to
    # check all corresponding eigenvectors to see if any are good =
    # eigenvalue at 1 and sum of stationary state not 0.  Note that the
    # scale is unsigned but the final normalization is signed.
    valid <- (Mod(ev$values) - 1) < 1e-4
    i <- which.max(scl)
    if (!valid[i] || (scl[i] < 1e-4)) {
      dummy <- rep(NA_real_, ncol(tmat))
      names(dummy) <- colnames(tmat)
      dummy
    } else {
      Re(smat[,i] / sum(smat[,i]))
    }
  }
}

# Return the probability that all runs are <= runlen in length for
# trajectories of featlen steps through a Markov chain with transition
# matrix tmat.  Consider all paths starting from state(s) i; if more than
# one, scale by weight wt (stationary matrix or 1/nrow(tmat) for uniform
# starts).  If transition matrix is bad (b.v. only one state) return NA.
test.markov.runlen <- function(runlen, featlen, tmat, wt, i=1:nrow(tmat)) {

  runlen <- runlen[1L]
  featlen <- featlen[1L]

  if (!is.finite(runlen) || !is.finite(featlen)) {
    return(NA_real_)
  }

  if ((ncol(tmat) != nrow(tmat)) ||
      !all(abs(rowSums(tmat) - 1.0) < 2*.Machine$double.eps)) {
    warning("bad transition matrix in longest runs test")
    return(NA_real_)
  }
  if (nrow(tmat) != length(wt)) {
    warning("weight/stationary vector incompatible with transition matrix")
    return(NA_real_)
  }

  # Comment out this if you want the original, slower R implementation.
  return( .Call("C_test_runlen", runlen, featlen, tmat, wt, PACKAGE="Dimodal") )

  if ((runlen < 1) || (featlen < 1)) {
    return(0)
  } else if (featlen <= runlen) {
    return(1)
  }

  # A advances to the next run length.  R resets back to len=1.
  A <- diag(diag(tmat))
  R <- tmat - A
  
  recur <- array(0, c(runlen, nrow(tmat), ncol(tmat)))
  recur[runlen,,] <- A

  if (2 <= featlen) {
    for (t in 2:featlen) {
      top <- R %*% recur[1,,]
      prev <- recur[runlen,,]
      recur[runlen,,] <- top + A
      if (1 < runlen) {
        for (j in (runlen-1):1) {
          curr <- top + (A %*% prev)
          prev <- recur[j,,]
          recur[j,,] <- curr
        }
      }
    }
  }

  plong <- rowSums(recur[1,,])[i]

  if (1 < length(i)) {
    1 - sum(plong * wt)
  } else {
    1 - plong
  }
}

## Runs Permutations

# Separate the group identifiers grp (any class, we take unique values) in
# preparation for a permutation that separates them.  Returns a list with
#   $grpval      unique identifiers
#   $grpID       position in grp of each identifier, in grpval order
#   $ngrp        population of each symbol
setup.permute <- function(grp) {

  # Doing this once before the permutations, rather than at the top of
  # alt.permute, cuts the run time roughly in half, from 4.2 s to 2.2 s
  # for the test bench.
  
  grpval <- sort(unique(grp))
  grpID <- lapply(grpval, function(g) { which(grp == g) })
  ngrp <- sapply(grpID, length)

  list(grpval=grpval, grpID=grpID, ngrp=ngrp)
}

# Return the height of a permutation, or the permutation itself if raw TRUE,
# of the values in x, with the added condition that no two adjacent will
# belong to the same group, described by the setup.permute result grpspec.
alt.permute <- function(x, grpspec, raw=FALSE) {

  grpval <- grpspec$grpval
  # Scramble order within each group.
  grpID <- lapply(grpspec$grpID, function(g) { g[sample.int(length(g))] })
  ngrp <- grpspec$ngrp
  
  perm <- integer(length(x))
  
  if (1 == length(grpval)) {
    perm <- x[grpID[[1]]]
  } else {
    sID <- .Call("C_altperm_symbols", ngrp, PACKAGE="Dimodal")
    for (i in seq_along(grpval)) {
      perm[sID==i] <- x[grpID[[i]]]
    }
  }

  if (raw) {
    perm
  } else {
    # This is the old height, suitable for flats.
    # diff(range(cumsum( perm )))
    cp <- cumsum(perm)
    max(cp) - min(0, cp[length(cp)])
  }
}

# Exhaustively permute the elements of vector x, deleting any where the sign
# of adjacent elements is the same.  Returns a matrix with length(x) columns
# and permutations in rows.
permute.all.alt <- function(x) {

  perm <- permute.all(x)
  dropcol <- -ncol(perm)
  keep <- apply(perm, 1, function(r) { all(sign(r[-1L]) != sign(r[dropcol])) })
  perm[keep,,drop=FALSE]
}

# Exhaustively permute the elements of vector x.  Returns a matrix with
# length(x)! rows and length(x) columns.
permute.all <- function(x) {

  if (4 == length(x)) {
    matrix(c(x[1],x[2],x[3],x[4], x[1],x[2],x[4],x[3], x[1],x[3],x[2],x[4],
             x[1],x[3],x[4],x[2], x[1],x[4],x[2],x[3], x[1],x[4],x[3],x[2],
             x[2],x[1],x[3],x[4], x[2],x[1],x[4],x[3], x[2],x[3],x[1],x[4],
             x[2],x[3],x[4],x[1], x[2],x[4],x[1],x[3], x[2],x[4],x[3],x[1],
             x[3],x[1],x[2],x[4], x[3],x[1],x[4],x[2], x[3],x[2],x[1],x[4],
             x[3],x[2],x[4],x[1], x[3],x[4],x[1],x[2], x[3],x[4],x[2],x[1],
             x[4],x[1],x[2],x[3], x[4],x[1],x[3],x[2], x[4],x[2],x[1],x[3],
             x[4],x[2],x[3],x[1], x[4],x[3],x[1],x[2], x[4],x[3],x[2],x[1]),
             nrow=24, byrow=TRUE)
  } else if (3 == length(x)) {
    matrix(c(x[1],x[2],x[3], x[1],x[3],x[2],
             x[2],x[1],x[3], x[2],x[3],x[1],
             x[3],x[1],x[2], x[3],x[2],x[1]), nrow=6, byrow=TRUE)
  } else if (2 == length(x)) {
    matrix(c(x[1],x[2], x[2],x[1]), nrow=2, byrow=TRUE)
  } else if (1 == length(x)) {
    matrix(x[1], nrow=1, ncol=1)
  } else if (0 < length(x)) {
    # Why does sapply or simplify2array not bind the sub-matrices correctly?
    # Ah well, do it ourselves.
    sub <- lapply(1:length(x),
                  function(i) {
                    sub <- permute.all(x[-i])
                    cbind(rep(x[i], nrow(sub)), sub)
                  })
    perm <- sub[[1]]
    for (i in 2:length(x)) {
      perm <- rbind(perm, sub[[i]])
    }
    perm
  } else {
    stop("no permutations possible for 0 length vector")
  }
}