Nothing
##` Marginal splitters
##' @title Helper functions for marginal splitting
##' @description These functions are helpers to safely split bins
##' along X or Y.
##' @details These unexported functions have been defined primarily
##' to clean up other code, but could be changed to obtain different
##' core functionality.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param bd numeric split point within the bin bounds
##' @param above indices of `x` and `y` points in the bin above `bd`
##' @param below indices of `x` and `y` points in the bin below `bd`
##' @return A list of two bins resulting from the split of `bin` at
##' `bds`.
##' @author Chris Salahub
##' @describeIn marginalsplitters Splitting on x
splitX <- function(bin, bd, above, below) {
belowfac <- (bd - bin$bnds$x[1])/diff(bin$bnds$x)
abovefac <- (bin$bnds$x[2] - bd)/diff(bin$bnds$x)
list(makeBin(x = bin$x[below], y = bin$y[below],
bnds = list(x = c(bin$bnds$x[1], bd),
y = bin$bnds$y),
expn = bin$expn*belowfac,
n = bin$n-length(above), depth = bin$depth + 1,
stopped = FALSE),
makeBin(x = bin$x[above], y = bin$y[above],
bnds = list(x = c(bd, bin$bnds$x[2]),
y = bin$bnds$y),
expn = bin$expn*abovefac,
n = length(above), depth = bin$depth + 1,
stopped = FALSE))
}
##' @describeIn marginalsplitters Splitting on y
splitY <- function(bin, bd, above, below) {
belowfac <- (bd - bin$bnds$y[1])/diff(bin$bnds$y)
abovefac <- (bin$bnds$y[2] - bd)/diff(bin$bnds$y)
list(makeBin(x = bin$x[below], y = bin$y[below],
bnds = list(x = bin$bnds$x,
y = c(bin$bnds$y[1], bd)),
expn = bin$expn*belowfac,
n = bin$n-length(above), depth = bin$depth + 1,
stopped = FALSE),
makeBin(x = bin$x[above], y = bin$y[above],
bnds = list(x = bin$bnds$x,
y = c(bd, bin$bnds$y[2])),
expn = bin$expn*abovefac,
n = length(above), depth = bin$depth + 1,
stopped = FALSE))
}
##' @title Halve at an observed point
##' @description This function halves a bin under the restriction that
##' splits can only occur at observation coordinates.
##' @details Given a bin and a margin, this function splits the bin so
##' half the points are above the new split point and half are below.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param margin string, one of `x` or `y`
##' @return A list of two bins resulting from the split of `bin` in
##' half along the specified margin
##' @examples
##' bin <- list(x = 1:10, y = sample(1:10),
##' bnds = list(x = c(0, 10), y = c(0, 10)),
##' expn = 10, n = 10, depth = 0)
##' halfSplit(bin)
##' halfSplit(bin, margin = "y")
##' @author Chris Salahub
halfSplit <- function(bin, margin = sample(c("x", "y"), 1)) {
if (margin == "x") {
xsort <- order(bin$x)
hind <- floor(bin$n/2) # middle index
newbnd <- bin$x[xsort][hind] # middle value
above <- xsort[(hind+1):(bin$n)] # points above
below <- xsort[1:hind] # and below
splitX(bin, bd = newbnd, above = above, below = below)
} else if (margin == "y") {
ysort <- order(bin$y)
hind <- floor(bin$n/2) # middle index
newbnd <- bin$y[ysort][hind] # middle value
above <- ysort[(hind+1):(bin$n)] # points above
below <- ysort[1:hind]
splitY(bin, bd = newbnd, above = above, below = below)
} else stop("Margin must be one of x or y")
}
##' @title Halve continuously to break ties
##' @description This function halves a bin based on the midpoint of
##' the bounds along whichever margin produces the larger score.
##' @details The goal of this function is to break ties within bin
##' splitting in a way which prevents very small or lopsided bins from
##' forming, a common problem with the `halfSplit` function
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param xscore numeric value giving the score for all splits along
##' x
##' @param yscore numeric value giving the score for all splits along
##' y
##' @param wider logical; is the bin wider than it is tall?
##' @param squarify logical value, should we force splitting on
##' the longer side regardless of scores?
##' @return A list of two bins resulting from the split of `bin` in
##' half along the margin corresponding to the larger score.
##' @examples
##' bin <- makeBin(x = 1:10, y = sample(1:10))
##' halfCutTie(bin, 1, 2, wider = FALSE) # splits on y
##' halfCutTie(bin, 2, 1, wider = FALSE) # splits on x
##' halfCutTie(bin, 1, 1, wider = FALSE) # ties are random
##' @author Chris Salahub
halfCutTie <- function(bin, xscore, yscore, wider,
squarify = FALSE) {
u <- as.numeric(yscore < xscore) # prefer to split on max score
if (yscore == xscore) u <- runif(1)
if (squarify) u <- as.numeric(wider)
if (u >= 0.5) { # x has a larger score or bin is wider than tall
newbnd <- ceiling(mean(bin$bnds$x)) # split value
abv <- bin$x > newbnd # which x values are above
above <- which(abv) # indices above
below <- which(!abv) # indices below
splitX(bin, bd = newbnd, above = above, below = below)
} else {
newbnd <- ceiling(mean(bin$bnds$y)) # split value
abv <- bin$y > newbnd # which y values are above
above <- which(abv) # indices above
below <- which(!abv) # indices below
splitY(bin, bd = newbnd, above = above, below = below)
}
}
##' @title Size-restricted bivariate score maximizing splitting
##' @description Splits a bin based on the location maximizing a score
##' function with restrictions on minimum bin size.
##' @details This function serves as a wrapper which manages the
##' logic of splitting bins using a score function while maintaining
##' a minimum size and allowing forced splits along the wider edge.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param scorer function which accepts a numeric vector of potential
##' split coordinates and the bounds of `bin` and returns a numeric
##' vector of scores for each
##' @param minExp value giving the smallest expected count allowed for
##' bin splits
##' @param squarify logical value, should we force splitting on
##' the longer side regardless of scores?
##' @return A list of two bins resulting from the split of `bin`
##' along the corresponding margin at the maximum location
##' @examples
##' bin <- makeBin(x = 1:10, y = sample(1:10))
##' maxScoreSplit(bin, chiScores)
##' maxScoreSplit(bin, miScores) # pretty similar for both
##' maxScoreSplit(bin, randScores)
##' maxScoreSplit(bin, randScores) # different every time
##' @author Chris Salahub
maxScoreSplit <- function(bin, scorer, minExp = 5,
squarify = FALSE) {
expn <- bin$expn
prop <- minExp/expn
deltax <- diff(bin$bnds$x)
deltay <- diff(bin$bnds$y)
xfrm <- bin$bnds$x + prop*deltax*c(1, -1) # minExp bounds
yfrm <- bin$bnds$y + prop*deltay*c(1, -1)
if ((xfrm[2] - xfrm[1]) >= 0) {
## take advantage of R ordering: ensure frame includes pts
xaug <- c(min(bin$x)-1, bin$x, xfrm) # augment with frame
xsort <- order(xaug)
xlowFrm <- which(xsort == (length(xaug) - 1)) # identify frame
xupFrm <- which(xsort == length(xaug))
xinfrm <- xlowFrm:xupFrm
xcntblw <- cumsum(c(0, rep(1, bin$n), 0, 0)[xsort])[xinfrm]
xscore <- scorer(bounds = c(bin$bnds$x[1],
xaug[xsort][xinfrm],
bin$bnds$x[2]),
nbelow = xcntblw, n = bin$n)
xmax <- which.max(xscore)
if (all(abs(xscore - xscore[1]) < sqrt(.Machine$double.eps))) {
xmax <- ceiling((xupFrm - xlowFrm)/2)
} # halve on ties
} else xmax <- NA # indicate inability to split
if ((yfrm[2] - yfrm[1]) >= 0) {
yaug <- c(min(bin$y)-1, bin$y, yfrm)
ysort <- order(yaug)
ylowFrm <- which(ysort == (length(yaug) - 1))
yupFrm <- which(ysort == length(yaug))
yinfrm <- ylowFrm:yupFrm
ycntblw <- cumsum(c(0, rep(1, bin$n), 0, 0)[ysort])[yinfrm]
yscore <- scorer(bounds = c(bin$bnds$y[1],
yaug[ysort][yinfrm],
bin$bnds$y[2]),
nbelow = ycntblw, n = bin$n)
ymax <- which.max(yscore)
if (all(abs(yscore - yscore[1]) < sqrt(.Machine$double.eps))) {
ymax <- ceiling((yupFrm - ylowFrm)/2)
}
} else ymax <- NA
## control the bin being split
if (is.na(xmax) & is.na(ymax)) {
swtch <- NA
} else {
if (is.na(xmax)) {
swtch <- 1
} else if (is.na(ymax)) {
swtch <- 0
} else {
wider <- deltax >= deltay
if (yscore[ymax] > xscore[xmax]) {
swtch <- 1
} else if (yscore[ymax] == xscore[xmax]) {
swtch <- runif(1)
} else swtch <- 0
if (squarify) {
if (wider) swtch <- 0 else swtch <- 1
}
}
}
## use swtch variable to control behaviour
if (is.na(swtch)) {
bin$stopped <- TRUE
list(bin)
} else if (swtch > 0.5) {
newbnd <- yaug[ysort][yinfrm[ymax]]
## remove frame, remove new lowest min-1, adjust indices
yclean <- ysort[-c(yupFrm, ylowFrm)][-1]-1 # remove extras
below <- yclean[seq_len(ycntblw[ymax])]
if (newbnd >= bin$y[yclean[bin$n]]) { # the complement of below
above <- integer(0)
} else above <- yclean[(ycntblw[ymax]+1):bin$n]
splitY(bin, bd = newbnd, above = above, below = below)
} else {
newbnd <- xaug[xsort][xinfrm[xmax]]
xclean <- xsort[-c(xupFrm, xlowFrm)][-1] - 1
below <- xclean[seq_len(xcntblw[xmax])]
if (newbnd >= bin$x[xclean[bin$n]]) {
above <- integer(0)
} else above <- xclean[(xcntblw[xmax]+1):bin$n]
splitX(bin, bd = newbnd, above = above, below = below)
}
}
##' @title Univariate score maximizing splitting
##' @description A function which splits a bin based on the location
##' maximizing a score function.
##' @details This function is the univariate version of
##' `maxScoreSplit` and so is considerably simpler. It assumes the
##' variable to be split is named `x` in the bin, and the other
##' variable is to remain unsplit.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param scorer function which accepts a numeric vector of potential
##' split coordinates and the bounds of `bin` and returns a numeric
##' vector of scores for each
##' @param minExp numeric giving the minimum expected count allowed
##' in a bin
##' @return A list of two bins resulting from the split of `bin` at
##' the maximum split location along y
##' @author Chris Salahub
uniMaxScoreSplit <- function(bin, scorer, minExp = 5) {
expn <- bin$expn
prop <- minExp/expn
deltay <- diff(bin$bnds$y)
yfrm <- bin$bnds$y + ceiling(prop*deltay)*c(1, -1)
if ((yfrm[2] - yfrm[1]) >= 0) {
yaug <- c(min(bin$y)-1, bin$y, yfrm)
ysort <- order(yaug)
ylowFrm <- which(ysort == (length(yaug) - 1))
yupFrm <- which(ysort == length(yaug))
yinfrm <- ylowFrm:yupFrm
ycntblw <- cumsum(c(0, rep(1, bin$n), 0, 0)[ysort])[yinfrm]
yscore <- scorer(bounds = c(bin$bnds$y[1],
yaug[ysort][yinfrm],
bin$bnds$y[2]),
nbelow = ycntblw, n = bin$n)
ymax <- which.max(yscore)
if (all(abs(yscore - yscore[1]) < sqrt(.Machine$double.eps))) {
ymax <- ceiling((yupFrm - ylowFrm)/2)
}
} else ymax <- NA
if (is.na(ymax)) {
bin$stopped <- TRUE
list(bin)
} else {
newbnd <- yaug[ysort][yinfrm[ymax]]
yclean <- ysort[-c(yupFrm, ylowFrm)][-1] - 1
below <- yclean[seq_len(ycntblw[ymax])]
if (newbnd >= bin$y[yclean[bin$n]]) {
above <- integer(0)
} else above <- yclean[(ycntblw[ymax]+1):bin$n]
splitY(bin, bd = newbnd, above = above, below = below)
}
}
##' @title Bivariate score maximizing splitting
##' @description A function which splits a bin based on the location
##' maximizing a score function.
##' @details This function serves as a wrapper which manages the
##' interaction of a score function, marginal splitting functions,
##' tie breaking function, and a maximum selection function to split
##' a bin at the observation coordinate which maximizes the score
##' function.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param scorer function which accepts a numeric vector of potential
##' split coordinates and the bounds of `bin` and returns a numeric
##' vector of scores for each
##' @param ties function which is called to break ties when all splits
##' generate the same score
##' @param minExp value giving the smallest expected count allowed for
##' bin splits
##' @param pickMax function which accepts a list of scores and returns
##' the element of the largest score according to some rule
##' @param ... optional additional arguments to `scorer`
##' @return A list of two bins resulting from the split of `bin`
##' along the corresponding margin at the maximum location
##' @author Chris Salahub
sandboxMaxSplit <- function(bin, scorer, ties = halfCutTie,
minExp = 5, pickMax = which.max, ...) {
xsort <- order(bin$x)
ysort <- order(bin$y) # get marginal ordering
xscore <- scorer(bin, xsort, ...)
yscore <- scorer(bin, ysort, ...) # score splits based on ordering
xmax <- pickMax(xscore)
ymax <- pickMax(yscore) # the score values
xallEq <- all(abs(xscore - xscore[1]) < sqrt(.Machine$double.eps))
yallEq <- all(abs(yscore - yscore[1]) < sqrt(.Machine$double.eps))
u <- as.numeric(xscore[xmax] >= yscore[ymax]) # ties go to x
if (xallEq & yallEq) { # in the case of ties, use tie function
ties(bin, xscore[1], yscore[1])
} else if (u >= 0.5) {
xsplts <- bin$x[xsort]
newbnd <- c(xsplts[1]-1, xsplts)[xmax] # new boundary
below <- xsort[seq_len(xmax-1)] # get indices of points below
above <- if (xmax == bin$n+1) integer(0) else xsort[xmax:bin$n]
splitX(bin, bd = newbnd, above = above, below = below)
} else {
ysplts <- bin$y[ysort]
newbnd <- c(ysplts[1]-1, ysplts)[ymax]
below <- ysort[seq_len(ymax-1)]
above <- if (ymax == bin$n+1) integer(0) else ysort[ymax:bin$n]
splitY(bin, bd = newbnd, above = above, below = below)
}
}
##' @title Random integer splitting
##' @description A function which splits a bin at a random integer
##' conforming to limits on minimum bin size.
##' @details This function serves as a wrapper which manages the
##' interaction of a score function, marginal splitting functions,
##' tie breaking function, and a maximum selection function to split
##' a bin at the observation coordinate which maximizes the score
##' function.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param minExp numeric giving the minimum expected count allowed
##' in a bin
##' @param squarify logical value, should we force splitting on
##' the longer side?
##' @return A list of two bins resulting from the split of `bin`
##' along the corresponding margin at the maximum location
##' @examples
##' bin <- makeBin(x = 1:10, y = sample(1:10))
##' rIntSplit(bin, minExp = 2)
##' @author Chris Salahub
rIntSplit <- function(bin, minExp = 5, squarify = FALSE) {
expn <- bin$expn
prop <- minExp/expn
deltax <- diff(bin$bnds$x)
deltay <- diff(bin$bnds$y)
wider <- deltax > deltay
if (squarify) u <- as.numeric(wider) else u <- runif(1)
if (u >= 0.5) { # split on x
lower <- bin$bnds$x[1] + ceiling(prop*deltax)
upper <- bin$bnds$x[2] - ceiling(prop*deltax)
if (upper <= lower) {
bin$stopped <- TRUE
list(bin)
} else{
locs <- seq(from = lower, to = upper, by = 1)
spos <- sample(locs, size = 1)
above <- which(bin$x > spos)
below <- which(bin$x <= spos)
splitX(bin, spos, above, below)
}
} else {
lower <- bin$bnds$y[1] + ceiling(prop*deltay)
upper <- bin$bnds$y[2] - ceiling(prop*deltay)
if (upper <= lower) {
bin$stopped <- TRUE
list(bin)
} else{
locs <- seq(from = lower, to = upper, by = 1)
spos <- sample(locs, size = 1)
above <- which(bin$y > spos)
below <- which(bin$y <= spos)
splitY(bin, spos, above, below)
}
}
}
##' @title Univariate random integer splitting
##' @description A function which splits a bin along x at a random
##' integer conforming to limits on minimum bin size.
##' @details This function serves as a wrapper which manages the
##' interaction of a score function, marginal splitting functions,
##' tie breaking function, and a maximum selection function to split
##' a bin along a single margin at the observation coordinate which
##' maximizes the score function.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param minExp numeric giving the minimum expected count allowed
##' in a bin
##' @return A list of two bins resulting from the split of `bin`
##' along the corresponding margin at the maximum location
##' @examples
##' bin <- makeBin(x = 1:10, y = sample(1:10))
##' rIntSplit(bin, minExp = 2)
##' @author Chris Salahub
uniRIntSplit <- function(bin, minExp = 5) {
expn <- bin$expn
prop <- minExp/expn
deltay <- diff(bin$bnds$y)
lower <- bin$bnds$y[1] + ceiling(prop*deltay)
upper <- bin$bnds$y[2] - ceiling(prop*deltay)
if (upper <= lower) {
bin$stopped <- TRUE
list(bin)
} else{
locs <- seq(from = lower, to = upper, by = 1)
spos <- sample(locs, size = 1)
above <- which(bin$y > spos)
below <- which(bin$y <= spos)
splitY(bin, spos, above, below)
}
}
##' @title Random uniform splitting
##' @description Split bins randomly and uniformly
##' @details This function samples a coordinate uniformly along a
##' random margin and splits a bin at that coordinate. In contrast to
##' maxScoreSplit with randScores, this can introduce splits at
##' locations other than the points.
##' @param bin a bin to be split with elements `x`, `y`, `depth`,
##' `bnds` (list with elements `x` and `y`), `expn`, `n`
##' @param minExp numeric giving the minimum expected count allowed
##' in a bin
##' @param squarify logical value, should we force splitting on
##' the longer side?
##' @return A list of two bins resulting from the split of `bin`
##' at a random location on a random margin
##' @examples
##' bin <- makeBin(x = 1:10, y = sample(1:10))
##' rUnifSplit(bin, minExp = 2)
##' @author Chris Salahub
rUnifSplit <- function (bin, minExp = 0, squarify = FALSE) {
expn <- bin$expn
prop <- minExp/expn
deltax <- diff(bin$bnds$x)
deltay <- diff(bin$bnds$y)
wider <- deltax > deltay
if (squarify) u <- as.numeric(wider) else u <- runif(1)
if (u >= 0.5) {
lower <- bin$bnds$x[1] + prop*deltax
upper <- bin$bnds$x[2] - prop*deltax
if (upper <= lower) {
bin$stopped <- TRUE
list(bin)
} else{
spos <- runif(1, min = lower, max = upper)
above <- which(bin$x > spos)
below <- which(bin$x <= spos)
splitX(bin, spos, above, below)
}
}
else {
lower <- bin$bnds$y[1] + prop*deltay
upper <- bin$bnds$y[2] - prop*deltay
if (upper <= lower) {
bin$stopped <- TRUE
list(bin)
} else{
spos <- runif(1, min = lower, max = upper)
above <- which(bin$y > spos)
below <- which(bin$y <= spos)
splitY(bin, spos, above, below)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.