###############################################################################
#
# Pasting functions
#
###############################################################################
#' Bottom up pasting
#'
#' Refines the edges of a chosen box in a \code{prim} object by pasting.
#'
#' @param object \code{prim} object resulting from a call to
#' \code{\link{peeling}}.
#' @param npeel Numeric value indicating which box to choose in \code{object}
#' through the number of peeling iteration.
#' @param support Numeric value between 0 and 1 indicating the support of the
#' box to choose in \code{object}.
#' @param yfun Numeric value indicating the value of the objective function
#' for the chosen box in \code{object}.
#' @param alpha The proportion of observations to add at each pasting iteration.
#' Usually equal to the peeling fraction used in \code{\link{peeling}}.
#' @param obj.fun The objective function to maximize by pasting. See
#' \code{\link{peeling}}.
#' @param peeling.side Constraints on the pasting side.
#' -1 indicates pasting only on the 'left' of the box
#' (i.e. moving the lower limit only), 1 indicate pasting only on the
#' 'right' and 0 for no constraint.
#'
#' @details The function takes a \code{prim} object and choose one of its boxes
#' as a starting point for pasting. Bottom-up pasting is the reverse of
#' of the top down peeling. It starts from the result of the peeling and
#' refines its edges by iteratively adding \code{alpha} times N
#' observations at each iteration, where N is the number of observations
#' in the current box.
#'
#' The best box after the peeling should be chosen by analyzing the
#' peeling trajectory (see \code{\link{plot_trajectory}}).
#' It is given by one of: number of peeling iteration leading to the box
#' (argument \code{npeel}), the closest support (argument \code{support}),
#' or the closest objective function value (argument \code{yfun}).
#'
#' Although it is possible to use different algorithm parameters
#' (\code{alpha}, \code{obj.fun}, \code{peeling.side}) than the peeling
#' step, it is advised to keep the same values (the default).
#'
#' @return A \code{prim} object which is a list with the following elements:
#' \item{npeel}{The number of peeling iteration performed.}
#' \item{support}{A vector of length \code{npeel + npaste + 1}
#' containing the support
#' of each successivepeeled box.}
#' \item{yfun}{A vector of length \code{npeel + npaste + 1} containing
#' the objective function value of each successive peeled box.}
#' \item{limits}{A list of length \code{npeel + npaste + 1} containing the
#' limits of each successive box. Each limit is a list with one element per
#' input variable.}
#' \item{x,y}{The input and response data used in the algorithm.}
#' \item{numeric.vars}{A logical vector indicating, for each input variable,
#' if it was considered as a numeric variable.}
#' \item{alpha, peeling.side, obj.fun}{The value of the arguments used for
#' peeling. Useful for prim methods.}
#' \item{npaste}{Number of pasting iteration performed.}
#'
#' @seealso \code{\link{extract.box}} to extract information about a
#' particular box in a \code{prim} object. \code{\link{plot_box}} to
#' visualize boxes. \code{\link{predict.prim}} to predict if new data
#' falls into particular boxes.
#'
#' @references
#' Friedman, J.H., Fisher, N.I., 1999. Bump hunting in high-dimensional data.
#' Statistics and Computing 9, 123-143.
#' https://doi.org/10.1023/A:1008894516817
#'
#' @examples
#' # A simple bump
#' set.seed(12345)
#' x <- matrix(runif(2000), ncol = 2, dimnames = list(NULL, c("x1", "x2")))
#' y <- 2 * x[,1] + 5 * x[,2] + 10 * (x[,1] >= .8 & x[,2] >= .5) + rnorm(1000)
#' # Peeling step
#' peel_res <- peeling(y, x)
#' # Pasting from the box with support 0.01
#' paste_res <- pasting(peel_res, support = 0.01)
#' # Visualize the peeled box and pasted one (npaste 0 and 2)
#' plot_box(paste_res, pch = 16, ypalette = hcl.colors(10), npaste = c(0, 2),
#' box.args = list(lwd = 2, border = c("grey", "black"), lty = 1:2))
#'
#' @export
pasting <- function(object, npeel = NULL, support = NULL,
yfun = NULL, alpha = object$alpha, obj.fun = object$obj.fun,
peeling.side = object$peeling.side)
{
if (!inherits(object, "prim")){
warning("'object' should be of class 'prim'")
}
x <- object$x
y <- object$y
numeric.vars <- object$numeric.vars
nnumeric <- sum(numeric.vars)
peeling.side <- rep_len(peeling.side, nnumeric)
npeel <- npeel[1]
support <- support[1]
yfun <- yfun[1]
small.box <- extract.box(object, npeel = npeel, support = support,
yfun = yfun)
if (length(c(npeel, support, yfun)) > 1){
warning("Only one of 'npeel', 'support' or 'yfun' should be used.
The one with highest npeel has been used.")
small.box <- extract.box(object, npeel = max(small.box$npeel))
}
insmall <- in.box(x, small.box$limits[[1]])
cur.sup <- small.box$support
nstep <- ceiling(-log(cur.sup) / log(1 + alpha))
object$support <- c(object$support[1:(small.box$npeel + 1)],
numeric(nstep))
object$yfun <- c(object$yfun[1:(small.box$npeel + 1)],
numeric(nstep))
object$limits <- c(object$limits[1:(small.box$npeel + 1)],
vector("list", nstep))
object$npeel <- small.box$npeel
npaste <- 1
repeat{
new.box <- paste.one(y, x, object$limits[[small.box$npeel + npaste]],
alpha, obj.fun, numeric.vars = numeric.vars, peeling.side)
if (new.box$yfun > object$yfun[small.box$npeel + npaste]){
npaste <- npaste + 1
object$yfun[small.box$npeel + npaste] <- new.box$yfun
object$limits[[small.box$npeel + npaste]] <- new.box$limits
object$support[small.box$npeel + npaste] <-
length(new.box$y) / length(y)
} else {
break
}
}
object$yfun <- object$yfun[1:(small.box$npeel + npaste)]
object$support <- object$support[1:(small.box$npeel + npaste)]
object$limits <- object$limits[1:(small.box$npeel + npaste)]
object$npaste <- npaste - 1
return(object)
}
paste.one <- function(y, x, small.box, alpha = 0.05, obj.fun = mean,
numeric.vars = rep(TRUE, ncol(x)), peeling.side = rep(0,sum(numeric.vars)))
{
p <- ncol(x)
n <- nrow(x)
numinds <- which(numeric.vars)
insmall <- finalobs <- in.box(x, small.box)
nsmall <- sum(insmall)
sup.small <- nsmall/n
small.fun <- do.call(obj.fun, list(y = y, x = x, inbox = insmall))
yfun <- -Inf
limits <- small.box
for (j in 1:p){
if (numeric.vars[j]){
newn <- ceiling(nsmall * alpha)
if (p > 1){
eligible.obs <- in.box(x[,-j], small.box[-j])
} else {
eligible.obs <- rep(TRUE, n)
}
rankmin <- rank(x[eligible.obs,j], ties.method = "min")
rankmax <- rank(x[eligible.obs,j], ties.method = "max")
ranklims <- c(min(rankmin[insmall[eligible.obs]]),
max(rankmax[insmall[eligible.obs]]))
newobs <- list(
rankmax >= (ranklims[1] - newn) & rankmax <= ranklims[2],
rankmin >= ranklims[1] & rankmin <= (ranklims[2] + newn)
)
newobs <- lapply(newobs, function(obs){
out <- insmall
out[eligible.obs] <- obs
return(out)
})
newobs <- newobs[peeling.side[numinds == j] != c(1, -1)]
# Discard same boxes (when only one side can be extended)
newobs <- newobs[sapply(newobs, sum) > nsmall]
} else {
newvals <- setdiff(unique(x[,j]), small.box[[j]])
newobs <- lapply(newvals,
function(v) x[,j] == v | insmall)
}
if (length(newobs) == 0) next
for (k in 1:length(newobs)){
jyfun <- do.call(obj.fun, list(y = y, x = x, inbox = newobs[[k]]))
if (jyfun > yfun){
yfun <- jyfun
finalobs <- newobs[[k]]
limits <- small.box
if (numeric.vars[j]){
limits[[j]] <- range(x[finalobs, j])
} else {
limits[[j]] <- unique(x[finalobs, j])
}
}
}
}
return(list(limits = limits, yfun = yfun, y = y[finalobs],
x = x[finalobs,,drop=F]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.