#' Section search optimisation algorithm
#'
#' Finds an (approximate) maximum of \code{gain_function} by intelligently evaluating at some of the \code{split_candidates}.
#' Works best if \code{split_candidates} is an interval.
#'
#' @inheritParams hdcd
#' @param split_candidates An array of indices where gain function is (possibly) evaluated.
#' @return An array of length \code{control$n_obs} with entries for gains at different splits
section_search <- function(gain_function, split_candidates, control){
# Check that gain_function is compatible
if(!('split_point' %in% formalArgs(gain_function))){
stop('gain_function is not of the required form. Make sure that gain_function evaluated at x, start, end and lambda returns
a closure with a single argument split_point.')
}
min_points <- control$section_search_min_points
if(min_points < 5){
stop('section_search_min_points needs to be chosen >= 5')
}
n_obs <- control$n_obs
stepsize <- control$section_search_stepsize
stopifnot(!any(c(is.null(min_points), is.null(n_obs), is.null(stepsize))))
stopifnot(!is.null(n_obs))
gain <- rep(NA, n_obs)
# helper function that selects a new midpoint in the smaller of the two segments (cur_left, cur_middle]
# and (cur_middle, cur_right] and then calls section_search_recursive
select_next_midpoint <- function(cur_left, cur_middle, cur_right){
# If the current segment (cur_left, cur_right] is small, find the maximum of the gain_function on that
# segment with a line_search
if(cur_right - cur_left < min_points){
# select indices for which to calculate gains
inds <- intersect(intersect((cur_left) : (cur_right), split_candidates), which(is.na(gain)))
# Evaluate gain on all of the split_candidates within w_left : w_right to search for maximum
gain[inds] <<- sapply(inds, gain_function)
return(gain)
}
stopifnot(cur_left < cur_middle)
stopifnot(cur_middle < cur_right)
# select new midpoint in smaller segment
if (cur_right - cur_middle > cur_middle - cur_left){
w <- cur_right - ceiling((cur_right - cur_middle) * stepsize)
section_search_recursive(cur_left, cur_middle, w, cur_right)
} else {
w <- cur_left + ceiling((cur_middle - cur_left) * stepsize)
section_search_recursive(cur_left, w, cur_middle, cur_right)
}
}
# Recursive function that finds the maximum of the gain_function on the segment (cur_left, cur_right] via a
# binary search style algorithm.
section_search_recursive <- function(cur_left, w_left, w_right, cur_right){
# When called, at least one of gain[w_left] and gain[w_right] is NA. We then evaluate gain_function at this w
# (if it is a split_candidate), in/decrease it by one if doing so is not possible and try again. When
# w_right - w_left < min_points, the maximum is found via a line_search.
while(is.na(gain[w_left]) & w_right - w_left > 0){
if(w_left %in% split_candidates){
gain[w_left] <<- gain_function(w_left)
}
if(is.na(gain[w_left])){
w_left <- w_left + 1
}
}
while(is.na(gain[w_right]) & w_right - w_left > 0){
if(w_right %in% split_candidates){
gain[w_right] <<- gain_function(w_right)
}
if(is.na(gain[w_right])){
w_right <- w_right - 1
}
}
# if the gain function fails to evaluate at all w_left, w_right, just evaluate w on the whole segment (ie do a line search)
# and return gain
if(w_right <= w_left){
gain[cur_left : cur_right] <- sapply(cur_left : cur_right, gain_function)
return(gain)
}
# if the gain is higher at w_left than at w_right, discard the segment (w_right, cur_right]. Else discard the
# segment (cur_left, w_right]
if(gain[w_left] >= gain[w_right]){
select_next_midpoint(cur_left, w_left, w_right)
} else {
select_next_midpoint(w_left, w_right, cur_right)
}
}
cur_left <- min(split_candidates)
cur_right <- max(split_candidates)
w_left <- ceiling((cur_left + stepsize * cur_right) / (1 + stepsize))
w_right <- floor((stepsize * cur_left + cur_right) / (1 + stepsize))
gain <- section_search_recursive(cur_left, w_left, w_right, cur_right)
best_split = ifelse(all(is.na(gain)), NA, which.max(gain))
max_gain = ifelse(all(is.na(gain)), NA, max(gain, na.rm = T))
list(gain = gain, best_split = best_split, max_gain = max_gain)
}
#' Line search optimisation algorithm
#'
#' Finds an (exact) maximum of \code{gain_function} by evaluating it at each of \code{split_candidates}.
#'
#' @inheritParams hdcd
#' @param split_candidates array of indices where gain function is evaluated.
#' @return An array of length \code{control$n_obs} with entries for gains at all of \code{split_candidates}.
line_search <- function(gain_function, split_candidates, control){
# Check that gain_function is compatible
if(!('split_point' %in% formalArgs(gain_function))){
stop('gain_function is not of the required form. Make sure that gain_function evaluated at x, start, end and lambda returns
a closure with a single argument split_point.')
}
stopifnot(!is.null(control$n_obs))
# Create array to save evaluated gains
gain <- rep(NA, control$n_obs)
# evaluate the gain_function at each of split_candidates and save corresponding gains in gain
gain[split_candidates] <- sapply(split_candidates, gain_function)
best_split = ifelse(all(is.na(gain)), NA, which.max(gain))
max_gain = ifelse(all(is.na(gain)), NA, max(gain, na.rm = T))
list(gain = gain, best_split = best_split, max_gain = max_gain)
}
#' Two step search optimisation algorithm
#'
#' Find an (approximate) maximum of \code{gain_function} by repeatedly fitting and evaluating.
#'
#' @inheritParams hdcd
#' @param split_candidates An array of indices where gain function is evaluated.
#' @return A list with a list of approximate gains after each split and the split points used to estimate the gains.
two_step_search <- function(gain_function, split_candidates, control){
if(any(!c('split_point', 'evaluate_all') %in% formalArgs(gain_function))){
stop('gain_function is not of the required form. To use the two_step_search optimizer make sure that gain_function
evaluated at x, start, end and lambda returns a closure with arguments split_point and evaluate_all')
}
if(control$two_step_search_find_best_split == 'shift_in_mean'){
find_best_split <- find_best_split_shift_in_mean
} else if (control$two_step_search_find_best_split == 'shift_in_median'){
find_best_split <- find_best_split_shift_in_median
} else if (control$two_step_search_find_best_split == 'classifier'){
find_best_split <- find_best_classifier_split
} else if (control$two_step_search_find_best_split == 'shift_in_mean_and_variance'){
find_best_split <- find_best_split_shift_in_mean_and_variance
}
#prepare output
res <- list(gain = list(), best_split = NA, split_point = list(), predictions = list())
# choose first first split
first_split <- round(median(split_candidates))
# get predictions
temp <- gain_function(split_point = first_split, evaluate_all = T)
res$splits[[1]] <- first_split
res$predictions[[1]] <- temp
res$gain[[1]] <- find_best_split(temp, initial_guess = first_split)
split_point <- split_candidates[which.max(res$gain[[1]][split_candidates])]
temp <- gain_function(split_point = split_point, evaluate_all = T)
res$splits[[2]] <- split_point
res$predictions[[2]] <- temp
res$gain[[2]] <- find_best_split(temp, initial_guess = split_point)
res$best_split <- ifelse(all(is.na(res$gain[[2]][split_candidates])), NA, split_candidates[which.max(res$gain[[2]][split_candidates])])
res$max_gain <- ifelse(all(is.na(res$gain[[2]])), NA, max(res$gain[[2]], na.rm = T))
#recover()
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.