#' u_statistic_model_selection_funciton
#'
#' returns a test statistic based on oob u statistics
#'
u_statistic_model_selection_function <- function(){
function(x, start, split_point, end, lambda = 0){
n <- end - start
y <- c(rep(0, split_point - start), rep(1, end - split_point))
t_value <- get_t_value(x[(start + 1) : end, ], y, 10000, 25, 1000)
list(is_significant = t_value > 2, statistic = t_value)
}
}
estimate_sigma_1 <- function(x, y, n_z, n_MC){
n <- nrow(x)
k <- sample(1 : n, n_z)
res <- matrix(NA, ncol = n, nrow = n_z)
for (i in 1:n_z){
w <- rep(0.001 / (n-1), n)
w[k[i]] <- 0.999 #dirty fix
fit <- ranger::ranger(y ~ ., data = data.frame(x = x, y = y), num.trees = n_MC, replace = FALSE, sample.fraction = 1 / sqrt(n), case.weights = w)
res[i, ] <- fit$predictions
res[i, k[i]] <- NA
}
cov(res, use = 'pairwise')
}
estimate_sigma_n <- function(x, y, n_z){
n <- nrow(x)
fit <- ranger::ranger(y ~ ., data = data.frame(x = x, y = y), num.trees = n_z * 10, replace = FALSE, sample.fraction = 1 / sqrt(n), keep.inbag = T)
res <- predict(fit, data.frame(x = x), predict.all = T)$predictions
inbag <- matrix(1 == unlist(fit$inbag.counts), nrow = n)
res[inbag] <- NA
cov(t(res), use = 'pair')
}
get_u_stat <- function(x, y, n_tree, n_z, n_MC){
n <- nrow(x)
fit <- ranger::ranger(y ~ ., data.frame(x = x, y = y), num.trees = n_tree, replace = F, sample.fraction = 1 / sqrt(n))
sigma_1 <- estimate_sigma_1(x, y, n_z, n_MC)
sigma_n <- estimate_sigma_n(x, y, n_MC)
#sigma_n <- 0
list(fit$predictions, (n_tree * sigma_1 + sigma_n) / n_tree)
}
get_t_value <- function(x, y, n_tree, n_z, n_MC){
n <- nrow(x)
varU <- get_u_stat(x, y, n_tree, n_z, n_MC)
c <- 2 * y - 1
v <- 1 #sqrt(diag(varU[[2]])) #individual stds
t <- sum((varU[[1]] - 0.5 + c / (2 * n - 2)) * c / v ) / sqrt(t(c / v) %*% varU[[2]] %*% (c / v))
t
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.