Nothing
library(survival)
# misc functions used for tests ----
no_miss_list <- function(l){
sapply(l, function(x){
if(is.list(x)) {return(no_miss_list(x))}
any(is.na(x)) | any(is.nan(x)) | any(is.infinite(x))
})
}
add_noise <- function(x, eps = .Machine$double.eps){
noise <- runif(length(x), min = -eps, max = eps)
x + noise
}
change_scale <- function(x, mult_by = 1/2){
x * mult_by
}
#' Find cut-point boundaries (R version)
#'
#' Used to test the cpp version for finding cutpoints
#'
#' @param y_node outcome matrix
#' @param w_node weight vector
#' @param XB linear combination of predictors
#' @param xb_uni unique values in XB
#' @param leaf_min_events min no. of events in a leaf
#' @param leaf_min_obs min no. of observations in a leaf
#'
#' @noRd
#'
#' @return data.frame with description of valid cutpoints
cp_find_bounds_R <- function(y_node,
w_node,
XB,
xb_uni,
leaf_min_events,
leaf_min_obs){
status = y_node[, 'status']
cp_stats <-
sapply(
X = xb_uni,
FUN = function(x){
c(
cp = x,
e_right = sum(status[XB > x] * w_node[XB > x]),
e_left = sum(status[XB <= x] * w_node[XB <= x]),
n_right = sum(as.numeric(XB > x) * w_node),
n_left = sum(as.numeric(XB <= x) * w_node)
)
}
)
cp_stats <- as.data.frame(t(cp_stats))
cp_stats$valid_cp = with(
cp_stats,
e_right >= leaf_min_events &
e_left >= leaf_min_events &
n_right >= leaf_min_obs &
n_left >= leaf_min_obs
)
cp_stats
}
# oobag functions ----
oobag_fun_brier <- function(y_mat, w_vec, s_vec){
# risk = 1 - survival
r_vec <- 1 - s_vec
y <- y_mat[, 2L]
# mean of the squared differences between predicted and observed risk
bri <- mean( (y - r_vec)^2 )
y_mean <- mean(y)
ref <- mean( (y - y_mean)^2 )
answer <- 1 - bri / ref
answer
}
oobag_fun_bad_name <- function(nope, w_vec, s_vec){
# risk = 1 - survival
r_vec <- 1 - s_vec
# mean of the squared differences between predicted and observed risk
mean( (y_mat[, 2L] - r_vec)^2 )
}
oobag_fun_bad_name_2 <- function(y_mat, w_vec, nope){
# risk = 1 - survival
r_vec <- 1 - s_vec
# mean of the squared differences between predicted and observed risk
mean( (y_mat[, 2L] - r_vec)^2 )
}
oobag_fun_bad_name_3 <- function(y_mat, nope, s_vec){
# risk = 1 - survival
r_vec <- 1 - s_vec
# mean of the squared differences between predicted and observed risk
mean( (y_mat[, 2L] - r_vec)^2 )
}
oobag_fun_bad_out <- function(y_mat, w_vec, s_vec){
# risk = 1 - survival
r_vec <- 1 - s_vec
# mean of the squared differences between predicted and observed risk
quantile( (y_mat[, 2L] - r_vec)^2, probs = c(0.25, 0.50, 0.75) )
}
oobag_fun_bad_out_2 <- function(y_mat, w_vec, s_vec){
# mean of the squared differences between predicted and observed risk
return("A")
}
oobag_fun_4_args <- function(y_mat, w_vec, s_vec, nope){
# risk = 1 - survival
r_vec <- 1 - s_vec
y <- y_mat[, 2L]
# mean of the squared differences between predicted and observed risk
bri <- mean( (y - r_vec)^2 )
y_mean <- mean(y)
ref <- mean( (y - y_mean)^2 )
answer <- 1 - bri / ref
answer
}
oobag_fun_errors_on_test <- function(y_mat, w_vec, s_vec){
stop("expected error occurred!", call. = FALSE)
}
# linear combo functions ----
f_pca <- function(x_node, y_node, w_node) {
# estimate two principal components.
pca <- stats::prcomp(x_node, rank. = 2)
# use a random principal component to split the node
pca$rotation[, 2, drop = FALSE]
}
# testing functions ----
expect_equal_leaf_summary <- function(x, y){
expect_equal(x$forest$leaf_summary,
y$forest$leaf_summary,
tolerance = 1e-9)
}
expect_equal_oobag_eval <- function(x, y){
expect_equal(x$eval_oobag$stat_values,
y$eval_oobag$stat_values,
tolerance = 1e-9)
}
expect_no_missing <- function(x){
expect_true(!any(is.na(x)))
}
# data processing ----
prep_test_matrices <- function(data, outcomes = c("time", "status")){
names_y_data <- outcomes
names_x_data <- setdiff(names(data), outcomes)
fi <- fctr_info(data, names_x_data)
types_x_data <- check_var_types(data,
names_x_data,
valid_types = c('numeric',
'integer',
'units',
'factor',
'ordered'))
names_x_numeric <- grep(pattern = "^integer$|^numeric$|^units$",
x = types_x_data)
means <- standard_deviations<- modes <- numeric_bounds <- NULL
numeric_cols <- names_x_data[names_x_numeric]
nominal_cols <- fi$cols
if(!is_empty(nominal_cols)){
modes <- vapply(
select_cols(data, nominal_cols),
collapse::fmode,
FUN.VALUE = integer(1)
)
}
if(!is_empty(numeric_cols)){
numeric_data <- select_cols(data, numeric_cols)
numeric_bounds <- matrix(
data = c(
collapse::fnth(numeric_data, 0.1),
collapse::fnth(numeric_data, 0.25),
collapse::fnth(numeric_data, 0.5),
collapse::fnth(numeric_data, 0.75),
collapse::fnth(numeric_data, 0.9)
),
nrow =5,
byrow = TRUE,
dimnames = list(c('10%', '25%', '50%', '75%', '90%'),
names(numeric_data))
)
means <- collapse::fmean(numeric_data)
standard_deviations <- collapse::fsd(numeric_data)
}
if(any(is.na(select_cols(data, names_y_data))))
stop("Please remove missing values from the outcome variable(s)",
call. = FALSE)
cc <- stats::complete.cases(data[, names_x_data])
data <- data[cc, ]
y <- prep_y(data, names_y_data)
x <- prep_x(data, fi, names_x_data, means, standard_deviations)
w <- sample(1:3, nrow(y), replace = TRUE)
sorted <- collapse::radixorder(y[, 1], -y[, 2])
return(
list(
x = x[sorted, ],
y = y[sorted, ],
w = w[sorted]
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.