Nothing
test_that("morf splits and predicts as expected with continuos covariates", {
## Generating data.
set.seed(1986)
n <- 200
m <- sample(c(1, 2, 3), size = 1) # Class to be tested.
y <- sample(c(1, 2, 3), size = n, replace = TRUE)
x <- data.frame("x1" = rnorm(n))
y_m <- ifelse(y <= m, 1, 0)
y_m_1 <- ifelse(y <= m-1, 1, 0)
alpha <- 0.1
## Fitting a "stump."
morf <- morf(y = y, X = x, n.trees = 1, max.depth = 1, replace = FALSE, sample.fraction = 1, min.node.size = 1,
honesty = FALSE, alpha = alpha)
avg_split <- tree_info(morf$forests.info[[m]])$splitval[1]
predictions <- tree_info(morf$forests.info[[m]])$prediction[-1]
split_values <- combn(x[, 1], 2)[, which(avg_split == combn(x[, 1], 2, mean))]
## R splitting criterion.
modified_split <- function(x, y_m, y_m_1, alpha) {
splits <- sort(unique(x))
mse <- rep(NA, length(splits))
## Scanning all split points x.
for (i in seq_along(splits)) {
## Skip this split value if alpha-regularity would be violated.
if (sum(x < splits[i]) < length(x) * alpha | sum(x > splits[i]) < length(x) * alpha) next
split <- splits[i]
mse_m <- sum(sum(y_m[x < split])^2 / sum(x < split), sum(y_m[x >= split])^2 / sum(x >= split), na.rm = TRUE)
mse_m_1 <- sum(sum(y_m_1[x < split])^2 / sum(x < split), sum(y_m_1[x >= split])^2 / sum(x >= split), na.rm = TRUE)
mce <- sum(mean(y_m[x < split] * y_m_1[x < split]), -mean(y_m[x < split]) * mean(y_m_1[x < split]),
mean(y_m[x >= split] * y_m_1[x >= split]), -mean(y_m[x >= split]) * mean(y_m_1[x >= split]), na.rm = TRUE)
mse[i] <- mse_m + mse_m_1 + 2 * mce
}
## Best split.
best_split <- splits[which.max(mse)]
left_prediction <- mean(y_m[x < best_split]) - mean(y_m_1[x < best_split])
right_prediction <- mean(y_m[x >= best_split]) - mean(y_m_1[x >= best_split])
predictions <- c(left_prediction, right_prediction)
return(list("best_split" = best_split,
"predictions" = predictions))
}
## Comparing.
treeR <- modified_split(x[, 1], y_m, y_m_1, alpha)
check_split <- treeR$best_split %in% split_values
expect_true(check_split)
expect_setequal(treeR$predictions, predictions)
})
test_that("morf splits and predicts as expected with categorical covariates", {
## Generating data.
set.seed(1986)
n <- 200
m <- sample(c(1, 2, 3), size = 1) # Class to be tested.
y <- sample(c(1, 2, 3), size = n, replace = TRUE)
x <- data.frame("x1" = sample(c(1, 2, 3, 4, 5), size = n, replace = TRUE))
y_m <- ifelse(y <= m, 1, 0)
y_m_1 <- ifelse(y <= m-1, 1, 0)
alpha <- 0.1
## Fitting a "stump."
morf <- morf(y = y, X = x, n.trees = 1, max.depth = 1, replace = FALSE, sample.fraction = 1, min.node.size = 1,
honesty = FALSE, alpha = alpha)
avg_split <- tree_info(morf$forests.info[[m]])$splitval[1]
predictions <- tree_info(morf$forests.info[[m]])$prediction[-1]
split_values <- combn(x[, 1], 2)[, which(avg_split == combn(x[, 1], 2, mean))]
## R splitting criterion.
modified_split <- function(x, y_m, y_m_1, alpha) {
splits <- sort(unique(x))
mse <- rep(NA, length(splits))
## Scanning all split points x.
for (i in seq_along(splits)) {
## Skip this split value if alpha-regularity would be violated.
if (sum(x < splits[i]) < length(x) * alpha | sum(x > splits[i]) < length(x) * alpha) next
split <- splits[i]
mse_m <- sum(sum(y_m[x < split])^2 / sum(x < split), sum(y_m[x >= split])^2 / sum(x >= split), na.rm = TRUE)
mse_m_1 <- sum(sum(y_m_1[x < split])^2 / sum(x < split), sum(y_m_1[x >= split])^2 / sum(x >= split), na.rm = TRUE)
mce <- sum(mean(y_m[x < split] * y_m_1[x < split]), -mean(y_m[x < split]) * mean(y_m_1[x < split]),
mean(y_m[x >= split] * y_m_1[x >= split]), -mean(y_m[x >= split]) * mean(y_m_1[x >= split]), na.rm = TRUE)
mse[i] <- mse_m + mse_m_1 + 2 * mce
}
## Best split.
best_split <- splits[which.max(mse)]
left_prediction <- mean(y_m[x < best_split]) - mean(y_m_1[x < best_split])
right_prediction <- mean(y_m[x >= best_split]) - mean(y_m_1[x >= best_split])
predictions <- c(left_prediction, right_prediction)
return(list("best_split" = best_split,
"predictions" = predictions))
}
## Comparing.
treeR <- modified_split(x[, 1], y_m, y_m_1, alpha)
check_split <- treeR$best_split %in% split_values
expect_true(check_split)
expect_setequal(treeR$predictions, predictions)
})
test_that("Standard predictions and weight-based predictions are the same", {
## Generating data.
set.seed(1986)
n <- 200
y <- sample(c(1, 2, 3), size = n, replace = TRUE)
x <- data.frame("x1" = rnorm(n))
## Fitting morf objects.
set.seed(1986) # Set seed to get same honest split.
morf <- morf(y = y, X = x, inference = FALSE, honesty = TRUE)
set.seed(1986)
morf2 <- morf(y = y, X = x, inference = TRUE, honesty = TRUE)
## Comparing.
expect_setequal(round(morf$predictions$probabilities, 3), round(morf2$predictions$probabilities, 3))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.