test_that("MeasureRegrRSQ works", {
measure = msr("regr.rsq")
expect_equal(measure$properties, "weights")
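# regr.rsq is the coefficient of determination, 1 - ss_res / ss_tot; with the default
# pred_set_mean = TRUE, ss_tot is centered on the mean of the prediction-set truth.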
pred = PredictionRegr$new(truth = 0:10, response = 2:12, row_ids = seq(11))
expect_equal(pred$score(measure), c(regr.rsq = 0.6))
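# worked numbers for the case above: residuals are all -2, so ss_res = 11 * 4 = 44,
# ss_tot = sum((0:10 - 5)^2) = 110, and 1 - 44 / 110 = 0.6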
pred = PredictionRegr$new(truth = seq(0, 2, 0.5), response = seq(0, 2, 0.5), row_ids = seq(5))
expect_equal(pred$score(measure), c(regr.rsq = 1.0))
pred = PredictionRegr$new(truth = seq(1, 4), response = c(1, 2, 3, 5), row_ids = seq(4))
expect_equal(pred$score(measure), c(regr.rsq = 0.8))
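# illustrative check of the arithmetic behind the 0.8 case: ss_res = 1, ss_tot = 5
expect_equal(1 - sum((1:4 - c(1, 2, 3, 5))^2) / sum((1:4 - mean(1:4))^2), 0.8)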
measure = msr("regr.rsq", pred_set_mean = FALSE)
expect_subset(measure$properties, c("requires_task", "requires_train_set", "weights"))
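# with pred_set_mean = FALSE, ss_tot is centered on the mean of the training-set targets,
# which is why the measure additionally requires the task and the train set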
pred = PredictionRegr$new(truth = 0:10, response = 2:12, row_ids = seq(11))
task = as_task_regr(data.table(y = 0:10), target = "y")
expect_equal(pred$score(measure, task = task, train_set = seq(11)), c(regr.rsq = 0.6))
pred = PredictionRegr$new(truth = seq(0, 2, 0.5), response = seq(0, 2, 0.5), row_ids = seq(5))
task = as_task_regr(data.table(y = seq(0, 2, 0.5)), target = "y")
expect_equal(pred$score(measure, task = task, train_set = seq(5)), c(regr.rsq = 1.0))
pred = PredictionRegr$new(truth = seq(1, 4), response = c(1, 2, 3, 5), row_ids = seq(4))
task = as_task_regr(data.table(y = seq(1, 4)), target = "y")
expect_equal(pred$score(measure, task = task, train_set = seq(4)), c(regr.rsq = 0.8))
})
test_that("MeasureRegrRSQ works with weights", {
# Setup: truth, response, weights
truth = 1:5
response = c(1.1, 1.9, 3.2, 4.1, 4.8)
weights = c(1, 2, 1, 2, 1)
row_ids = 1:5
pred = PredictionRegr$new(row_ids = row_ids, truth = truth, response = response, weights = weights)
measure_weighted = msr("regr.rsq")
measure_weighted$use_weights = "use" # explicitly set, although "use" is the default when the 'weights' property is present
expect_true("weights" %in% measure_weighted$properties)
# Case 1: pred_set_mean = TRUE (default)
# Manual calculation: mu is the unweighted mean of the prediction-set truth
mu_pred = mean(truth) # unweighted mean, not weighted.mean(truth, weights)
ss_res_pred = sum(weights * (truth - response)^2)
ss_tot_pred = sum(weights * (truth - mu_pred)^2)
rsq_expected_pred = 1 - ss_res_pred / ss_tot_pred
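# i.e. the weighted score computed above is
#   1 - sum(w * (t - r)^2) / sum(w * (t - mu)^2)
# with mu the unweighted mean of the prediction-set truth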
expect_equal(pred$score(measure_weighted), c(regr.rsq = rsq_expected_pred))
expect_equal(measure_weighted$score(pred), rsq_expected_pred)
# Case 2: pred_set_mean = FALSE
measure_train_mean = msr("regr.rsq", pred_set_mean = FALSE)
measure_train_mean$use_weights = "use"
expect_subset(c("requires_task", "requires_train_set", "weights"), measure_train_mean$properties)
# Setup Task
train_data = data.table(y = c(0, 8)) # mean = 4
task = as_task_regr(train_data, target = "y")
train_set_indices = 1:2 # Indices within the task data
# Manual calculation - still uses unweighted mean for train set
mu_train = mean(task$truth(train_set_indices)) # mean(c(0, 8)) = 4
ss_res_train = ss_res_pred # Residuals don't depend on the mean used for SS_tot
ss_tot_train = sum(weights * (truth - mu_train)^2)
rsq_expected_train = 1 - ss_res_train / ss_tot_train
expect_equal(pred$score(measure_train_mean, task = task, train_set = train_set_indices), c(regr.rsq = rsq_expected_train))
expect_equal(measure_train_mean$score(pred, task = task, train_set = train_set_indices), rsq_expected_train)
# Case 3: Perfect fit
pred_perfect = PredictionRegr$new(row_ids = row_ids, truth = truth, response = truth, weights = weights)
expect_equal(pred_perfect$score(measure_weighted), c(regr.rsq = 1))
expect_equal(measure_weighted$score(pred_perfect), 1)
expect_equal(pred_perfect$score(measure_train_mean, task = task, train_set = train_set_indices), c(regr.rsq = 1))
expect_equal(measure_train_mean$score(pred_perfect, task = task, train_set = train_set_indices), 1)
# Case 4: constant truth (zero denominator with the prediction-set mean)
# Not asserted with pred_set_mean = TRUE: ss_tot is then 0, so with ss_res > 0 the score
# evaluates to 1 - Inf = -Inf in R, while the help page states the measure is undefined for
# constant truth (NaN would arguably be the more appropriate user-facing value). Only the
# train-mean variant below, where ss_tot > 0, is checked.
pred_const = PredictionRegr$new(row_ids = row_ids, truth = rep(3, 5), response = response, weights = weights)
# Use train mean = 4
mu_const_train = 4
ss_tot_const_train = sum(weights * (pred_const$truth - mu_const_train)^2) # sum(weights * (3 - 4)^2) = sum(weights * (-1)^2) = sum(weights) = 7
rsq_expected_const_train = 1 - sum(weights * (pred_const$truth - pred_const$response)^2) / ss_tot_const_train
expect_equal(pred_const$score(measure_train_mean, task = task, train_set = train_set_indices), c(regr.rsq = rsq_expected_const_train))
expect_equal(measure_train_mean$score(pred_const, task = task, train_set = train_set_indices), rsq_expected_const_train)
})
test_that("MeasureRegrRSQ works with weights during resampling", {
task = cars_weights_measure$clone() # Use predefined task with 'weights_measure' role
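# `cars_weights_measure` is assumed to be defined in the test helpers: a regression task on
# `cars` with an extra weight column holding the "weights_measure" column role, roughly
# (sketch only, the actual helper may differ):
#   dt = cbind(cars, w = runif(nrow(cars), 1, 5))
#   task_sketch = as_task_regr(dt, target = "dist")
#   task_sketch$set_col_roles("w", roles = "weights_measure")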
learner = lrn("regr.rpart")
resampling = rsmp("cv", folds = 3)
# Ensure the measure uses weights
measure = msr("regr.rsq")
measure$use_weights = "use"
rr = resample(task, learner, resampling)
# Check individual scores (pred_set_mean = TRUE implicitly)
scores_manual = map_dbl(rr$predictions("test"), function(p) {
w = p$weights
t = p$truth
r = p$response
mu = mean(t)
1 - sum(w * (t - r)^2) / sum(w * (t - mu)^2)
})
expect_equal(rr$score(measure)$regr.rsq, scores_manual)
# Check aggregation - macro
measure$average = "macro"
aggr_macro = rr$aggregate(measure)
expect_equal(aggr_macro[["regr.rsq"]], mean(scores_manual))
# Check aggregation - micro
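# micro averaging pools all test-set predictions into a single prediction object
# (rr$prediction() below) and scores it once, so mu is the mean of the pooled truth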
measure$average = "micro"
aggr_micro = rr$aggregate(measure)
pred_combined = rr$prediction("test")
w_comb = pred_combined$weights
t_comb = pred_combined$truth
r_comb = pred_combined$response
mu_comb = mean(t_comb)
rsq_micro_manual = 1 - sum(w_comb * (t_comb - r_comb)^2) / sum(w_comb * (t_comb - mu_comb)^2)
expect_equal(aggr_micro[["regr.rsq"]], rsq_micro_manual)
# Check aggregation - macro_weighted
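# macro_weighted weights each fold's score by that fold's total observation weight,
# reproduced manually below via weighted.mean()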
measure$average = "macro_weighted"
aggr_macro_w = rr$aggregate(measure)
weights_per_fold = map_dbl(rr$predictions("test"), function(p) sum(p$weights))
rsq_macro_w_manual = weighted.mean(scores_manual, weights_per_fold)
expect_equal(aggr_macro_w[["regr.rsq"]], rsq_macro_w_manual)
# Check pred_set_mean = FALSE during resampling
measure_train = msr("regr.rsq", pred_set_mean = FALSE)
measure_train$use_weights = "use"
rr_train_mean = resample(task, learner, resampling) # Need task for train mean
scores_manual_train = map_dbl(seq_len(resampling$iters), function(i) {
p = rr_train_mean$predictions()[[i]]
train_set = rr_train_mean$resampling$train_set(i)
w = p$weights
t = p$truth
r = p$response
mu = mean(task$truth(train_set)) # Unweighted mean of train set truth
1 - sum(w * (t - r)^2) / sum(w * (t - mu)^2)
})
expect_equal(rr_train_mean$score(measure_train)$regr.rsq, scores_manual_train)
# Aggregation with pred_set_mean = FALSE
measure_train$average = "macro"
expect_equal(rr_train_mean$aggregate(measure_train)[["regr.rsq"]], mean(scores_manual_train))
measure_train$average = "micro"
expect_error(rr_train_mean$aggregate(measure_train), "requires the train_set")
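# micro averaging pools predictions across folds, so there is no single train set for the
# pooled prediction; hence the error asserted above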
measure_train$average = "macro_weighted"
weights_per_fold_train = map_dbl(rr_train_mean$predictions("test"), function(p) sum(p$weights))
rsq_macro_w_manual_train = weighted.mean(scores_manual_train, weights_per_fold_train)
expect_equal(rr_train_mean$aggregate(measure_train)[["regr.rsq"]], rsq_macro_w_manual_train)
})
test_that("MeasureRegrRSQ use_weights works", {
task_plain = as_task_regr(cars, target = "dist")
task_weighted = cars_weights_measure$clone()
learner = lrn("regr.rpart")
resampling = rsmp("holdout")$instantiate(task_plain)
rr_plain = resample(task_plain, learner, resampling)
rr_weighted = resample(task_weighted, learner, resampling)
measure = msr("regr.rsq")
# Default for measure with "weights" property is "use"
expect_equal(measure$use_weights, "use")
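# use_weights can be "use" (apply observation weights), "ignore" (score as if unweighted),
# or "error" (fail when the task provides measure weights); all three are exercised below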
score_plain_use = rr_plain$aggregate(measure)
score_weighted_use = rr_weighted$aggregate(measure)
expect_false(isTRUE(all.equal(score_plain_use, score_weighted_use))) # Scores should differ
# Ignore weights
measure$use_weights = "ignore"
score_plain_ignore = rr_plain$aggregate(measure)
score_weighted_ignore = rr_weighted$aggregate(measure)
expect_equal(score_plain_ignore, score_plain_use) # Ignoring weights on unweighted task = no change
expect_equal(score_weighted_ignore, score_plain_use) # Ignoring weights should yield same as unweighted task
# Error on weights
measure$use_weights = "error"
score_plain_error = rr_plain$aggregate(measure)
expect_equal(score_plain_error, score_plain_use) # No error on unweighted task
expect_error(rr_weighted$aggregate(measure), "since 'use_weights' was set to 'error'")
})