library(cvms)
context("metrics")
# Diagnosis by score
test_that("Metrics work for glm in validate()", {
# skip_test_if_old_R_version()
xpectr::set_test_seed(7)
dat <- groupdata2::partition(participant.scores,
p = 0.8,
cat_col = "diagnosis",
id_col = "participant",
list_out = FALSE
)
validated <- validate(
train_data = dat, formulas = "diagnosis~score",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
same_model <- glm(diagnosis ~ score, data = dat[dat$.partitions == 1, ], family = "binomial")
train_data <- dat[dat$.partitions == 1, ]
test_data <- dat[dat$.partitions == 2, ]
prob <- predict(same_model, newdata = test_data, type = c("response"))
test_data$prob <- prob
test_data <- test_data %>%
dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))
# AUC
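# Note on the pROC arguments: levels = c(0, 1) sets 0 as the control level and
# 1 as the case level; direction = "<" asserts that controls are expected to
# have lower predicted probabilities than cases.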
g <- pROC::roc(diagnosis ~ prob, data = test_data, direction = "<", levels = c(0, 1))
expect_equal(validated$AUC, as.numeric(g$auc))
if (requireNamespace("AUC", quietly = TRUE)){
auc2 <- AUC::auc(AUC::roc(test_data$prob, factor(test_data$diagnosis)))
expect_equal(validated$AUC, auc2)
}
# Confusion Matrix Metrics
conf_mat <- confusion_matrix(
targets = test_data$diagnosis,
predictions = test_data$pred,
positive = levels(as.factor(test_data$diagnosis))[1],
c_levels = levels(as.factor(train_data$diagnosis))
)
# Sensitivity
expect_equal(validated$Sensitivity, conf_mat$Sensitivity)
# Specificity
expect_equal(validated$Specificity, conf_mat$Specificity)
# Pos Pred Value
expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)
# Neg Pred Value
expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})
test_that("Metrics work for glmer in validate()", {
# skip_test_if_old_R_version()
xpectr::set_test_seed(7)
dat <- groupdata2::partition(participant.scores,
p = 0.8,
cat_col = "diagnosis",
id_col = "participant",
list_out = FALSE
)
validated <- validate(
train_data = dat, formulas = "diagnosis~score+(1|session)",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
same_model <- lme4::glmer(diagnosis ~ score + (1 | session), data = dat[dat$.partitions == 1, ], family = "binomial")
train_data <- dat[dat$.partitions == 1, ]
test_data <- dat[dat$.partitions == 2, ]
prob <- predict(same_model, newdata = test_data, type = c("response"))
test_data$prob <- prob
test_data <- test_data %>%
dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))
# AUC
auc1 <- pROC::roc(diagnosis ~ prob, data = test_data, levels = c(0, 1), direction = "<")
expect_equal(validated$AUC, as.numeric(auc1$auc))
if (requireNamespace("AUC", quietly = TRUE)){
auc2 <- AUC::auc(AUC::roc(test_data$prob, factor(test_data$diagnosis)))
expect_equal(validated$AUC, auc2)
}
# Confusion Matrix metrics
conf_mat <- confusion_matrix(
targets = test_data$diagnosis,
predictions = test_data$pred,
positive = levels(as.factor(test_data$diagnosis))[1],
c_levels = levels(as.factor(train_data$diagnosis))
)
# Sensitivity
expect_equal(validated$Sensitivity, conf_mat$Sensitivity)
# Specificity
expect_equal(validated$Specificity, conf_mat$Specificity)
# Pos Pred Value
expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)
# Neg Pred Value
expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})
# Diagnosis by age
test_that("Metrics work for glm in validate()", {
# skip_test_if_old_R_version()
xpectr::set_test_seed(6)
dat <- groupdata2::partition(participant.scores,
p = 0.8,
cat_col = "diagnosis",
id_col = "participant",
list_out = FALSE
)
validated <- validate(
train_data = dat, formulas = "diagnosis~age",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
same_model <- glm(diagnosis ~ age, data = dat[dat$.partitions == 1, ], family = "binomial")
train_data <- dat[dat$.partitions == 1, ]
test_data <- dat[dat$.partitions == 2, ]
prob <- predict(same_model, newdata = test_data, type = c("response"))
test_data$prob <- prob
test_data <- test_data %>%
dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))
# AUC
g <- pROC::roc(diagnosis ~ prob,
data = test_data,
direction = "<", levels = c(0, 1)
)
expect_equal(validated$AUC, as.numeric(g$auc))
if (requireNamespace("AUC", quietly = TRUE)){
roc_ <- AUC::roc(test_data$prob, factor(test_data$diagnosis))
auc2 <- AUC::auc(roc_)
expect_equal(validated$AUC, auc2) # TODO What is the actual underlying error here?
}
# Confusion matrix metrics
conf_mat <- confusion_matrix(
targets = test_data$diagnosis,
predictions = test_data$pred,
positive = levels(as.factor(test_data$diagnosis))[1],
c_levels = levels(as.factor(train_data$diagnosis))
)
# Sensitivity
expect_equal(validated$Sensitivity, conf_mat$Sensitivity)
# Specificity
expect_equal(validated$Specificity, conf_mat$Specificity)
# Pos Pred Value
expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)
# Neg Pred Value
expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})
test_that("Metrics work for glmer in validate()", {
# skip_test_if_old_R_version()
xpectr::set_test_seed(201)
dat <- groupdata2::partition(participant.scores,
p = 0.8,
cat_col = "diagnosis",
id_col = "participant",
list_out = FALSE
)
validated <- validate(
train_data = dat, formulas = "diagnosis~age+(1|session)",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
same_model <- lme4::glmer(diagnosis ~ age + (1 | session),
data = dat[dat$.partitions == 1, ], family = "binomial"
)
train_data <- dat[dat$.partitions == 1, ]
test_data <- dat[dat$.partitions == 2, ]
prob <- predict(same_model, newdata = test_data, type = c("response"))
test_data$prob <- prob
test_data <- test_data %>%
dplyr::mutate(pred = dplyr::if_else(prob > 0.5, 1, 0))
# AUC
auc1 <- pROC::roc(diagnosis ~ prob, data = test_data, direction = "<", levels = c(0, 1))
expect_equal(validated$AUC, as.numeric(auc1$auc))
if (requireNamespace("AUC", quietly = TRUE)){
auc2 <- AUC::auc(AUC::roc(
test_data$prob,
factor(test_data$diagnosis, levels = levels(as.factor(train_data$diagnosis)))
))
expect_equal(validated$AUC, auc2)
}
# Confusion matrix metrics
conf_mat <- confusion_matrix(
targets = test_data$diagnosis,
predictions = test_data$pred,
positive = levels(as.factor(test_data$diagnosis))[1],
c_levels = levels(as.factor(train_data$diagnosis))
)
# Sensitivity
expect_equal(validated$Sensitivity, conf_mat$Sensitivity)
# Specificity
expect_equal(validated$Specificity, conf_mat$Specificity)
# Pos Pred Value
expect_equal(validated$`Pos Pred Value`, conf_mat$`Pos Pred Value`)
# Neg Pred Value
expect_equal(validated$`Neg Pred Value`, conf_mat$`Neg Pred Value`)
})
test_that("Metrics work when 0 is positive class for glmer in validate()", {
# skip_test_if_old_R_version()
# The AUC approach was improved based on this answer: https://stats.stackexchange.com/a/269577
# Here I test that it works.
# First, we check what the behavior should be when changing positive to 0.
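# A minimal sketch of the invariance relied on below (assuming pROC is
# available): flipping both the probabilities and the labels mirrors the ROC
# curve in both axes, so the AUC is unchanged. For example:
#   p <- c(0.9, 0.8, 0.3, 0.1); y <- c(1, 1, 0, 0)
#   pROC::roc(y, p, direction = "<", levels = c(0, 1))$auc          # AUC = 1
#   pROC::roc(1 - y, 1 - p, direction = ">", levels = c(1, 0))$auc  # AUC = 1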
participant.scores$perfect_predicted_probability <- c(
0.8, 0.9, 0.7, 0.3, 0.2, 0.1,
0.8, 0.7, 0.7, 0.1, 0.4, 0.3,
0.8, 0.9, 0.7, 0.8, 0.7,
0.7, 0.7, 0.9, 0.8, 0.8,
0.7, 0.95, 0.3, 0.2, 0.1,
0.4, 0.25, 0.2
)
participant.scores$few_false_negs_predicted_probability <- c(
0.2, 0.3, 0.4, 0.3, 0.2, 0.1,
0.8, 0.7, 0.7, 0.1, 0.4, 0.3,
0.8, 0.9, 0.7, 0.8, 0.7,
0.7, 0.7, 0.9, 0.8, 0.8,
0.7, 0.95, 0.3, 0.2, 0.1,
0.4, 0.25, 0.2
)
participant.scores$few_false_pos_predicted_probability <- c(
0.8, 0.9, 0.7, 0.7, 0.9, 0.6,
0.8, 0.7, 0.7, 0.1, 0.4, 0.3,
0.8, 0.9, 0.7, 0.8, 0.7,
0.7, 0.7, 0.9, 0.8, 0.8,
0.7, 0.95, 0.3, 0.2, 0.1,
0.4, 0.25, 0.2
)
participant.scores$worst_predicted_probability <- 1 - c(
0.8, 0.9, 0.7, 0.3, 0.2, 0.1,
0.8, 0.7, 0.7, 0.1, 0.4, 0.3,
0.8, 0.9, 0.7, 0.8, 0.7,
0.7, 0.7, 0.9, 0.8, 0.8,
0.7, 0.95, 0.3, 0.2, 0.1,
0.4, 0.25, 0.2
)
# AUC (positive = 1 vs positive = 0)
# PERFECT
# With AUC::
if (requireNamespace("AUC", quietly = TRUE)){
AUC_auc_perfect <- AUC::auc(AUC::roc(
participant.scores$perfect_predicted_probability,
factor(participant.scores$diagnosis)
))
AUC_auc_perfect_pos0 <- AUC::auc(AUC::roc(
1 - participant.scores$perfect_predicted_probability,
factor(1 - participant.scores$diagnosis)
))
expect_equal(AUC_auc_perfect, AUC_auc_perfect_pos0)
# With pROC
pROC_auc_perfect <- as.numeric(pROC::roc(
response = participant.scores$diagnosis,
predictor = participant.scores$perfect_predicted_probability,
direction = "<", levels = c(0, 1)
)$auc)
pROC_auc_perfect_pos0 <- as.numeric(pROC::roc(
response = 1 - participant.scores$diagnosis,
predictor = 1 - participant.scores$perfect_predicted_probability,
direction = ">", levels = c(1, 0)
)$auc)
expect_equal(pROC_auc_perfect, pROC_auc_perfect_pos0)
expect_equal(pROC_auc_perfect, AUC_auc_perfect)
expect_equal(AUC_auc_perfect_pos0, pROC_auc_perfect_pos0)
}
# FALSE NEGATIVES
# With AUC
if (requireNamespace("AUC", quietly = TRUE)){
AUC_auc_false_negs <- AUC::auc(AUC::roc(
participant.scores$few_false_negs_predicted_probability,
factor(participant.scores$diagnosis)
))
AUC_auc_false_negs_pos0 <- AUC::auc(AUC::roc(
1 - participant.scores$few_false_negs_predicted_probability,
factor(1 - participant.scores$diagnosis)
))
expect_equal(AUC_auc_false_negs, AUC_auc_false_negs_pos0)
# With pROC
pROC_auc_false_negs <- as.numeric(pROC::roc(
response = participant.scores$diagnosis,
predictor = participant.scores$few_false_negs_predicted_probability,
direction = "<", levels = c(0, 1)
)$auc)
pROC_auc_false_negs_pos0 <- as.numeric(pROC::roc(
response = 1 - participant.scores$diagnosis,
predictor = 1 - participant.scores$few_false_negs_predicted_probability,
direction = ">", levels = c(1, 0)
)$auc)
expect_equal(pROC_auc_false_negs, pROC_auc_false_negs_pos0)
expect_equal(pROC_auc_false_negs, AUC_auc_false_negs)
expect_equal(AUC_auc_false_negs_pos0, pROC_auc_false_negs_pos0)
}
# FALSE POSITIVES
# With AUC
if (requireNamespace("AUC", quietly = TRUE)){
AUC_auc_false_pos <- AUC::auc(AUC::roc(
participant.scores$few_false_pos_predicted_probability,
factor(participant.scores$diagnosis)
))
AUC_auc_false_pos_pos0 <- AUC::auc(AUC::roc(
1 - participant.scores$few_false_pos_predicted_probability,
factor(1 - participant.scores$diagnosis)
))
expect_equal(AUC_auc_false_pos, AUC_auc_false_pos_pos0)
# With pROC
pROC_auc_false_pos <- as.numeric(pROC::roc(
response = participant.scores$diagnosis,
predictor = participant.scores$few_false_pos_predicted_probability,
direction = "<", levels = c(0, 1)
)$auc)
pROC_auc_false_pos_pos0 <- as.numeric(pROC::roc(
response = 1 - participant.scores$diagnosis,
predictor = 1 - participant.scores$few_false_pos_predicted_probability,
direction = ">", levels = c(1, 0)
)$auc)
expect_equal(pROC_auc_false_pos, pROC_auc_false_pos_pos0)
expect_equal(pROC_auc_false_pos, AUC_auc_false_pos)
expect_equal(AUC_auc_false_pos_pos0, pROC_auc_false_pos_pos0)
}
# ALL WRONG
# With AUC
if (requireNamespace("AUC", quietly = TRUE)){
AUC_auc_worst <- AUC::auc(AUC::roc(
participant.scores$worst_predicted_probability,
factor(participant.scores$diagnosis)
))
AUC_auc_worst_pos0 <- AUC::auc(AUC::roc(
1 - participant.scores$worst_predicted_probability,
factor(1 - participant.scores$diagnosis)
))
expect_equal(AUC_auc_worst, AUC_auc_worst_pos0)
}
# With pROC
pROC_auc_worst <- as.numeric(pROC::roc(
response = participant.scores$diagnosis,
predictor = participant.scores$worst_predicted_probability,
direction = "<", levels = c(0, 1)
)$auc)
pROC_auc_worst_pos0 <- as.numeric(pROC::roc(
response = 1 - participant.scores$diagnosis,
predictor = 1 - participant.scores$worst_predicted_probability,
direction = ">", levels = c(1, 0)
)$auc)
expect_equal(pROC_auc_worst, pROC_auc_worst_pos0)
if (requireNamespace("AUC", quietly = TRUE)){
expect_equal(pROC_auc_worst, AUC_auc_worst)
expect_equal(AUC_auc_worst_pos0, pROC_auc_worst_pos0)
}
xpectr::set_test_seed(201)
dat <- groupdata2::partition(participant.scores,
p = 0.8,
cat_col = "diagnosis",
id_col = "participant",
list_out = FALSE
)
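# Note: a numeric `positive` refers to the level index of the dependent
# variable, so positive = 2 selects the second level ("1") and positive = 1
# selects the first level ("0"); hence the _pos1/_pos0 suffixes below.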
validated_pos1 <- validate(
train_data = dat, formulas = "diagnosis~score",
partitions_col = ".partitions", family = "binomial",
positive = 2
)
validated_pos0 <- validate(
train_data = dat, formulas = "diagnosis~score",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
expect_equal(validated_pos1$AUC, validated_pos0$AUC)
validated_pos1 <- validate(
train_data = dat, formulas = "diagnosis~age",
partitions_col = ".partitions", family = "binomial",
positive = 2
)
validated_pos0 <- validate(
train_data = dat, formulas = "diagnosis~age",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
expect_equal(validated_pos1$AUC, validated_pos0$AUC)
# When the dependent variable is a factor with character levels
dat$diagnosis_chr <- factor(ifelse(dat$diagnosis == 0, "a", "b"))
validated_pos1_num <- validate(
train_data = dat, formulas = "diagnosis_chr~age",
partitions_col = ".partitions", family = "binomial",
positive = 2
)
validated_pos1_chr <- validate(
train_data = dat, formulas = "diagnosis_chr~age",
partitions_col = ".partitions", family = "binomial",
positive = "b"
)
expect_equal(validated_pos1_num$AUC, validated_pos1_chr$AUC)
validated_pos0_num <- validate(
train_data = dat, formulas = "diagnosis_chr~age",
partitions_col = ".partitions", family = "binomial",
positive = 1
)
validated_pos0_chr <- validate(
train_data = dat, formulas = "diagnosis_chr~age",
partitions_col = ".partitions", family = "binomial",
positive = "a"
)
expect_equal(validated_pos0_num$AUC, validated_pos0_chr$AUC)
expect_equal(validated_pos0_num$AUC, validated_pos1_num$AUC)
expect_equal(validated_pos0_chr$AUC, validated_pos1_chr$AUC)
})
test_that("AUC works", {
# skip_test_if_old_R_version()
#
# In this test, the predictions were printed within each training loop
# and manually copied below. This ensures that cross_validate() gathers the
# predictions correctly before calculating its metrics, which is essential.
# The metrics are recalculated here and compared to those obtained from cross_validate().
#
target <- c(
0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1,
0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1
)
predictions_prob <- c(
0.77379615, 0.36952324, 0.09125579, 0.89205819,
0.73620142, 0.55282759, 0.8307928, 0.6042899,
0.1754574, 0.9317034, 0.8307928, 0.5145979,
0.9269098, 0.6874739, 0.5867096, 0.71867985,
0.26746773, 0.09346533, 0.85976827, 0.24884534,
0.13205012, 0.6503171, 0.4541755, 0.1564246,
0.8445872, 0.7085838, 0.5871876, 0.8514956,
0.7607141, 0.7085838
)
predictions <- dplyr::if_else(predictions_prob > 0.5, 1, 0)
pred_df <- data.frame("obs" = target, "prob" = predictions_prob, "pred" = predictions)
# AUC
auc1 <- pROC::roc(obs ~ prob, data = pred_df, direction = "<", levels = c(0, 1))
expect_equal(as.numeric(auc1$auc), 0.7615741, tolerance = 1e-3)
if (requireNamespace("AUC", quietly = TRUE)){
auc2 <- AUC::auc(AUC::roc(pred_df$prob, factor(pred_df$obs)))
expect_equal(auc2, 0.7615741, tolerance = 1e-3)
}
})
test_that("Metrics work in confusion_matrix()", {
# skip_test_if_old_R_version()
#
# In this test, the predictions were printed within each training loop
# and manually copied below. This ensures that cross_validate() gathers the
# predictions correctly before calculating its metrics, which is essential.
# The metrics are recalculated here and compared to those obtained from cross_validate().
#
target <- c(
0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1,
0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1
)
predictions_prob <- c(
0.77379615, 0.36952324, 0.09125579, 0.89205819,
0.73620142, 0.55282759, 0.8307928, 0.6042899,
0.1754574, 0.9317034, 0.8307928, 0.5145979,
0.9269098, 0.6874739, 0.5867096, 0.71867985,
0.26746773, 0.09346533, 0.85976827, 0.24884534,
0.13205012, 0.6503171, 0.4541755, 0.1564246,
0.8445872, 0.7085838, 0.5871876, 0.8514956,
0.7607141, 0.7085838
)
predictions <- dplyr::if_else(predictions_prob > 0.5, 1, 0)
pred_df <- data.frame("obs" = target, "prob" = predictions_prob, "pred" = predictions)
# Confusion matrix metrics
conf_mat <- confusion_matrix(
targets = pred_df$obs,
predictions = pred_df$pred,
positive = levels(as.factor(pred_df$obs))[1],
c_levels = levels(as.factor(pred_df$obs)),
metrics = "all"
)
## Testing 'conf_mat' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(conf_mat),
c("cfm_results", "cfm_binomial", "tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
conf_mat[["Positive Class"]],
"0",
fixed = TRUE)
expect_equal(
conf_mat[["Balanced Accuracy"]],
0.73611,
tolerance = 1e-4)
expect_equal(
conf_mat[["Accuracy"]],
0.76667,
tolerance = 1e-4)
expect_equal(
conf_mat[["F1"]],
0.66667,
tolerance = 1e-4)
expect_equal(
conf_mat[["Sensitivity"]],
0.58333,
tolerance = 1e-4)
expect_equal(
conf_mat[["Specificity"]],
0.88889,
tolerance = 1e-4)
expect_equal(
conf_mat[["Pos Pred Value"]],
0.77778,
tolerance = 1e-4)
expect_equal(
conf_mat[["Neg Pred Value"]],
0.7619,
tolerance = 1e-4)
expect_equal(
conf_mat[["Kappa"]],
0.49275,
tolerance = 1e-4)
expect_equal(
conf_mat[["MCC"]],
0.50483,
tolerance = 1e-4)
expect_equal(
conf_mat[["Detection Rate"]],
0.23333,
tolerance = 1e-4)
expect_equal(
conf_mat[["Detection Prevalence"]],
0.3,
tolerance = 1e-4)
expect_equal(
conf_mat[["Prevalence"]],
0.4,
tolerance = 1e-4)
expect_equal(
conf_mat[["False Neg Rate"]],
0.41667,
tolerance = 1e-4)
expect_equal(
conf_mat[["False Pos Rate"]],
0.11111,
tolerance = 1e-4)
expect_equal(
conf_mat[["False Discovery Rate"]],
0.22222,
tolerance = 1e-4)
expect_equal(
conf_mat[["False Omission Rate"]],
0.2381,
tolerance = 1e-4)
expect_equal(
conf_mat[["Threat Score"]],
0.5,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(conf_mat),
c("Confusion Matrix", "Table", "Positive Class", "Balanced Accuracy",
"Accuracy", "F1", "Sensitivity", "Specificity", "Pos Pred Value",
"Neg Pred Value", "Kappa", "MCC", "Detection Rate", "Detection Prevalence",
"Prevalence", "False Neg Rate", "False Pos Rate", "False Discovery Rate",
"False Omission Rate", "Threat Score"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(conf_mat),
c("list", "list", "character", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(conf_mat),
c("list", "list", "character", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "double",
"double"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(conf_mat),
c(1L, 20L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(conf_mat)),
character(0),
fixed = TRUE)
## Finished testing 'conf_mat' ####
# F1
F1 <- (2 * conf_mat$`Pos Pred Value` * conf_mat$Sensitivity) / (conf_mat$`Pos Pred Value` + conf_mat$Sensitivity)
expect_equal(F1, 0.6666667, tolerance = 1e-5)
expect_equal(conf_mat$F1, F1, tolerance = 1e-5)
confMatTable <- conf_mat[["Table"]][[1]]
# Confusion matrix cell counts (column-major indexing; the positive class is "0")
TP <- confMatTable[1]
FP <- confMatTable[3]
FN <- confMatTable[2]
TN <- confMatTable[4]
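# Implied 2x2 layout (rows = prediction, columns = target), as given by the
# assignments above:
#           target=0  target=1
#  pred=0      TP        FP
#  pred=1      FN        TN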
ppv <- TP / (TP + FP)
expect_equal(ppv, conf_mat$`Pos Pred Value`[[1]], tolerance = 1e-5)
expect_equal(ppv, 0.7777778, tolerance = 1e-5)
npv <- TN / (TN + FN)
expect_equal(npv, conf_mat$`Neg Pred Value`[[1]], tolerance = 1e-5)
expect_equal(npv, 0.7619048, tolerance = 1e-5)
sensitivity <- TP / (TP + FN)
expect_equal(sensitivity, conf_mat$Sensitivity[[1]], tolerance = 1e-5)
expect_equal(sensitivity, 0.5833333, tolerance = 1e-5)
specificity <- TN / (TN + FP)
expect_equal(specificity, conf_mat$Specificity[[1]], tolerance = 1e-5)
expect_equal(specificity, 0.8888889, tolerance = 1e-5)
acc <- (TP + TN) / (TP + TN + FP + FN)
expect_equal(acc, conf_mat$Accuracy[[1]], tolerance = 1e-5)
expect_equal(acc, 0.7666667, tolerance = 1e-5)
F1_2 <- 2 * ppv * sensitivity / (ppv + sensitivity)
expect_equal(F1_2, conf_mat$F1[[1]], tolerance = 1e-5)
expect_equal(F1_2, 0.6666667, tolerance = 1e-5)
bal_acc <- (sensitivity + specificity) / 2
expect_equal(bal_acc, conf_mat$`Balanced Accuracy`[[1]], tolerance = 1e-5)
expect_equal(bal_acc, 0.7361111, tolerance = 1e-5)
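# Cohen's kappa on the raw counts: observed agreement (TP + TN) is compared
# to the agreement expected by chance from the row/column marginals,
# p_expected = ((TP+FP)(TP+FN) + (FN+TN)(FP+TN)) / N, with
# kappa = (p_observed - p_expected) / (N - p_expected).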
p_observed <- TP + TN
p_expected <- (((TP + FP)*(TP + FN))+((FN+TN)*(FP+TN))) / (TP + TN + FP + FN)
kappa <- (p_observed - p_expected) / ((TP + TN + FP + FN)-p_expected)
expect_equal(kappa, conf_mat$Kappa[[1]], tolerance = 1e-5)
expect_equal(kappa, 0.4927536, tolerance = 1e-5)
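# MCC is the correlation between predictions and targets, computed directly
# from the four cell counts of the confusion matrix.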
mcc <- ((TP*TN) - (FP*FN))/sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
expect_equal(mcc, conf_mat$MCC[[1]], tolerance = 1e-5)
expect_equal(mcc, 0.5048268, tolerance = 1e-5)
detection_rate <- TP / (TP + TN + FP + FN)
expect_equal(detection_rate, conf_mat$`Detection Rate`[[1]], tolerance = 1e-5)
expect_equal(detection_rate, 0.2333333, tolerance = 1e-5)
detection_prevalence <- (TP + FP) / (TP + TN + FP + FN)
expect_equal(detection_prevalence, conf_mat$`Detection Prevalence`[[1]], tolerance = 1e-5)
expect_equal(detection_prevalence, 0.3, tolerance = 1e-5)
prevalence <- (TP + FN) / (TP + TN + FP + FN)
expect_equal(prevalence, conf_mat$Prevalence[[1]], tolerance = 1e-5)
expect_equal(prevalence, 0.4, tolerance = 1e-5)
fnr <- FN / (FN + TP)
expect_equal(fnr, conf_mat$`False Neg Rate`[[1]], tolerance = 1e-5)
expect_equal(fnr, 0.4166667, tolerance = 1e-5)
fpr <- FP / (TN + FP)
expect_equal(fpr, conf_mat$`False Pos Rate`[[1]], tolerance = 1e-5)
expect_equal(fpr, 0.1111111, tolerance = 1e-5)
fdr <- FP / (FP + TP)
expect_equal(fdr, conf_mat$`False Discovery Rate`[[1]], tolerance = 1e-5)
expect_equal(fdr, 0.2222222, tolerance = 1e-5)
f_omission_rate <- FN / (FN + TN)
expect_equal(f_omission_rate, conf_mat$`False Omission Rate`[[1]], tolerance = 1e-5)
expect_equal(f_omission_rate, 0.2380952, tolerance = 1e-5)
threat_score <- TP / (TP + FN + FP)
expect_equal(threat_score, conf_mat$`Threat Score`[[1]], tolerance = 1e-5)
expect_equal(threat_score, 0.5, tolerance = 1e-5)
# Test that MCC does not depend on which class is the positive one
expect_equal(
mcc(list("TP" = TP, "FP" = FP, "FN" = FN, "TN" = TN)),
mcc(list("TP" = TN, "FP" = FN, "FN" = FP, "TN" = TP))
)
# Multiclass MCC
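# The expected values below come from yardstick::mcc_vec() (see the commented
# lines), which computes the multiclass generalization of MCC (Gorodkin's
# R_K statistic).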
xpectr::set_test_seed(1)
mc_preds <- factor(sample(c(1,2,3,4), 100, replace = TRUE))
mc_targs_random <- factor(sample(c(1,2,3,4), 100, replace = TRUE))
mc_targs_good <- factor(ifelse(runif(100) < 0.7, mc_preds, mc_targs_random))
mcc_cmf <- confusion_matrix(targets = mc_targs_good, predictions = mc_preds, do_one_vs_all = FALSE)
# yardstick_mcc <- yardstick::mcc_vec(mc_targs_good, mc_preds) # 0.759631087897275
expect_equal(mcc_cmf$MCC, 0.759631087897275, tolerance = 1e-8)
mcc_cmf <- confusion_matrix(targets = mc_targs_random, predictions = mc_preds, do_one_vs_all = FALSE)
# yardstick_mcc <- yardstick::mcc_vec(mc_targs_random, mc_preds) # 0.0153721822602552
expect_equal(mcc_cmf$MCC, 0.0153721822602552, tolerance = 1e-8)
})
test_that("evaluate_residuals() metrics work", {
# skip_test_if_old_R_version()
# Normal distribution
xpectr::set_test_seed(6)
targets <- rnorm(100)
preds <- rnorm(100)
df <- data.frame(t = targets, p = preds)
results <- evaluate_residuals(df, prediction_col = "p", target_col = "t", metrics = "all")
## Testing 'results' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(results),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
results[["RMSE"]],
1.23924,
tolerance = 1e-4)
expect_equal(
results[["MAE"]],
0.98881,
tolerance = 1e-4)
expect_equal(
results[["NRMSE(RNG)"]],
0.27174,
tolerance = 1e-4)
expect_equal(
results[["NRMSE(IQR)"]],
0.96402,
tolerance = 1e-4)
expect_equal(
results[["NRMSE(STD)"]],
1.1991,
tolerance = 1e-4)
expect_equal(
results[["NRMSE(AVG)"]],
-121.41375,
tolerance = 1e-4)
expect_equal(
results[["RMSLE"]],
NaN,
tolerance = 1e-4)
expect_equal(
results[["MALE"]],
NaN,
tolerance = 1e-4)
expect_equal(
results[["RAE"]],
1.20507,
tolerance = 1e-4)
expect_equal(
results[["RSE"]],
1.45236,
tolerance = 1e-4)
expect_equal(
results[["RRSE"]],
1.20514,
tolerance = 1e-4)
expect_equal(
results[["MAPE"]],
4.7236,
tolerance = 1e-4)
expect_equal(
results[["MSE"]],
1.53572,
tolerance = 1e-4)
expect_equal(
results[["TAE"]],
98.88096,
tolerance = 1e-4)
expect_equal(
results[["TSE"]],
153.5717,
tolerance = 1e-4)
# Testing column names
expect_equal(
names(results),
c("RMSE", "MAE", "NRMSE(RNG)", "NRMSE(IQR)", "NRMSE(STD)", "NRMSE(AVG)",
"RSE", "RRSE", "RAE", "RMSLE", "MALE", "MAPE", "MSE", "TAE",
"TSE"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(results),
c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(results),
c("double", "double", "double", "double", "double", "double", "double",
"double", "double", "double", "double", "double", "double",
"double", "double"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(results),
c(1L, 15L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(results)),
character(0),
fixed = TRUE)
## Finished testing 'results' ####
# Manual calculation
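# Reference implementations, written out from the metric definitions, for
# checking evaluate_residuals() against:
#   resids():  prediction errors; log1p-scale errors when log = TRUE
#   rmse_():   sqrt(mean(err^2)); RMSLE when log = TRUE
#   mae_():    mean(|err|); MALE when log = TRUE
#   nrmse_():  RMSE normalized by range, IQR, standard deviation, or mean
#   rae_():    sum(|err|) / sum(|targets - mean(targets)|)
#   rse_():    sum(err^2) / sum((targets - mean(targets))^2); RRSE = sqrt(RSE)
#   mape_():   mean(|err / targets|)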
resids <- function(targets, preds, log = FALSE) {
if (isTRUE(log)) {
# Log-scale errors; warnings from log() of non-positive values are suppressed
xpectr::suppress_mw(err <- log(preds + 1) - log(targets + 1))
} else {
err <- preds - targets
}
err
}
centered_targets <- function(targets){
targets - mean(targets)
}
rmse_ <- function(targets, preds, log=FALSE) {
err <- resids(targets, preds, log)
sqrt(mean(err^2))
}
mae_ <- function(targets, preds, log=FALSE) {
err <- resids(targets, preds, log)
mean(abs(err))
}
nrmse_ <- function(targets, preds, by){
rms <- rmse_(targets, preds)
if (by == "iqr") div <- IQR(targets)
else if (by == "sd") div <- sd(targets)
else if (by == "avg") div <- mean(targets)
else if (by == "rng") div <- max(targets)-min(targets)
rms / div
}
rae_ <- function(targets, preds){
err <- sum(abs(resids(targets, preds)))
cent <- sum(abs(centered_targets(targets)))
err/cent
}
rse_ <- function(targets, preds){
err <- sum(resids(targets, preds)^2)
cent <- sum(centered_targets(targets)^2)
err/cent
}
mape_ <- function(targets, preds){
err <- resids(targets, preds)
mean(abs(err/targets))
}
# RMSE
expect_equal(results$RMSE, rmse_(targets, preds), tolerance = 1e-3)
expect_equal(results$RMSE, 1.23924, tolerance = 1e-3)
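# Note: RMSLE and MALE are NaN for this data. The rnorm() draws include
# values below -1, so log(x + 1) is undefined there (NaN; the warnings are
# suppressed inside resids()).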
# RMSLE
expect_equal(results$RMSLE, rmse_(targets, preds, log = TRUE), tolerance = 1e-3)
expect_equal(results$RMSLE, NaN, tolerance = 1e-3)
# MAE
expect_equal(results$MAE, mae_(targets, preds), tolerance = 1e-3)
expect_equal(results$MAE, 0.9888096, tolerance = 1e-3)
# MALE
expect_equal(results$MALE, mae_(targets, preds, log = TRUE), tolerance = 1e-3)
expect_equal(results$MALE, NaN, tolerance = 1e-3)
# NRMSE
expect_equal(results$`NRMSE(RNG)`, nrmse_(targets, preds, by = "rng"), tolerance = 1e-3)
expect_equal(results$`NRMSE(RNG)`, 0.271736645098678, tolerance = 1e-3)
expect_equal(results$`NRMSE(IQR)`, nrmse_(targets, preds, by = "iqr"), tolerance = 1e-3)
expect_equal(results$`NRMSE(IQR)`, 0.964022899327126, tolerance = 1e-3)
expect_equal(results$`NRMSE(STD)`, nrmse_(targets, preds, by = "sd"), tolerance = 1e-3)
expect_equal(results$`NRMSE(STD)`, 1.19909776380955, tolerance = 1e-3)
expect_equal(results$`NRMSE(AVG)`, nrmse_(targets, preds, by = "avg"), tolerance = 1e-3)
expect_equal(results$`NRMSE(AVG)`, -121.413747175841, tolerance = 1e-3)
# RAE
expect_equal(results$RAE, rae_(targets, preds), tolerance = 1e-3)
expect_equal(results$RAE, 1.2050715889456, tolerance = 1e-3)
# RSE
expect_equal(results$RSE, rse_(targets, preds), tolerance = 1e-3)
expect_equal(results$RSE, 1.45235903754855, tolerance = 1e-3)
# RRSE
expect_equal(results$RRSE, sqrt(rse_(targets, preds)), tolerance = 1e-3)
expect_equal(results$RRSE, 1.20513859682136, tolerance = 1e-3)
# MAPE
expect_equal(results$MAPE, mape_(targets, preds), tolerance = 1e-3)
expect_equal(results$MAPE, 4.72360030788065, tolerance = 1e-3)
# MSE
expect_equal(results$MSE, rmse_(targets, preds)^2, tolerance = 1e-3)
expect_equal(results$MSE, 1.53571701341794, tolerance = 1e-3)
# TAE
expect_equal(results$TAE, sum(abs(resids(targets, preds))), tolerance = 1e-3)
expect_equal(results$TAE, 98.880955884436, tolerance = 1e-3)
# TSE
expect_equal(results$TSE, sum(resids(targets, preds)^2), tolerance = 1e-3)
expect_equal(results$TSE, 153.571701341794, tolerance = 1e-3)
# Uniform distribution
xpectr::set_test_seed(9)
targets <- runif(100, min = 45, max = 97)
preds <- runif(100, min = 54, max = 120)
df <- data.frame(t = targets, p = preds)
results <- evaluate_residuals(df, prediction_col = "p",
target_col = "t", metrics = "all")
# RMSE
expect_equal(results$RMSE, rmse_(targets, preds), tolerance = 1e-3)
expect_equal(results$RMSE, 30.2487016310356, tolerance = 1e-3)
# RMSLE
expect_equal(results$RMSLE, rmse_(targets, preds, log = TRUE), tolerance = 1e-3)
expect_equal(results$RMSLE, 0.381933438597387, tolerance = 1e-3)
# MAE
expect_equal(results$MAE, mae_(targets, preds), tolerance = 1e-3)
expect_equal(results$MAE, 24.3477034755331, tolerance = 1e-3)
# MALE
expect_equal(results$MALE, mae_(targets, preds, log = TRUE), tolerance = 1e-3)
expect_equal(results$MALE, 0.309460458578487, tolerance = 1e-3)
# NRMSE
expect_equal(results$`NRMSE(RNG)`, nrmse_(targets, preds, by = "rng"), tolerance = 1e-3)
expect_equal(results$`NRMSE(RNG)`, 0.585117778308063, tolerance = 1e-3)
expect_equal(results$`NRMSE(IQR)`, nrmse_(targets, preds, by = "iqr"), tolerance = 1e-3)
expect_equal(results$`NRMSE(IQR)`, 1.05622503346585, tolerance = 1e-3)
expect_equal(results$`NRMSE(STD)`, nrmse_(targets, preds, by = "sd"), tolerance = 1e-3)
expect_equal(results$`NRMSE(STD)`, 1.90845876022934, tolerance = 1e-3)
expect_equal(results$`NRMSE(AVG)`, nrmse_(targets, preds, by = "avg"), tolerance = 1e-3)
expect_equal(results$`NRMSE(AVG)`, 0.428886643444635, tolerance = 1e-3)
# RAE
expect_equal(results$RAE, rae_(targets, preds), tolerance = 1e-3)
expect_equal(results$RAE, 1.76907453491764, tolerance = 1e-3)
# RSE
expect_equal(results$RSE, rse_(targets, preds), tolerance = 1e-3)
expect_equal(results$RSE, 3.67900488837992, tolerance = 1e-3)
# RRSE
expect_equal(results$RRSE, sqrt(rse_(targets, preds)), tolerance = 1e-3)
expect_equal(results$RRSE, 1.9180732228932, tolerance = 1e-3)
# MAPE
expect_equal(results$MAPE, mape_(targets, preds), tolerance = 1e-3)
expect_equal(results$MAPE, 0.390727375837037, tolerance = 1e-3)
# MSE
expect_equal(results$MSE, rmse_(targets, preds)^2, tolerance = 1e-3)
expect_equal(results$MSE, 914.983950363419, tolerance = 1e-3)
# TAE
expect_equal(results$TAE, sum(abs(resids(targets, preds))), tolerance = 1e-3)
expect_equal(results$TAE, 2434.77034755331, tolerance = 1e-3)
# TSE
expect_equal(results$TSE, sum(resids(targets, preds)^2), tolerance = 1e-3)
expect_equal(results$TSE, 91498.3950363419, tolerance = 1e-3)
})