Nothing
# data -----------------------------------------------------------------
# cf dataset anxiety from irr-package
anxiety <- data.frame(rater1 = c(3L, 3L, 3L, 4L, 5L, 5L, 2L, 3L, 5L, 2L, 2L, 6L, 1L, 5L, 2L, 2L, 1L, 2L, 4L, 3L),
rater2 = c(3L, 6L, 4L, 6L, 2L, 4L, 2L, 4L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 1L, 3L, 3L, 4L),
rater3 = c(2L, 1L, 4L, 4L, 3L, 2L, 1L, 6L, 1L, 1L, 1L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 2L, 2L))
# example data for 3-level categorical UICC rating
# gold standard in first column with no NA
# ordinary raters have many NAs
uicc <- structure(list(
GS = c("I", "II", "II", "III", "I", "III", "I",
"I", "III", "II", "III", "II", "I", "II", "II", "III", "III",
"I", "I", "I", "III", "I", "III", "III", "III", "II", "III",
"III", "I", "I", "I", "I", "III", "I", "I", "I", "I", "II", "II",
"I", "I", "III", "III", "II", "I", "I", "III", "II", "III", "I",
"I"),
rater1 = c("III", "II", "II", "II", "III", "III", NA, "I",
NA, "III", NA, "I", "I", "III", NA, "III", "III", "I", "I", "I",
NA, "I", NA, NA, NA, NA, NA, "I", "I", "II", NA, "I", "II", NA,
"II", "III", "I", "II", "III", "II", NA, "II", NA, NA, "I", "I",
"III", NA, "III", NA, "I"),
rater2 = c("II", "II", "II", "II",
NA, "III", "II", NA, "II", "II", "III", "I", "II", NA, NA, "III",
"I", "I", "I", "I", NA, "III", "III", "II", "II", NA, "I", "II",
"I", "II", "I", "I", "I", NA, NA, "II", "I", "II", NA, "II",
"I", NA, "II", "II", "II", "I", NA, "III", NA, "II", "II"),
rater3 = c(NA,
"II", "II", "I", "III", "III", "I", "III", "I", "III", "III",
"I", "II", "III", "III", "III", "I", "I", "II", "II", "III",
"III", "III", "II", "III", "III", "I", NA, "I", "I", NA, "I",
"I", NA, "III", "III", "II", NA, "III", "III", "III", "III",
"III", "III", NA, NA, "II", "III", "III", "III", "II"),
rater4 = c(NA,
NA, NA, NA, NA, NA, "I", NA, NA, "II", NA, "I", NA, NA, NA, "I",
NA, NA, "I", "I", "II", "III", "III", "II", "II", NA, "I", "II",
"I", "I", "I", "I", "II", NA, NA, "II", "I", "II", "III", "I",
"II", "III", "II", "III", "II", "I", "III", "II", "III", NA,
"I"),
rater5 = c("I", "II", "I", "I", NA, NA, "I", NA, NA, "III",
NA, "I", "II", "III", NA, "I", "I", "I", "I", "II", NA, "III",
NA, "III", NA, "II", "III", "II", "I", "I", NA, "I", "I", "I",
"I", NA, "II", "II", "II", "II", "II", "III", "II", "II", "I",
"II", "II", "II", "III", "II", "II"),
rater6 = c(NA, "II", "II",
"II", NA, "III", "II", "I", "II", "III", "III", "I", "I", "III",
"III", NA, "I", "I", "II", "I", NA, "III", "III", NA, NA, "II",
"I", "II", "I", "II", "III", "II", "II", "I", NA, "II", "II",
"II", "III", "II", "II", "III", NA, "II", "I", "I", "III", "II",
"III", NA, "II"),
rater7 = c(NA, "II", "II", "II", "III", "III",
"III", "I", "I", "II", "II", "II", "II", "III", "III", "III",
NA, "II", "I", "II", NA, "III", "III", "II", "II", "III", "III",
NA, "I", "II", "III", "I", "II", NA, "III", "III", "I", "II",
"III", "III", "III", "III", "III", "III", "I", "I", "II", "III",
"III", NA, "I"),
rater8 = c("I", NA, "I", NA, "II", "III", "III",
"III", "I", "III", "III", "I", NA, "III", "III", NA, "III", NA,
NA, NA, "III", "III", "III", NA, NA, "III", "III", NA, NA, NA,
NA, NA, "I", "III", "III", "III", NA, NA, "III", "III", NA, "III",
"III", "III", NA, "III", "III", "III", "III", "III", NA),
rater9 = c(NA,
NA, "I", NA, NA, NA, "II", NA, NA, "III", "III", NA, NA, "III",
"II", "III", "I", "II", "I", "III", "III", "III", "III", "II",
"II", "III", "III", "III", "I", "I", NA, NA, "I", "I", "III",
NA, "II", "I", "III", "II", "III", "III", NA, NA, "II", NA, "III",
NA, "III", "II", "II"),
rater10 = c("II", "I", "I", "II", "III",
"III", "III", "II", "I", NA, "I", NA, "I", NA, NA, "II", NA,
"I", "I", NA, NA, "III", NA, "II", "I", NA, "I", "I", "II", "I",
NA, "I", NA, "III", NA, "III", "I", NA, "III", "II", NA, "III",
NA, NA, "I", NA, "III", "I", NA, "III", "I"),
rater11 = c("II",
"I", "II", "I", "I", "III", NA, NA, "III", NA, NA, "I", "I",
"III", "II", "III", NA, "III", NA, "I", "III", NA, "III", "II",
"I", "II", "I", "I", "I", "I", NA, "I", NA, NA, NA, "III", "I",
"II", "III", "III", "II", "III", NA, NA, "II", "I", NA, NA, NA,
NA, NA),
rater12 = c(NA, NA, NA, "III", "III", "III", "III",
NA, "III", "III", "III", NA, "I", "III", NA, "III", NA, "I",
NA, "III", "III", "III", "III", "III", "III", "II", "III", "II",
"I", "I", NA, NA, NA, "III", NA, "III", "II", "III", "III", NA,
NA, "III", "III", NA, NA, "II", "III", NA, "III", NA, "II"),
rater13 = c("I", "I", "I", "I", "III", "III", NA, NA, "II",
"III", "I", "I", NA, NA, "III", "II", "I", "III", "I", "I",
"III", NA, NA, NA, NA, NA, NA, NA, NA, "I", NA, "I", "II",
"II", NA, "III", "I", NA, "III", NA, "III", "II", "III",
NA, "I", "I", "III", "III", NA, "II", "I"),
rater14 = c("II", "II", "II", "II", "II", "II", "II", "II", "II", NA, "II",
"II", "II", "II", NA, "II", "I", "II", "II", "II", "II",
NA, NA, "II", "II", "II", "I", "I", "I", "II", "I", "I",
"II", "I", "I", "II", "II", "II", "II", "I", "II", "III",
"II", "I", "II", NA, "II", "II", NA, NA, "II"),
rater15 = c("II", "II", "II", "II", NA, NA, "I", "II", "II",
"II", "III", "I", "II", "III", "III", "III", "I", "II", "I",
"III", NA, "III", "III", "III", "II", "II", "I", "I", "I",
"I", "I", "I", "II", "III", "I", NA, NA, "II", "III", "II",
"II", "III", "II", "II", "I", "I", "II", "II", NA, "II",
"I")),
row.names = c("sj1", "sj2", "sj3", "sj4",
"sj5", "sj6", "sj7", "sj8", "sj9", "sj10", "sj11",
"sj12", "sj13", "sj14", "sj15", "sj16", "sj17", "sj18",
"sj19", "sj20", "sj21", "sj22", "sj23", "sj24", "sj25",
"sj26", "sj27", "sj28", "sj29", "sj30", "sj31", "sj32",
"sj33", "sj34", "sj35", "sj36", "sj37", "sj38", "sj39",
"sj40", "sj41", "sj42", "sj43", "sj44", "sj45", "sj46",
"sj47", "sj48", "sj49", "sj50", "sj51"),
class = "data.frame")
# cf Fleiss, Cohen, Everitt "large sample std err of kappa", 1969
# Table1 (and also Table2)
FCEdat <- data.frame(n = as.integer(200 * c(.53, .11, .01,
.05, .14, .06,
.02, .05, .03)),
R1 = rep.int(c("I", "II", "III"), times = 3),
R2 = rep(c("I", "II", "III"), each = 3)) |>
tidyr::uncount(weights = n)
test_that("ratings data sets", {
expect_true(is.matrix(diagnoses))
expect_type(diagnoses, type = "character")
expect_s3_class(anxiety, class = "data.frame")
expect_s3_class(uicc, class = "data.frame")
expect_s3_class(FCEdat, class = "data.frame")
expect_identical(NROW(FCEdat), expected = 200L)
expect_identical(NCOL(FCEdat), expected = 2L)
# 1st column is gold standard
expect_identical(names(uicc)[[1L]], expected = "GS")
expect_identical(sum(is.na(uicc$GS)), expected = 0L)
})
# kappa -------------------------------------------------------------------
test_that("kappa2 implementation", {
# select two columns manually
k2_diag23 <- kappa2(diagnoses[, 2:3])
expect_type(k2_diag23, type = "list")
expect_named(k2_diag23,
expected = c("method", "subjects", "raters", "categories",
"robust", "agreem", "value", "se"))
expect_equal(k2_diag23$value,
expected = irr::kappa2(diagnoses[,2:3])$value)
# all sets of two columns
purrr::walk(.x = utils::combn(NCOL(diagnoses), 2L, simplify = FALSE),
.f = ~ expect_equal(kappa2(diagnoses[, .x])$value,
expected = irr::kappa2(diagnoses[, .x])$value))
# cf Fleiss, Cohen, Everitt "large sample std err of kappa", 1969
# unweighted kappa (after Table2)
k2_FCE <- kappa2(FCEdat)
expect_equal(k2_FCE$value, expected = .429, tolerance = 1e-3)
expect_equal(k2_FCE$se^2, expected = .002885, tolerance = 1e-4)
})
test_that("kappam_fleiss", {
diag_kf <- kappam_fleiss(ratings = diagnoses, variant = "fleiss")
# actually, Conger does not fit here to the diagnoses dataset
#+as we have no select set of six fixed raters!
diag_kc <- kappam_fleiss(ratings = diagnoses, variant = "conger")
K_NAMES <- c("method", "subjects", "raters", "categories", "ratings", "balanced",
"agreem", "value0", "value", "se_j", "stat.name", "statistic", "p.value")
expect_type(diag_kf, "list")
expect_named(diag_kf, expected = c(K_NAMES, "se0", "statistic0", "p.value0"))
expect_equal(diag_kf$value0, expected = 0.43024, tolerance = 1e-4) # from irr::kappam.fleiss
expect_type(diag_kc, "list")
expect_named(diag_kc, expected = K_NAMES)
# diagnoses with un-permuated rater1-rater6: 0.44181
expect_equal(diag_kc$value0, expected = 0.43112, tolerance = 1e-4) # from irr::kappam.fleiss
# Fleiss, "Stat. methods for rates and prop.", 3rd ed, section 18.3, table 18.8:
# five ratings on each of ten subjects into one of three categories
# we assume that we have identical raters
fleissmEx18.8 <- list(
nSj = 10,
nRt = 5)
fleissmEx18.8$tab_cnt_sj <- matrix(c(1, 4, 0,
2, 0, 3,
0, 0, 5,
4, 0, 1,
3, 0, 2,
1, 4, 0,
5, 0, 0,
0, 4, 1,
1, 0, 4,
3, 0, 2),
nrow = fleissmEx18.8$nSj, byrow = TRUE,
dimnames = list(subj = paste0("S", seq_len(fleissmEx18.8$nSj)),
cat = LETTERS[1:3]))
fleissmEx18.8$prob_tab <- colSums(fleissmEx18.8$tab_cnt_sj) / sum(fleissmEx18.8$tab_cnt_sj)
fleissmEx18.8$rat <- fleissmEx18.8$tab_cnt_sj |>
tibble::as_tibble(rownames = "sjID") |>
tidyr::pivot_longer(cols = c(A, B, C), names_to = "Category", values_to = "Cnt") |>
tidyr::uncount(weights = Cnt) |>
# optionally, permute rows within sjID to have exchangeable raters
#+with no permutation it is ordered and first rater tends to give A etc
dplyr::mutate(rID = dplyr::row_number(), .by = sjID) |>
tidyr::pivot_wider(id_cols = sjID,
names_from = rID, names_prefix = "R",
values_from = Category) |>
tibble::column_to_rownames(var = "sjID")
fleissmEx18.8$kappa <- kappam_fleiss(ratings = fleissmEx18.8$rat, detail = TRUE)
#
with(fleissmEx18.8, {
expect_identical(kappa$categories, 3L)
expect_equal(kappa$value0, 0.42, tolerance = .01) #value given by Fleiss
expect_equal(kappa$value0,
1 - (nSj * nRt^2 - sum(tab_cnt_sj^2)) / (nSj * nRt * (nRt-1) * crossprod(prob_tab, 1-prob_tab)[1]))
expect_equal(kappa$se0, 0.072, tolerance = .01)
expect_equal(kappa$statistic0, 5.83, tolerance = .01)
expect_identical(colnames(kappa$detail), expected = c("kappa_j", "se0_j", "z_j", "p.value_j"))
expect_equal(kappa$detail[, "kappa_j"], c(A=.29, B=.67, C=.35), tolerance = .01)
expect_equal(kappa$detail[, "se0_j"], rep(0.10, 3), ignore_attr = TRUE)
})
# cf Fleiss (2003), sec 18.3, table 18.7
fleissmEx18.7 <- matrix(c(2, 2,
2, 0,
3, 2,
4, 3,
3, 3,
4, 1,
3, 0,
5, 0,
2, 0,
4, 4,
5, 5,
3, 3,
4, 4,
4, 3,
2, 0,
2, 2,
3, 1,
2, 1,
4, 1,
5, 4,
3, 2,
4, 0,
3, 0,
3, 3,
2, 2), ncol = 2, byrow = TRUE,
dimnames = list(subj = paste0("S", seq_len(25)),
cnt = c("m_i", "x_i"))) |>
tibble::as_tibble(rownames = "sjID") |>
dplyr::rename(A = x_i) |>
dplyr::mutate(B = m_i - A, m_i = NULL) |>
tidyr::pivot_longer(cols = c(A, B), names_to = "Category", values_to = "Cnt") |>
tidyr::uncount(weights = Cnt) |>
# optionally, permute rows within sjID to have exchangeable raters
#+with no permutation it is ordered and first rater tends to give A etc
dplyr::mutate(rID = dplyr::row_number(), .by = sjID) |>
tidyr::pivot_wider(id_cols = sjID,
names_from = rID, names_prefix = "R",
values_from = Category) |>
tibble::column_to_rownames(var = "sjID")
kfEx18.7 <- kappam_fleiss(ratings = fleissmEx18.7)
kfEx18.7_cong <- kappam_fleiss(ratings = fleissmEx18.7, variant = "cong")
kfEx18.7_rob <- kappam_fleiss(ratings = fleissmEx18.7, variant = "rob")
expect_identical(kfEx18.7$subjects, 25L)
expect_identical(kfEx18.7$raters, 5L)
expect_identical(kfEx18.7$ratings, 81L)
expect_identical(kfEx18.7$categories, 2L)
expect_false(kfEx18.7$balanced)
expect_equal(kfEx18.7$value0, 0.54, tolerance = .05) #cf sect 18.3, p. 613
expect_equal(kfEx18.7$se0, 0.103, tolerance = .01)
expect_equal(kfEx18.7$statistic0, 5.24, tolerance = .05)
# conger reduces the chance agreement a little bit, hence kappa increases
expect_gt(kfEx18.7_cong$value0, kfEx18.7$value0)
# robust assumes smallest possible chance agreement, hence kappa increases
expect_gt(kfEx18.7_rob$value0, kfEx18.7$value0)
})
test_that("kappam_gold", {
# take first rater as reference
kg_anx <- kappam_gold(ratings = anxiety)
expect_identical(kg_anx$subjects, expected = NROW(anxiety))
expect_identical(kg_anx$raters, expected = NCOL(anxiety)-1)
# kappam_gold as average of pairwise Cohen's kappa
expect_equal(kg_anx$value0,
expected = mean(purrr::map_dbl(2L:NCOL(anxiety), ~ kappa2(ratings = anxiety[, c(1L,.x)])$value)))
kg_diag <- kappam_gold(ratings = diagnoses)
expect_identical(kg_diag$subjects, expected = NROW(diagnoses))
expect_identical(kg_diag$raters, expected = NCOL(diagnoses)-1)
# kappam_gold as average of pairwise Cohen's kappa
expect_equal(kg_diag$value0,
expected = mean(purrr::map_dbl(2:NCOL(diagnoses),
~ kappa2(ratings = diagnoses[, c(1L, .x)])$value)))
# copes with NAs
expect_no_error({kg_uicc <- kappam_gold(ratings = uicc)})
expect_type(kg_uicc, type = "list")
expect_named(kg_uicc, expected = c("method",
"subjects", "raters", "categories",
"agreem",
"value0", "value",
"se_j", "conf.level", "ci.lo", "ci.hi", "ci.width"))
expect_identical(kg_uicc$subjects, NROW(uicc))
expect_identical(kg_uicc$raters, NCOL(uicc)-1)
# pairwise Cohen's kappa with gold standard
k2_uicc <- purrr::map(.x = 2L:NCOL(uicc), .f = ~ kappa2(ratings = uicc[, c(1L, .x)]))
expect_identical(purrr::map_dbl(k2_uicc, "subjects"), expected = colSums(!is.na(uicc[,-1L])),
ignore_attr = "names")
expect_equal(kg_uicc$value0,
expected = weighted.mean(x = purrr::map_dbl(k2_uicc, "value"),
w = purrr::map_dbl(k2_uicc, "subjects")))
})
test_that("kappa simulation", {
simK <- simulKappa(nRater = 8, cats = 3, nSubj = 11, mcSim = 9,
# assumed prob for classification by raters
probs = matrix(c(.6, .2, .1, # subjects of cat 1
.3, .4, .3, # subjects of cat 2
.1, .4, .5 # subjects of cat 3
), nrow = 3, byrow = TRUE))
expect_type(simK, "list")
expect_s3_class(simK, class = "data.frame")
expect_named(simK,
expected = c("nRater", "nSubjTotal", "kappam_gold", "ci_lo", "ci_hi", "ci_halfwidth"))
expect_identical(simK$nRater, expected = rep.int(8, times = 9))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.