#' Light's G
#'
#' Calculate Light's G based on Light's 1971 paper. Used to compare multiple raters with standard.
#'
#' @param subject A vector identifying subjects.
#' @param rater A vector identifying raters.
#' @param rating A vector identifying ratings.
#' @param rater.standard A scalar identifying the name of the rater considered "correct".
#' @param alternative The alternative hypothesis to use for the test computation.
#' @param conf.level The confidence level for this test, between 0 and 1.
#'
#' @references
#' Light. Measurements of Response agreement for Qualitative Data: Some Generalizations and Alternatives.
#' Psychological Bulletin. 1971 76-5. pg 365-377
#'
#' @return The results of the statistical test.
#Note: Include standard as a rater in the data and identify it with rater.standard
cor.light.g.onesample <- function(
subject
,rater
,rating
,rater.standard = "Standard"
#,weight = rep(1, length(rating)) -> not used, only 1
,alternative = c("two.sided", "greater", "less")
,conf.level = .95
) {
validate.htest.alternative(alternative = alternative)
#if (any(weight < 0)) {
# weight[which(weight < 0)] <- 0
#}
if (!is.factor(subject)) {
subject <- factor(subject)
}
if (!is.factor(rater)) {
rater <- factor(rater)
}
if (!is.factor(rating)) {
rating <- factor(rating)
}
# m observers assign n subjects among C categories
unique_subjects <- levels(subject)
unique_ratings <- levels(rating)
unique_raters <- levels(rater)
if (!(rater.standard %in% unique_raters)) {
stop("rater.standard not in list of raters. Specify rater.standard to be one of the names in the rater parameter.")
}
#n_ij_p <- responses in the pth contingency table, i,j in 1:C and p in 1:m
n_ij_p <- list() #n_ij_p[[rater]][[row_category]][[col_category]] <- count
for (p in unique_raters) {
if (p != rater.standard) {
n_ij_p[[p]] <- list()
for (i in unique_ratings) {
n_ij_p[[p]][[i]] <- list()
for (j in unique_ratings) {
n_ij_p[[p]][[i]][[j]] <- 0
}
}
}
}
n <- 0
standard <- list() #[[subject]] <- rating
standard.margin <- list() #[[rating]] <- n_i+ row sum
rater.margin <- list() #[[rater]][[rating]] <- n_+j col sum
rater.names <- levels(rater)[rater]
subject.names <- levels(subject)[subject]
rating.names <- levels(rating)[rating]
for (i in unique_ratings) {
standard.margin[[i]] <- 0
}
for (p in unique_raters) {
if (p != rater.standard) {
rater.margin[[p]] <- list()
for (j in unique_ratings) {
rater.margin[[p]][[j]] <- 0
}
}
}
for (idx in 1:length(rater.names)) {
if (rater.names[idx] == rater.standard) {
standard[[subject.names[idx]]] <- rating.names[idx]
standard.margin[[rating.names[idx]]] <- standard.margin[[rating.names[idx]]] + 1
n <- n + 1
} else {
rater.margin[[rater.names[idx]]][[rating.names[idx]]] <- rater.margin[[rater.names[idx]]][[rating.names[idx]]] + 1
}
}
for (idx in 1:length(rater.names)) {
tmp.sn <- subject.names[idx]
p <- rater.names[idx]
j <- rating.names[idx]
i <- standard[[tmp.sn]]
n_ij_p[[p]][[i]][[j]] <- n_ij_p[[p]][[i]][[j]] + 1
}
#row variable is always standard, col variable is always a rater
#notation n_i+ is row margin, n_+j is column margin
#G = (t_m - E(t_m))/sqrt(xi_t_m) ~ standard normal [Eq 11]
#t_m - across all raters, how many total agreements with standard, eq 11a
#E(t_m) - expected value for total agreements with standard, eq 11b
t_m <- 0
E_t_m <- 0
xi_t_m_3 <- 0 #convenient to calculate with this loop
xi_t_m_4 <- 0 #convenient to calculate with this loop
for (i in unique_ratings) {
for (p in unique_raters) {
if (p != rater.standard) {
t_m <- t_m + n_ij_p[[p]][[i]][[i]]
}
}
E_t_m <- E_t_m + standard.margin[[i]] * sum(rmnames(unlist(lapply(rater.margin, FUN = function(margin) {
margin[[i]]
}))))
xi_t_m_3 <- xi_t_m_3 + (standard.margin[[i]] * standard.margin[[i]]) * sum(rmnames(unlist(lapply(rater.margin, FUN = function(margin) {
margin[[i]]
}))))
xi_t_m_4 <- xi_t_m_4 + standard.margin[[i]] * sum(rmnames(unlist(lapply(rater.margin, FUN = function(margin) {
margin[[i]] * margin[[i]]
}))))
}
E_t_m <- E_t_m / n
xi_t_m_3 <- xi_t_m_3 /(n*(n-1))
xi_t_m_4 <- xi_t_m_4 /(n*(n-1))
#xi_t_m - variance of t_m, eq 11c
xi_t_m_1 <- (n/(n-1)) * E_t_m
xi_t_m_Z <- 0
#outer loop - standard margin
#inner loop - rater margin
#lappy/sum over raters for product with inner, then outer loop
for (i in unique_ratings) {
add_outer <- 0
for (j in unique_ratings) {
add_outer <- add_outer + standard.margin[[j]] * sum(rmnames(unlist( lapply(rater.margin, FUN = function(p) {
p[[i]] * p[[j]]
}))))
}
add_outer <- add_outer * standard.margin[[i]]
xi_t_m_Z <- xi_t_m_Z + add_outer
}
xi_t_m_2 <- xi_t_m_Z / ( (n^2) * (n-1))
xi_t_m <- xi_t_m_1 + xi_t_m_2 - xi_t_m_3 - xi_t_m_4
#Delete/rename most below this line
#n for variance calculation, all subjects should have equal number of raters, but...
se.G <- sqrt(xi_t_m)
G <- (t_m-E_t_m)/se.G
#SE Kappa and CI
z <- G
cv <- qnorm(conf.level+(1-conf.level)/2)
ci.add <- cv*se.G #+ E_t_m #note: not 100% sure about adding back expected value
ci.lower <- NA# G-ci.add
ci.upper <- NA# G+ci.add
p.value <- if (alternative[1] == "two.sided") {
tmp<-pnorm(z)
min(tmp,1-tmp)*2
} else if (alternative[1] == "greater") {
pnorm(z,lower.tail = FALSE)
} else if (alternative[1] == "less") {
pnorm(z,lower.tail = TRUE)
} else {
NA
}
retval<-list(data.name = "standard, subjects, raters, and ratings",
statistic = z,
estimate = c(G = G
,se.G = se.G
),
parameter = 0,
p.value = p.value,
null.value = 0,
alternative = alternative[1],
method = "Light's G (1971)",
conf.int = c(ci.lower, ci.upper)
)
#names(retval$estimate) <- c("sample mean")
names(retval$statistic) <- "z"
names(retval$null.value) <- "G"
names(retval$parameter) <- "null hypothesis G"
attr(retval$conf.int, "conf.level") <- conf.level
class(retval)<-"htest"
retval
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.