R/agree.coeff2.r

Defines functions kappa2.table scott2.table gwet.ac1.table bp2.table krippen2.table pa2.table

Documented in bp2.table gwet.ac1.table kappa2.table krippen2.table pa2.table scott2.table

#						  AGREE.COEFF2.R
#					     (August 25, 2019)
#Description: This script file contains a series of R functions for computing various agreement coefficients
#		  for 2 raters when the input data file is in the form of 2x2 contingency table showing the distribution
#             of subjects by rater, and by category.
#Author: Kilem L. Gwet, Ph.D.
#

#====================================================================================
#kappa2.table: Cohen's kappa (Cohen(1960)) coefficient and its standard error for 2 raters when input dataset is a contingency table
#-------------
#The input data "ratings" is a qxq contingency table showing the distribution of
#subjects by rater, when q is the number of categories.
#======================================================================================
#' Kappa coefficient for 2 raters
#' @param ratings A square or contingency table of ratings (assume no missing ratings). See the 2 datasets 
#' "cont3x3abstractors" and "cont4x4diagnosis" that come with this package as examples.
#' @param weights An optional matrix that contains the weights used in the weighted analysis.
#' @param conflev An optional confidence level for confidence intervals. The default value is the traditional 0.95.
#' @param N An optional population size.  The default value is infinity.
#' @importFrom stats pt qt
#' @return A data frame containing the following 5 variables: coeff.name coeff.val coeff.se coeff.ci coeff.pval.
#' @examples 
#' #The dataset "cont3x3abstractors" comes with this package. Analyze it as follows:
#' kappa2.table(cont3x3abstractors) #Yields Cohen's kappa along with precision measures
#' kappa <- kappa2.table(cont3x3abstractors)$coeff.val #Yields Cohen's kappa alone.
#' kappa
#' q <- nrow(cont3x3abstractors) #Number of categories
#' kappa2.table(cont3x3abstractors,weights = quadratic.weights(1:q))#weighted kappa/quadratic wts
#' @export
kappa2.table <- function(ratings,weights = identity.weights(1:ncol(ratings)),conflev=0.95,N=Inf){
  ratings <- as.matrix(ratings)
  if(dim(ratings)[1] != dim(ratings)[2]){
  	stop('The contingency table should have the same number of rows and columns!')
  }
  if (ncol(ratings) != ncol(weights)){
    stop('The weight matrix has fewer columns than the contingency table. Please revise your input weights!')
  }
  n <- sum(ratings) # number of subjects
  f <- n/N # finite population correction
  q <- ncol(ratings) # number of categories
  pa <- sum(weights * ratings/n) # percent agreement
  pk. <- (ratings%*%rep(1,q))/n
  p.l <- t((t(rep(1,q))%*%ratings)/n)
  pe <- sum(weights*(pk.%*%t(p.l)))
  kappa <- (pa - pe)/(1 - pe) # weighted kappa
  
  # 2 raters special case variance
  
  pkl <- ratings/n
  pb.k <- weights %*% p.l
  pbl. <- t(weights) %*% pk.
  sum1 <- 0
  for(k in 1:q){
    for(l in 1:q){
      sum1 <- sum1 + pkl[k,l]* (weights[k,l]-(1-kappa)*(pb.k[k] + pbl.[l]))^2
    }
  }
  var.kappa <- ((1-f)/(n*(1-pe)^2)) * (sum1 - (pa-2*(1-kappa)*pe)^2)
  stderr <- sqrt(var.kappa)# kappa standard error
  p.value <- NA;lcb <- NA;ucb <- NA
  if (n>=2){
    p.value <- 2*(1-pt(kappa/stderr,n-1))
    lcb <- kappa - stderr*qt(1-(1-conflev)/2,n-1) # lower confidence bound
    ucb <- min(1,kappa + stderr*qt(1-(1-conflev)/2,n-1)) # upper confidence bound
  }
  conf.int <- paste0("(",round(lcb,3),",",round(ucb,3),")")
  coeff.name <- "Cohen's Kappa"
  coeff.val <- kappa
  coeff.se <- stderr
  coeff.ci <- conf.int
  coeff.pval <- format(p.value,digits=4,nsmall=3,scientific = TRUE)
  return(data.frame(coeff.name,coeff.val,coeff.se,coeff.ci,coeff.pval))
}

#scott2.table: Scott's pi coefficient (Scott(1955)) and its standard error for 2 raters when input dataset is a contingency table
#-------------
#The input data "ratings" is a qxq contingency table showing the distribution of
#subjects by rater, when q is the number of categories.
#======================================================================================
#' Scott's coefficient for 2 raters
#' @param ratings A square table of ratings (assume no missing ratings).
#' @param weights An optional matrix that contains the weights used in the weighted analysis. By default, this parameter contaings the identity weight matrix, which leads to the unweighted analysis.
#' @param conflev An optional parameter that specifies the confidence level used for constructing confidence intervals. By default the function assumes the standard value of 95\%.
#' @param N An optional parameter representing the finite population size if any. It is used to perform the finite population correction to the standard error. It's default value is infinity. 
#' @return A data frame containing the following 5 variables: coeff.name coeff.val coeff.se coeff.ci coeff.pval.
#' @examples 
#' #The dataset "cont3x3abstractors" comes with this package. Analyze it as follows:
#' scott2.table(cont3x3abstractors) #Yields Scott's Pi coefficient along with precision measures
#' scott <- scott2.table(cont3x3abstractors)$coeff.val #Yields Scott's coefficient alone.
#' scott
#' q <- nrow(cont3x3abstractors) #Number of categories
#' scott2.table(cont3x3abstractors,weights = quadratic.weights(1:q)) #weighted Scott's coefficient
#' @export
scott2.table <- function(ratings,weights=identity.weights(1:ncol(ratings)),conflev=0.95,N=Inf){
  ratings <- as.matrix(ratings)
  if(dim(ratings)[1] != dim(ratings)[2]){
  	stop('The contingency table should have the same number of rows and columns!')
  }
  if (ncol(ratings) != ncol(weights)){
    stop('The weight matrix has fewer columns than the contingency table. Please revise your input weights!')
  }
  n <- sum(ratings) # number of subjects
  f <- n/N # finite population correction
  q <- ncol(ratings) # number of categories
  pa <- sum(weights * ratings/n) # percent agreement

  pk. <- (ratings%*%rep(1,q))/n
  p.l <- t((t(rep(1,q))%*%ratings)/n)
  pi.k <- (pk.+p.l)/2
  pe <- sum(weights*(pi.k%*%t(pi.k)))
  scott <- (pa - pe)/(1 - pe) # weighted scott's pi coefficint

	# 2 raters special case variance

  pkl <- ratings/n	     #p_{kl}
  pb.k <- weights %*% p.l    #\ov{p}_{+k}
  pbl. <- t(weights) %*% pk. #\ov{p}_{l+}
  pbk  <- (pb.k + pbl.)/2    #\ov{p}_{k}
  sum1 <- 0
  for(k in 1:q){
  	for(l in 1:q){
  		sum1 <- sum1 + pkl[k,l] * (weights[k,l]-(1-scott)*(pbk[k] + pbk[l]))^2
  	}
  }
  var.scott <- ((1-f)/(n*(1-pe)^2)) * (sum1 - (pa-2*(1-scott)*pe)^2)
  stderr <- sqrt(var.scott)# Scott's standard error
  p.value <- NA;lcb <- NA;ucb <- NA
  if (n>=2){
    p.value <- 2*(1-pt(scott/stderr,n-1))
    lcb <- scott - stderr*qt(1-(1-conflev)/2,n-1) # lower confidence bound
    ucb <- min(1,scott + stderr*qt(1-(1-conflev)/2,n-1)) # upper confidence bound
  } 
  conf.int <- paste0("(",round(lcb,3),",",round(ucb,3),")")
  coeff.name <- "Scott's Pi"
  coeff.val <- scott
  coeff.se <- stderr
  coeff.ci <- conf.int
  coeff.pval <- format(p.value,digits=4,nsmall=3,scientific = TRUE)
  return(data.frame(coeff.name,coeff.val,coeff.se,coeff.ci,coeff.pval))
}

#gwet.ac1.table: Gwet's AC1/Ac2 coefficient (Gwet(2008)) and its standard error for 2 raters when input dataset is a contingency table
#-------------
#The input data "ratings" is a qxq contingency table showing the distribution of
#subjects by rater, when q is the number of categories.
#======================================================================================
#' Gwet's AC1/AC2 coefficient for 2 raters
#' @param ratings A square table of ratings (assume no missing ratings).
#' @param weights An optional matrix that contains the weights used in the weighted analysis. By default, this 
#' parameter contaings the identity weight matrix, which leads to the unweighted analysis.
#' @param conflev An optional parameter that specifies the confidence level used for constructing confidence 
#' intervals. By default the function assumes the standard value of 95\%.
#' @param N An optional parameter representing the finite population size if any. It is used to perform the finite
#' population correction to the standard error. It's default value is infinity.
#' @return A data frame containing the following 5 variables: coeff.name coeff.val coeff.se coeff.ci coeff.pval.
#' @examples 
#' #The dataset "cont3x3abstractors" comes with this package. Analyze it as follows:
#' gwet.ac1.table(cont3x3abstractors) #Yields AC1 along with precision measures
#' ac1 <- gwet.ac1.table(cont3x3abstractors)$coeff.val #Yields AC1 coefficient alone.
#' ac1
#' q <- nrow(cont3x3abstractors) #Number of categories
#' gwet.ac1.table(cont3x3abstractors,weights = quadratic.weights(1:q)) #AC2 with quadratic weights
#' @export
gwet.ac1.table <- function(ratings,weights=identity.weights(1:ncol(ratings)),conflev=0.95,N=Inf){
  ratings <- as.matrix(ratings)
  if(dim(ratings)[1] != dim(ratings)[2]){
  	stop('The contingency table should have the same number of rows and columns!')
  }
  if (ncol(ratings) != ncol(weights)){
    stop('The weight matrix has fewer columns than the contingency table. Please revise your input weights!')
  }
  n <- sum(ratings) # number of subjects
  f <- n/N # finite population correction
  q <- ncol(ratings) # number of categories
  pa <- sum(weights * ratings/n) # percent agreement

  pk. <- (ratings%*%rep(1,q))/n
  p.l <- t((t(rep(1,q))%*%ratings)/n)
  pi.k <- (pk.+p.l)/2
  tw <- sum(weights)
  pe <- tw * sum(pi.k *(1-pi.k))/(q*(q-1))
  gwet.ac1 <- (pa - pe)/(1 - pe) # gwet's ac1/ac2 coefficint

	# calculation of variance - standard error - confidence interval - p-value

  pkl <- ratings/n	     #p_{kl}
  sum1 <- 0
  for(k in 1:q){
  	for(l in 1:q){
  		sum1 <- sum1 + pkl[k,l] * (weights[k,l]-2*(1-gwet.ac1)*tw*(1-(pi.k[k] + pi.k[l])/2)/(q*(q-1)))^2
  	}
  }
  var.gwet <- ((1-f)/(n*(1-pe)^2)) * (sum1 - (pa-2*(1-gwet.ac1)*pe)^2)
  stderr <- sqrt(var.gwet)# ac1's standard error
  p.value <- NA;lcb <- NA;ucb <- NA
  if (n>=2){
    p.value <- 2*(1-pt(gwet.ac1/stderr,n-1))
    lcb <- gwet.ac1 - stderr*qt(1-(1-conflev)/2,n-1) # lower confidence bound
    ucb <- min(1,gwet.ac1 + stderr*qt(1-(1-conflev)/2,n-1)) # upper confidence bound
  }
  conf.int <- paste0("(",round(lcb,3),",",round(ucb,3),")")
  if (sum(weights)==q) coeff.name <- "Gwet's AC1"
  else coeff.name <- "Gwet's AC2"
  coeff.val <- gwet.ac1
  coeff.se <- stderr
  coeff.ci <- conf.int
  coeff.pval <- format(p.value,digits=4,nsmall=3,scientific = TRUE)
  return(data.frame(coeff.name,coeff.val,coeff.se,coeff.ci,coeff.pval))
}
#bp2.table: Brennan-Prediger coefficient (Brennan & Prediger (1981)) and its standard error for 2 raters when input dataset is a contingency table
#-------------
#The input data "ratings" is a qxq contingency table showing the distribution of
#subjects by rater, when q is the number of categories.
#======================================================================================
#' Brenann-Prediger coefficient for 2 raters
#' @param ratings A square table of ratings (assume no missing ratings).
#' @param weights An optional matrix that contains the weights used in the weighted analysis. By default, this 
#' parameter contaings the identity weight matrix, which leads to the unweighted analysis.
#' @param conflev An optional parameter that specifies the confidence level used for constructing confidence 
#' intervals. By default the function assumes the standard value of 95\%.
#' @param N An optional parameter representing the finite population size if any. It is used to perform the finite
#' population correction to the standard error. It's default value is infinity.
#' @examples 
#' #The dataset "cont3x3abstractors" comes with this package. Analyze it as follows:
#' bp2.table(cont3x3abstractors) #Yields Brennan-Prediger's coefficient along with precision measures
#' bp <- bp2.table(cont3x3abstractors)$coeff.val #Yields Brennan-Prediger coefficient alone.
#' bp
#' q <- nrow(cont3x3abstractors) #Number of categories
#' bp2.table(cont3x3abstractors,weights = quadratic.weights(1:q)) #Weighted BP coefficient
#' @return A data frame containing the following 5 variables: coeff.name coeff.val coeff.se coeff.ci coeff.pval.
#' @export
bp2.table <- function(ratings,weights=identity.weights(1:ncol(ratings)),conflev=0.95,N=Inf){
  ratings <- as.matrix(ratings)
  if(dim(ratings)[1] != dim(ratings)[2]){
  	stop('The contingency table should have the same number of rows and columns!')
  }
  if (ncol(ratings) != ncol(weights)){
    stop('The weight matrix has fewer columns than the contingency table. Please revise your input weights!')
  }
  n <- sum(ratings) # number of subjects
  f <- n/N # finite population correction
  q <- ncol(ratings) # number of categories
  pa <- sum(weights * ratings/n) # percent agreement

  tw <- sum(weights)
  pe <- tw/(q^2)
  bp.coeff <- (pa - pe)/(1 - pe) # Brennan-Prediger coefficint

	# calculation of variance - standard error - confidence interval - p-value

  pkl <- ratings/n	     #p_{kl}
  sum1 <- 0
  for(k in 1:q){
  	for(l in 1:q){
  		sum1 <- sum1 + pkl[k,l] * weights[k,l]^2
  	}
  }
  var.bp <- ((1-f)/(n*(1-pe)^2)) * (sum1 - pa^2)
  stderr <- sqrt(var.bp)# bp's standard error
  p.value <- NA;lcb <- NA;ucb <- NA
  if (n>=2){
    p.value <- 2*(1-pt(bp.coeff/stderr,n-1))
    lcb <- bp.coeff - stderr*qt(1-(1-conflev)/2,n-1) # lower confidence bound
    ucb <- min(1,bp.coeff + stderr*qt(1-(1-conflev)/2,n-1)) # upper confidence bound
  }
  conf.int <- paste0("(",round(lcb,3),",",round(ucb,3),")")
  coeff.name <- "Brennan-Prediger"
  coeff.val <- bp.coeff
  coeff.se <- stderr
  coeff.ci <- conf.int
  coeff.pval <- format(p.value,digits=4,nsmall=3,scientific = TRUE)
  return(data.frame(coeff.name,coeff.val,coeff.se,coeff.ci,coeff.pval))
}

#krippen2.table: Scott's pi coefficient (Scott(1955)) and its standard error for 2 raters when input dataset is a contingency table
#-------------
#The input data "ratings" is a qxq contingency table showing the distribution of
#subjects by rater, when q is the number of categories.
#======================================================================================
#' Krippendorff's Alpha coefficient for 2 raters
#' @param ratings A square table of ratings (assume no missing ratings).
#' @param weights An optional matrix that contains the weights used in the weighted analysis. By default, this 
#' parameter contaings the identity weight matrix, which leads to the unweighted analysis.
#' @param conflev An optional parameter that specifies the confidence level used for constructing confidence 
#' intervals. By default the function assumes the standard value of 95\%.
#' @param N An optional parameter representing the finite population size if any. It is used to perform the finite
#' population correction to the standard error. It's default value is infinity.
#' @return A data frame containing the following 5 variables: coeff.name coeff.val coeff.se coeff.ci coeff.pval.
#' @examples 
#' #The dataset "cont3x3abstractors" comes with this package. Analyze it as follows:
#' krippen2.table(cont3x3abstractors) #Krippendorff's alpha along with precision measures
#' alpha <- krippen2.table(cont3x3abstractors)$coeff.val #Krippendorff's alpha alone.
#' alpha
#' q <- nrow(cont3x3abstractors) #Number of categories
#' krippen2.table(cont3x3abstractors,weights = quadratic.weights(1:q)) #Weighted alpha coefficient
#' @export
krippen2.table <- function(ratings,weights=identity.weights(1:ncol(ratings)),conflev=0.95,N=Inf){
  ratings <- as.matrix(ratings)
  if(dim(ratings)[1] != dim(ratings)[2]){
  	stop('The contingency table should have the same number of rows and columns!')
  }
  if (ncol(ratings) != ncol(weights)){
    stop('The weight matrix has fewer columns than the contingency table. Please revise your input weights!')
  }
  n <- sum(ratings) # number of subjects
  f <- n/N # finite population correction
  q <- ncol(ratings) # number of categories
  epsi = 1/(2*n)
  pa0 <- sum(weights * ratings/n)
  pa <- (1-epsi)*pa0 + epsi # percent agreement

  pk. <- (ratings%*%rep(1,q))/n
  p.l <- t((t(rep(1,q))%*%ratings)/n)
  pi.k <- (pk.+p.l)/2
  pe <- sum(weights*(pi.k%*%t(pi.k)))
  kripp.coeff <- (pa - pe)/(1 - pe) # weighted Krippen's alpha coefficint

	# calculating variance

  pkl <- ratings/n	     #p_{kl}
  pb.k <- weights %*% p.l    #\ov{p}_{+k}
  pbl. <- t(weights) %*% pk. #\ov{p}_{l+}
  pbk  <- (pb.k + pbl.)/2    #\ov{p}_{k}
  kcoeff <- (pa0 - pe)/(1 - pe)
  sum1 <- 0
  for(k in 1:q){
	for(l in 1:q){
		sum1 <- sum1 + pkl[k,l] * (weights[k,l]-(1-kcoeff)*(pbk[k] + pbk[l]))^2
	}
  }
  var.kripp <- ((1-f)/(n*(1-pe)^2)) * (sum1 - (pa0-2*(1-kcoeff)*pe)^2)
  stderr <- sqrt(var.kripp)# Kripp. alpha's standard error
  p.value <- NA;lcb <- NA;ucb <- NA
  if (n>=2){
    p.value <- 2*(1-pt(kripp.coeff/stderr,n-1))
    lcb <- kripp.coeff - stderr*qt(1-(1-conflev)/2,n-1) # lower confidence bound
    ucb <- min(1,kripp.coeff + stderr*qt(1-(1-conflev)/2,n-1)) # upper confidence bound
  }
  conf.int <- paste0("(",round(lcb,3),",",round(ucb,3),")")
  coeff.name <- "Krippendorff's Alpha"
  coeff.val <- kripp.coeff
  coeff.se <- stderr
  coeff.ci <- conf.int
  coeff.pval <- format(p.value,digits=4,nsmall=3,scientific = TRUE)
  return(data.frame(coeff.name,coeff.val,coeff.se,coeff.ci,coeff.pval))
}
#pa2.table: Percent agreement coefficient and its standard error for 2 raters when input dataset is a contingency table
#-------------
#The input data "ratings" is a qxq contingency table showing the distribution of
#subjects by rater, when q is the number of categories.
#======================================================================================
#' Percent Agreement coefficient for 2 raters
#' @param ratings A square table of ratings (assume no missing ratings).
#' @param weights An optional matrix that contains the weights used in the weighted analysis. By default, this 
#' parameter contaings the identity weight matrix, which leads to the unweighted analysis.
#' @param conflev An optional parameter that specifies the confidence level used for constructing confidence 
#' intervals. By default the function assumes the standard value of 95\%.
#' @param N An optional parameter representing the finite population size if any. It is used to perform the finite
#' population correction to the standard error. It's default value is infinity.
#' @return A data frame containing the following 5 variables: coeff.name coeff.val coeff.se coeff.ci coeff.pval.
#' @examples 
#' #The dataset "cont3x3abstractors" comes with this package. Analyze it as follows:
#' pa2.table(cont3x3abstractors) #Yields percent agreement along with precision measures
#' pa <- pa2.table(cont3x3abstractors)$coeff.val #Yields percent agreement alone.
#' pa
#' q <- nrow(cont3x3abstractors) #Number of categories
#' pa2.table(cont3x3abstractors,weights = quadratic.weights(1:q)) #Weighted percent agreement
#' @export
pa2.table <- function(ratings,weights=identity.weights(1:ncol(ratings)),conflev=0.95,N=Inf){
  ratings <- as.matrix(ratings)
  if(dim(ratings)[1] != dim(ratings)[2]){
    stop('The contingency table should have the same number of rows and columns!')
  }
  if (ncol(ratings) != ncol(weights)){
    stop('The weight matrix has fewer columns than the contingency table. Please revise your input weights!')
  }
  n <- sum(ratings) # number of subjects
  f <- n/N # finite population correction
  q <- ncol(ratings) # number of categories
  pa <- sum(weights * ratings/n) # percent agreement

  # calculation of variance - standard error - confidence interval - p-value

  pkl <- ratings/n	     #p_{kl}
  sum1 <- 0
  for(k in 1:q){
    for(l in 1:q){
      sum1 <- sum1 + pkl[k,l] * weights[k,l]^2
    }
  }
  var.pa <- ((1-f)/n) * (sum1 - pa^2)
  stderr <- sqrt(var.pa) # pa standard error
  p.value <- NA;lcb <- NA;ucb <- NA
  if (n>=2){
    p.value <- 2*(1-pt(pa/stderr,n-1))
    lcb <- pa - stderr*qt(1-(1-conflev)/2,n-1) # lower confidence bound
    ucb <- min(1,pa + stderr*qt(1-(1-conflev)/2,n-1)) # upper confidence bound
  }
  conf.int <- paste0("(",round(lcb,3),",",round(ucb,3),")")
  coeff.name <- "Percent Agreement"
  coeff.val <- pa
  coeff.se <- stderr
  coeff.ci <- conf.int
  coeff.pval <- format(p.value,digits=4,nsmall=3,scientific = TRUE)
  return(data.frame(coeff.name,coeff.val,coeff.se,coeff.ci,coeff.pval))
}

Try the irrCAC package in your browser

Any scripts or data that you put into this service are public.

irrCAC documentation built on Sept. 23, 2019, 5:05 p.m.