R/reli_stats.R

Defines functions reli_stats

Documented in reli_stats

#' Reliability Statistics
#' @description The reli_stats and reli_aov functions produce reliability statistics described by Weir (2005). This includes intraclass correlation coefficients, the coefficient of variation, and the standard MSE of measurement.
#' @param measure Name of column containing the measurement of interest
#' @param item Name of column containing the items. If this is a test-retest reliability study then this would indicate the time point (e.g., time1,time2, time3, etc.)
#' @param id Column with subject identifier
#' @param data Data frame with all data
#' @param wide Logical value (TRUE or FALSE) indicating if data is in a "wide" format. Default is TRUE.
#' @param col.names If wide is equal to TRUE then col.names is a list of the column names containing the measurements for reliability analysis.
#' @param conf.level the confidence level required. Default is 95\%.
#' @param cv_calc Coefficient of variation (CV) calculation. This function allows for 3 versions of the CV. "MSE" is the default.
#' @param other_ci Logical value (TRUE or FALSE) indicating whether to calculate confidence intervals for the CV, SEM, SEP, and SEE. Note: this will dramatically increase the computation time.
#' @param type A character string representing the type of bootstrap confidence intervals. Only "norm", "basic", and "perc" currently supported. Bias-corrected and accelerated, bca, is the default. See ?boot::boot.ci for more details.
#' @param replicates 	The number of bootstrap replicates. Passed on to the boot function. Default is 1999.
#' @details
#' These functions return intraclass correlation coefficients and other measures of reliability (CV, SEM, SEE, and SEP).
#' The estimates of variances for any of the measures are derived from linear mixed models.
#' When other_ci is set to TRUE, then a parametric bootstrap approach to calculating confidence intervals is used for the CV, SEM, SEE, and SEP.
#'
#' reli_stats uses a linear mixed model to estimate variance components. In some cases there are convergence issues.
#' When this occurs it is prudent to use reli_aov which instead utilizes sums of squares approach.
#' The results may differ slightly between the functions.
#' If reli_aov is used then rows with missing observations (e.g., if a participant has a missing observation) will be dropped.
#'
#' The CV calculation has 3 versions. The "MSE" uses the "mean squared error" from the linear mixed model used to calculate the ICCs.
#' The "SEM" option instead uses the SEM calculation and expresses CV as a ratio of the SEM to the overall mean.
#' The "residuals" option u uses the model residuals to calculate the root mean square error which is then divided by the grand mean.
#'
#' @return Returns single list with the results of the agreement analysis.
#'
#' \describe{
#'   \item{\code{"icc"}}{Table of ICC results}
#'   \item{\code{"lmer"}}{Linear mixed model from lme4}
#'   \item{\code{"anova"}}{Analysis of Variance table}
#'   \item{\code{"var_comp"}}{Table of Variance Components}
#'   \item{\code{"n.id"}}{Number of subjects/participants}
#'   \item{\code{"n.items"}}{Number of items/time points}
#'   \item{\code{"cv"}}{Coefficient of Variation}
#'   \item{\code{"SEM"}}{List with Standard MSE of Measurement estimate (est) }
#'   \item{\code{"SEE"}}{List with Standard MSE of the Estimate estimate (est)}
#'   \item{\code{"SEP"}}{List with Standard MSE of Predicitions (est)}
#'   \item{\code{"call"}}{the matched call}
#'
#' }

#' @examples
#' data('reps')
#' reli_stats(data = reps, wide = TRUE, col.names = c("x","y"))
#'
#' @section References:
#'
#' Weir, J. P. (2005). Quantifying test-retest reliability using the intraclass correlation coefficient and the SEM. The Journal of Strength & Conditioning Research, 19(1), 231-240.
#'
#' Shrout, P.E. and Fleiss, J.L. (1976). Intraclass correlations: uses in assessing rater reliability. Psychological Bulletin, 86, 420-3428.
#'
#' McGraw, K. O. and Wong, S. P. (1996). Forming inferences about some intraclass correlation coefficients. Psychological Methods, 1, 30-46. See errata on page 390 of same volume.
#'
#' @importFrom stats pnorm qnorm lm dchisq qchisq sd var residuals aov
#' @importFrom tidyselect all_of
#' @importFrom insight get_response
#' @import dplyr
#' @import ggplot2
#' @import lme4
#' @export


reli_stats = function(measure,
                      item,
                      id,
                      data,
                      wide = FALSE,
                      col.names = NULL,
                      cv_calc = c("MSE","residuals","SEM"),
                      conf.level = .95,
                      other_ci = FALSE,
                      type = c("perc", "norm", "basic") ,
                      replicates = 1999){
  cv_calc = match.arg(cv_calc)
  type = match.arg(type)
  alpha = 1-conf.level
  x = data
  if(wide == TRUE){
    if(is.null(col.names)){
      stop("Must provide column names (col.names) if wide = TRUE")
    }
    x = x[,col.names]
    n_id <- dim(x)[1]
    n_items <- dim(x)[2]
    x.s <- stack(as.data.frame(x))
    x.df <- data.frame(x.s, subs = rep(paste("S", 1:n_id, sep = ""),
                                       n_items))
  } else {
    if(is.null(measure) || is.null(item) ||is.null(id)){
      stop("Must provide measure, item, and id column names if wide = FALSE")
    }
    x.df = x %>%
      select(all_of(measure),all_of(item),all_of(id))
  }

  colnames(x.df) <- c("values", "items", "id")

  mod.lmer <- lmer(values ~ 1 + (1 | id) + (1 | items),
                   data = x.df,
                   na.action = na.omit)
  num_lvls = ngrps(mod.lmer)
  n_items = num_lvls["items"]
  n_id = num_lvls["id"]
  vc <- VarCorr(mod.lmer) # Get Variance Components
  MS_id <- vc$id[1, 1] # var by id
  MS_items <- vc$items[1, 1] # var by item
  MSE <- (attributes(vc)$sc)^2
  # Create variance table
  MS.df <- data.frame(variance = c(MS_id, MS_items, MSE,
                                   NA))
  rownames(MS.df) <- c("ID", "Items", "Residual", "Total")
  MS.df["Total", ] <- sum(MS.df[1:3, 1], na.rm = TRUE)
  MS.df["percent"] <- MS.df/MS.df["Total", 1]

  MSB <- n_items * MS_id + MSE
  MSJ <- n_id * MS_items + MSE
  MSW <- MSE + MS_items
  stats <- matrix(NA, ncol = 3, nrow = 5)
  stats[1, 1] <- dfB <- n_id - 1
  stats[1, 2] <- dfJ <- n_items - 1
  stats[1, 3] <- dfE <- (n_id - 1) * (n_items - 1)
  stats[2, 1] <- MSB * (n_id - 1)
  stats[2, 2] <- MSJ * (n_items - 1)
  stats[2, 3] <- MSE * (n_id - 1) * (n_items - 1)
  stats[3, 1] <- MSB
  stats[3, 2] <- MSJ
  stats[3, 3] <- MSE
  stats[4, 1] <- FB <- MSB/MSE
  stats[4, 2] <- FJ <- MSJ/MSE
  stats[5, 1] <- -expm1(pf(FB, dfB, dfE, log.p = TRUE))
  stats[5, 2] <- -expm1(pf(FJ, dfJ, dfE, log.p = TRUE))

  colnames(stats) <- c("Subjects", "items", "Residual")
  rownames(stats) <- c("df", "SumSq", "MS", "F", "p")
  # transpose
  stat.final = t(stats)
  # Calculate ICCs
  ICC1 <- (MSB - MSW)/(MSB + (n_items - 1) * MSW)
  ICC2 <- (MSB - MSE)/(MSB + (n_items - 1) * MSE + n_items * (MSJ -
                                                      MSE)/n_id)
  ICC3 <- (MSB - MSE)/(MSB + (n_items - 1) * MSE)
  ICC_1_k <- (MSB - MSW)/(MSB)
  ICC_2_k <- (MSB - MSE)/(MSB + (MSJ - MSE)/n_id)
  ICC_3_k <- (MSB - MSE)/MSB
  F11 <- MSB/MSW
  df11n <- n_id - 1
  df11d <- n_id * (n_items - 1)
  p11 <- -expm1(pf(F11, df11n, df11d, log.p = TRUE))
  F21 <- MSB/MSE
  df21n <- n_id - 1
  df21d <- (n_id - 1) * (n_items - 1)
  p21 <- -expm1(pf(F21, df21n, df21d, log.p = TRUE))
  F31 <- F21
  results <- data.frame(matrix(NA, ncol = 6, nrow = 6))
  colnames(results) <- c("model","measures","type", "icc","lower.ci", "upper.ci")

  results$model = c("one-way random","two-way random", "two-way fixed",
                    "one-way random","two-way random", "two-way fixed")
  results$measures = c("Agreement", "Agreement", "Consistency",
                       "Avg. Agreement", "Avg. Agreement", "Avg. Consistency")
  results$type = c("ICC1","ICC2","ICC3","ICC1k","ICC2k","ICC3k")
  results$icc = c(ICC1,ICC2,ICC3,ICC_1_k,ICC_2_k,ICC_3_k)

  F1L <- F11/qf(1 - alpha, df11n, df11d)
  F1U <- F11 * qf(1 - alpha, df11d, df11n)
  L1 <- (F1L - 1)/(F1L + (n_items - 1))
  U1 <- (F1U - 1)/(F1U + n_items - 1)
  F3L <- F31/qf(1 - alpha, df21n, df21d)
  F3U <- F31 * qf(1 - alpha, df21d, df21n)
  results[1, 5] <- L1
  results[1, 6] <- U1
  results[3, 5] <- (F3L - 1)/(F3L + n_items - 1)
  results[3, 6] <- (F3U - 1)/(F3U + n_items - 1)
  results[4, 5] <- 1 - 1/F1L
  results[4, 6] <- 1 - 1/F1U
  results[6, 5] <- 1 - 1/F3L
  results[6, 6] <- 1 - 1/F3U
  Fj <- MSJ/MSE
  vn <- (n_items - 1) * (n_id - 1) * ((n_items * ICC2 * Fj + n_id *
                                     (1 + (n_items - 1) * ICC2) - n_items * ICC2))^2
  vd <- (n_id - 1) * n_items^2 * ICC2^2 * Fj^2 + (n_id * (1 +
                                                         (n_items - 1) * ICC2) - n_items * ICC2)^2
  v <- vn/vd
  F3U <- qf(1 - alpha, n_id - 1, v)
  F3L <- qf(1 - alpha, v, n_id - 1)
  L3 <- n_id * (MSB - F3U * MSE)/(F3U * (n_items * MSJ + (n_items *
                                                        n_id - n_items - n_id) * MSE) + n_id * MSB)
  results[2, 5] <- L3
  U3 <- n_id * (F3L * MSB - MSE)/(n_items * MSJ + (n_items * n_id -
                                                 n_items - n_id) * MSE + n_id * F3L * MSB)
  results[2, 6] <- U3
  L3k <- L3 * n_items/(1 + L3 * (n_items - 1))
  U3k <- U3 * n_items/(1 + U3 * (n_items - 1))
  results[5, 5] <- L3k
  results[5, 6] <- U3k


  if(other_ci == TRUE){

    boot_reli <- function(.) {
      reli_mod_mse(., cv_calc = cv_calc)
    }

    boo2 <- bootMer(mod.lmer, boot_reli, nsim = replicates,
                    type = "parametric", use.u = FALSE)
    res_other = tidy_boot(boo2,
                         conf.int = TRUE,
                         conf.level = conf.level,
                         conf.method = type) %>%
      rename(estimate = statistic,
             se = std.error,
             lower.ci = conf.low,
             upper.ci = conf.high) %>%
      as.data.frame()
      row.names(res_other) = res_other$term
      res_other = res_other %>%
        select(estimate,bias,se,lower.ci,upper.ci)
  } else{
    SEM = sqrt(MSE)
    sd_tots = sqrt(sum(stats[2,])/(n_id-1))
    SEE = sd_tots*sqrt(ICC3*(1-ICC3))
    SEP = sd_tots*sqrt(1-ICC3^2)

    mw <- mean(x.df$values, na.rm = TRUE)
    if(cv_calc == "residuals"){
      stddev <- sqrt(mean(residuals(mod.lmer)^2))
    } else if(cv_calc == "MSE"){
      stddev <- sqrt(MSE)
    } else if(cv_calc == "SEM"){
      stddev <- SEM
    } else {
      stop("cv_calc must be SEM, MSE, or residuals")
    }

    cv_out = stddev/mw
    res_other = data.frame(
      estimate = c(cv_out, SEM, SEP, SEE),
      bias = NA,
      se = NA,
      lower.ci = NA,
      upper.ci = NA,
      row.names = c("cv", "SEM", "SEP", "SEE")
    )
  }


  # Save call
  lm_mod = list(call = list(formula = as.formula(x.df$values ~ x.df$id + x.df$items)))
  call2 = match.call()

  call2$lm_mod = lm_mod

  # Save call
  lm_mod = list(call = list(formula = as.formula(x.df$values ~ x.df$id + x.df$items)))
  call2 = match.call()

  call2$lm_mod = lm_mod
  if(is.null(call2$conf.level)){
    call2$conf.level = conf.level
  }

  if(is.null(call2$other_ci)){
    call2$other_ci = other_ci
  }

  result <- list(icc = results,
                 lmer = mod.lmer,
                 anova = stats,
                 var_comp = MS.df,
                 n.id = nrow(ranef(mod.lmer)$id),
                 n.item = nrow(ranef(mod.lmer)$item),
                 call = call2,
                 cv = res_other["cv",],
                 SEM = res_other["SEM",],
                 SEE = res_other["SEE",],
                 SEP = res_other["SEP",])


  structure(result,
            class = "simple_reli")
}

#' @rdname reli_stats
#' @export
#'
reli_aov = function(measure,
                    item,
                    id,
                    data,
                    wide = FALSE,
                    col.names = NULL,
                    cv_calc = c("MSE","residuals","SEM"),
                    conf.level = .95,
                    other_ci = FALSE,
                    type = "perc",
                    replicates = 1999) {
  cv_calc = match.arg(cv_calc)

  alpha = 1-conf.level
  x = data
  if(wide == TRUE){
    if(is.null(col.names)){
      stop("Must provide column names (col.names) if wide = TRUE")
    }
    x = x[,col.names]
    n_id <- dim(x)[1]
    n_items <- dim(x)[2]
    x.s <- stack(as.data.frame(x))
    x.df <- data.frame(x.s, subs = rep(paste("S", 1:n_id, sep = ""),
                                       n_items))
  } else {
    if(is.null(measure) || is.null(item) ||is.null(id)){
      stop("Must provide measure, item, and id column names if wide = FALSE")
    }
    x.df = x %>%
      select(all_of(measure),all_of(item),all_of(id))
  }

  colnames(x.df) <- c("values", "items", "id")
  na_df = subset(x.df, is.na(values))
  na_drops = unique(na_df$id)
  if(any(is.na(x.df$items)) || any(is.na(x.df$id))){
    stop("Missing values found in item label and/or id labels.")
  }
  x.df =  subset(x.df, !(x.df$id %in% na_drops))

  n_id <- length(unique(x.df$id))
  n_items <- length(unique(x.df$items))

  aov.x <- aov(values~as.factor(id)+as.factor(items),data=x.df)
  s.aov <- summary(aov.x)
  stats <- matrix(unlist(s.aov),ncol=3,byrow=TRUE)
  MSB <- stats[3,1]
  MSW <- (stats[2,2] + stats[2,3])/(stats[1,2] + stats[1,3])
  MSJ <- stats[3,2]
  MSE <- stats[3,3]

  # Create variance table
  MS_items = MSW - MSE
  MS_id = (MSB-MSE)/n_items
  MS.df <- data.frame(variance = c(MS_id, MS_items, MSE,
                                   NA))
  rownames(MS.df) <- c("ID", "Items", "Residual", "Total")
  MS.df["Total", ] <- sum(MS.df[1:3, 1], na.rm = TRUE)
  MS.df["percent"] <- MS.df/MS.df["Total", 1]

  stats <- matrix(NA, ncol = 3, nrow = 5)
  stats[1, 1] <- dfB <- n_id - 1
  stats[1, 2] <- dfJ <- n_items - 1
  stats[1, 3] <- dfE <- (n_id - 1) * (n_items - 1)
  stats[2, 1] <- MSB * (n_id - 1)
  stats[2, 2] <- MSJ * (n_items - 1)
  stats[2, 3] <- MSE * (n_id - 1) * (n_items - 1)
  stats[3, 1] <- MSB
  stats[3, 2] <- MSJ
  stats[3, 3] <- MSE
  stats[4, 1] <- FB <- MSB/MSE
  stats[4, 2] <- FJ <- MSJ/MSE
  stats[5, 1] <- -expm1(pf(FB, dfB, dfE, log.p = TRUE))
  stats[5, 2] <- -expm1(pf(FJ, dfJ, dfE, log.p = TRUE))

  colnames(stats) <- c("Subjects", "items", "Residual")
  rownames(stats) <- c("df", "SumSq", "MS", "F", "p")
  # transpose
  stat.final = t(stats)
  # Calculate ICCs
  ICC1 <- (MSB - MSW)/(MSB + (n_items - 1) * MSW)
  ICC2 <- (MSB - MSE)/(MSB + (n_items - 1) * MSE + n_items * (MSJ -
                                                                MSE)/n_id)
  ICC3 <- (MSB - MSE)/(MSB + (n_items - 1) * MSE)
  ICC_1_k <- (MSB - MSW)/(MSB)
  ICC_2_k <- (MSB - MSE)/(MSB + (MSJ - MSE)/n_id)
  ICC_3_k <- (MSB - MSE)/MSB
  F11 <- MSB/MSW
  df11n <- n_id - 1
  df11d <- n_id * (n_items - 1)
  p11 <- -expm1(pf(F11, df11n, df11d, log.p = TRUE))
  F21 <- MSB/MSE
  df21n <- n_id - 1
  df21d <- (n_id - 1) * (n_items - 1)
  p21 <- -expm1(pf(F21, df21n, df21d, log.p = TRUE))
  F31 <- F21
  results <- data.frame(matrix(NA, ncol = 6, nrow = 6))
  colnames(results) <- c("model","measures","type", "icc","lower.ci", "upper.ci")

  results$model = c("one-way random","two-way random", "two-way fixed",
                    "one-way random","two-way random", "two-way fixed")
  results$measures = c("Agreement", "Agreement", "Consistency",
                       "Avg. Agreement", "Avg. Agreement", "Avg. Consistency")
  results$type = c("ICC1","ICC2","ICC3","ICC1k","ICC2k","ICC3k")
  results$icc = c(ICC1,ICC2,ICC3,ICC_1_k,ICC_2_k,ICC_3_k)

  F1L <- F11/qf(1 - alpha, df11n, df11d)
  F1U <- F11 * qf(1 - alpha, df11d, df11n)
  L1 <- (F1L - 1)/(F1L + (n_items - 1))
  U1 <- (F1U - 1)/(F1U + n_items - 1)
  F3L <- F31/qf(1 - alpha, df21n, df21d)
  F3U <- F31 * qf(1 - alpha, df21d, df21n)
  results[1, 5] <- L1
  results[1, 6] <- U1
  results[3, 5] <- (F3L - 1)/(F3L + n_items - 1)
  results[3, 6] <- (F3U - 1)/(F3U + n_items - 1)
  results[4, 5] <- 1 - 1/F1L
  results[4, 6] <- 1 - 1/F1U
  results[6, 5] <- 1 - 1/F3L
  results[6, 6] <- 1 - 1/F3U
  Fj <- MSJ/MSE
  vn <- (n_items - 1) * (n_id - 1) * ((n_items * ICC2 * Fj + n_id *
                                         (1 + (n_items - 1) * ICC2) - n_items * ICC2))^2
  vd <- (n_id - 1) * n_items^2 * ICC2^2 * Fj^2 + (n_id * (1 +
                                                            (n_items - 1) * ICC2) - n_items * ICC2)^2
  v <- vn/vd
  F3U <- qf(1 - alpha, n_id - 1, v)
  F3L <- qf(1 - alpha, v, n_id - 1)
  L3 <- n_id * (MSB - F3U * MSE)/(F3U * (n_items * MSJ + (n_items *
                                                            n_id - n_items - n_id) * MSE) + n_id * MSB)
  results[2, 5] <- L3
  U3 <- n_id * (F3L * MSB - MSE)/(n_items * MSJ + (n_items * n_id -
                                                     n_items - n_id) * MSE + n_id * F3L * MSB)
  results[2, 6] <- U3
  L3k <- L3 * n_items/(1 + L3 * (n_items - 1))
  U3k <- U3 * n_items/(1 + U3 * (n_items - 1))
  results[5, 5] <- L3k
  results[5, 6] <- U3k


  if(other_ci == TRUE){
    stop("Bootstrapping for these statistics is currently not supported for this function.")

  } else{
    SEM = sqrt(MSE)
    sd_tots = sqrt(sum(stats[2,])/(n_id-1))
    SEE = sd_tots*sqrt(ICC3*(1-ICC3))
    SEP = sd_tots*sqrt(1-ICC3^2)

    mw <- mean(x.df$values, na.rm = TRUE)
    if(cv_calc == "residuals"){
      stddev <- sqrt(mean(residuals(aov.x)^2))
    } else if(cv_calc == "MSE"){
      stddev <- sqrt(MSE)
    } else if(cv_calc == "SEM"){
      stddev <- SEM
    } else {
      stop("cv_calc must be SEM, MSE, or residuals")
    }

    cv_out = stddev/mw
    res_other = data.frame(
      estimate = c(cv_out, SEM, SEP, SEE),
      bias = NA,
      se = NA,
      lower.ci = NA,
      upper.ci = NA,
      row.names = c("cv", "SEM", "SEP", "SEE")
    )
  }


  # Save call
  lm_mod = list(call = list(formula = as.formula(x.df$values ~ x.df$id + x.df$items)))
  call2 = match.call()

  call2$lm_mod = lm_mod

  # Save call
  lm_mod = list(call = list(formula = as.formula(x.df$values ~ x.df$id + x.df$items)))
  call2 = match.call()

  call2$lm_mod = lm_mod
  if(is.null(call2$conf.level)){
    call2$conf.level = conf.level
  }

  if(is.null(call2$other_ci)){
    call2$other_ci = other_ci
  }

  result <- list(icc = results,
                 lmer = aov.x,
                 anova = stats,
                 var_comp = MS.df,
                 n.id = n_id,
                 n.item = n_items,
                 call = call2,
                 cv = res_other["cv",],
                 SEM = res_other["SEM",],
                 SEE = res_other["SEE",],
                 SEP = res_other["SEP",])


  structure(result,
            class = "simple_reli")
}

Try the SimplyAgree package in your browser

Any scripts or data that you put into this service are public.

SimplyAgree documentation built on Dec. 28, 2022, 2:06 a.m.