R/mcq.R

Defines functions plot.score_mcq27_output plot.prop_ss_output get_lookup_table summarize_mcq prop_ss inn score_one_mcq27 score_mcq27

Documented in get_lookup_table inn plot.prop_ss_output plot.score_mcq27_output prop_ss score_mcq27 score_one_mcq27 summarize_mcq

#' Score 27-item MCQ
#'
#' @param dat Dataframe (longform) with subjectid, questionid, and response
#' (0 for SIR/SS and 1 for LDR/LL)
#' @param impute_method One of: "none", "ggm", "GGM", "inn", "INN"
#' @param round Numeric specifying number of decimal places
#' (passed to `base::round()`)
#' @param random Boolean whether to insert a random draw (0 or 1) for NAs.
#' Default is FALSE
#' @param return_data Boolean whether to return the original data and new
#' imputed responses. Default is FALSE.
#' @param verbose Boolean whether to print subject and question ids pertaining
#' to missing data. Default is FALSE.
#' @param trans Transformation to apply to k values: "none", "log", or "ln".
#' Default is "none"
#'
#' @return Summary dataframe
#' @export
#'
#' @examples
#' score_mcq27(mcq27)
score_mcq27 <- function(dat = dat, impute_method = "none",
                        round = 6, random = FALSE,
                        trans = "none",
                        return_data = FALSE, verbose = FALSE) {

  if (!impute_method %in% c("none", "ggm", "GGM", "inn", "INN")) {
    stop("Impute method must be one of none, ggm, GGM, inn, INN")
  }

  if (!trans %in% c("none", "log", "ln")) {
    stop("Transformation must be one of 'none', 'log', 'ln'")
  }

  if (return_data) {
    dat$newresponse <- NA
  }

  # length of ids
  nids <- length(unique(dat$subjectid))

  # populate dataframe
  dfout <- data.frame(
    subjectid = unique(dat$subjectid),
    overall_k = rep(NA, length = nids),
    small_k = rep(NA, length = nids),
    medium_k = rep(NA, length = nids),
    large_k = rep(NA, length = nids),
    geomean_k = rep(NA, length = nids),
    overall_consistency = rep(NA, length = nids),
    small_consistency = rep(NA, length = nids),
    medium_consistency = rep(NA, length = nids),
    large_consistency = rep(NA, length = nids),
    composite_consistency = rep(NA, length = nids),
    overall_proportion = rep(NA, length = nids),
    small_proportion = rep(NA, length = nids),
    medium_proportion = rep(NA, length = nids),
    large_proportion = rep(NA, length = nids),
    impute_method = rep(NA, length = nids)
  )


  for (i in unique(dat$subjectid)) {

    # filter one subject
    dat_sub <- dat[dat$subjectid == i, ]

    # check for 27 items
    if (!length(dat_sub$response) == 27) stop(paste0("Response length
                                                      not equal to 27
                                                      for subjectid: ", i))

    if (impute_method %in% c("inn", "INN") & any(is.na(dat_sub$response))) {
      dat_sub <- inn(dat_sub, random = random, verbose = verbose)
    }

    dfout[dfout$subjectid %in% i, 2:(ncol(dfout)-1)] <- score_one_mcq27(dat_sub,
                                                                      impute_method,
                                                                      round = round)

    if (return_data) {
      dat$newresponse[dat$subjectid == i] <- dat_sub$response
    }

  }

  dfout$impute_method <- if (!(impute_method %in% c("inn", "INN") & random)) {
    impute_method
  } else {
    "INN with random"
  }

  if (trans == "log") {
    dfout <- dfout |>
      dplyr::mutate(dplyr::across(overall_k:geomean_k, ~ log10(.x))) |>
      dplyr::rename_with(~ paste0("log10_", .x, recycle0 = TRUE),
                  overall_k:geomean_k)
  } else if (trans == "ln") {
    dfout <- dfout |>
      dplyr::mutate(dplyr::across(overall_k:geomean_k, ~ log(.x))) |>
      dplyr::rename_with(~ paste0("ln_", .x, recycle0 = TRUE),
                  overall_k:geomean_k)
  }

  class(dfout) <- c("score_mcq27_output", class(dfout))

  if (!return_data) {
    return(dfout)
  } else {
    return(list("results" = dfout,
                "data" = dat))
  }


}

#' Score one subject's 27-item MCQ
#'
#' @param dat One subject's 27 items from the MCQ
#' @param impute_method One of: "none", "ggm", "GGM", "inn", "INN"
#' @param round Numeric specifying number of decimal places
#' (passed to `base::round()`)
#'
#' @return Vector with scored 27-item MCQ metrics
#' @importFrom psych geometric.mean
#'
#' @examples
#' beezdiscounting:::score_one_mcq27(mcq27[mcq27$subjectid %in% 1, ])
score_one_mcq27 <- function(dat, impute_method = "none", round = 6) {

  # magnitudes
  mag <- c("S", "M", "L")
  mags <- c("small", "medium", "large")

  dfout <- c(
    "overall_k" = NA,
    "small_k" = NA,
    "medium_k" = NA,
    "large_k" = NA,
    "geomean_k" = NA,
    "overall_consistency" = NA,
    "small_consistency" = NA,
    "medium_consistency" = NA,
    "large_consistency" = NA,
    "composite_consistency" = NA,
    "overall_proportion" = NA,
    "small_proportion" = NA,
    "medium_proportion" = NA,
    "large_proportion" = NA
  )

  # bring in lookup table
  dat <- merge(dat, lookup, by.x = "questionid",
               by.y = "questionid", all.x = TRUE)
  # order df
  dat <- dat[match(lookup$questionid, dat$questionid), ]

  ## overall
  # calculate consistency scores
  lngth <- 28
  cons <- vector(length = lngth)
  for (j in 1:lngth) {
    # first sorted question id equals sum of ll (1s)
    if (j == 1) cons[j] <- sum(dat$response[j:length(dat$response)])
    # very last k value bin equals sum of ss (0s)
    if (j == lngth) {
      cons[j] <- sum(dat$response[1:j - 1] == 0)
      break()
    }
    # for each question id in between, sum number of 0s before and
    # sum number of 1s at and after the current question id
    cons[j] <- sum(dat$response[1:j - 1] == 0) +
      sum(dat$response[j:length(dat$response)])
  }

  # populate consistency
  dfout["overall_consistency"] <- max(cons) / 27
  # find index where max consistency occurs
  consmaxi <- which(cons == max(cons))
  consmaxi <- sort(rbind(consmaxi, (consmaxi - 1)))
  # if the highest consistency is at the first index, replace 0 index with 1
  if (0 %in% consmaxi) {
    consmaxi[which(consmaxi == 0)] <- 1
  }
  if (length(consmaxi) != 0) {
    kval <- gtools::running(c(lookup$kindiff, .25)[consmaxi],
                            fun = psych::geometric.mean, width = 2, by = 2)
    dfout["overall_k"] <- psych::geometric.mean(kval)
  } else {
    dfout["overall_k"] <- NA
  }

  dfout["overall_proportion"] <- cons[1] / 27

  for (k in 1:3) {

    dat_mag <- dat[dat$magnitude == mag[k], ]
    # calculate consistency scores
    lngth <- 10
    cons <- vector(length = lngth)
    for (j in 1:lngth) {
      # first sorted question id equals sum of ll (1s)
      if (j == 1) cons[j] <- sum(dat_mag$response[j:length(dat_mag$response)])
      # very last k value bin equals sum of ss (0s)
      if (j == lngth) {
        cons[j] <- sum(dat_mag$response[1:j - 1] == 0)
        break()
      }
      # for each question id in between, sum number of 0s before and
      # sum number of 1s at and after the current question id
      cons[j] <- sum(dat_mag$response[1:j - 1] == 0) +
        sum(dat_mag$response[j:length(dat_mag$response)])
    }

    dfout[paste0(mags[k], "_consistency")] <- max(cons) / 9
    # find index where max consistency occurs
    consmaxi <- which(cons == max(cons))
    consmaxi <- sort(rbind(consmaxi, (consmaxi - 1)))
    # if the highest consistency is at the first index, replace 0 index with 1
    if (0 %in% consmaxi) {
      consmaxi[which(consmaxi == 0)] <- 1
    }
    if (length(consmaxi) != 0) {
      kval <- gtools::running(c(subset(lookup, magnitude == mag[k])$kindiff,
                                subset(lookup, magnitude == mag[k])$kindiff[10])[consmaxi],
                              fun = psych::geometric.mean, width = 2, by = 2)
      dfout[paste0(mags[k], "_k")] <- psych::geometric.mean(kval)
    } else {
      dfout[paste0(mags[k], "_k")] <- NA
    }
    dfout[paste0(mags[k], "_proportion")] <- cons[1] / 9


  }

  dfout["geomean_k"] <- psych::geometric.mean(dfout[c("small_k",
                                                      "medium_k",
                                                      "large_k")],
                              na.rm = if (impute_method %in% c("ggm", "GGM")) TRUE else FALSE)
  dfout["composite_consistency"] <- sum(dfout[c("small_consistency",
                                             "medium_consistency",
                                             "large_consistency")]) / 3

  dfout <- round(dfout, digits = round)

  return(dfout)

}

#' Calculates item nearest neighbor imputation approach discussed by
#' Yeh et al. (2023)
#'
#' @param dat A single subject's 27-item MCQ data in long form
#' @param random Boolean whether to insert a random draw (0 or 1) for NAs
#' @param verbose Boolean whether to print subject and question ids pertaining
#' to missing data
#'
#' @return An imputed data set to be scored
#'
inn <- function(dat, random, verbose) {
  dat <- merge(dat, lookup,
    by.x = "questionid",
    by.y = "questionid", all.x = TRUE
  )
  dat <- dat[match(lookup$questionid, dat$questionid), ]
  if (verbose) print(paste("NA found for id:", unique(dat$subjectid)))
  split_dfs <- suppressWarnings(split(dat, (0:nrow(dat) %/% 3))[-10])

  for (i in seq_along(split_dfs)) {
    if (!any(is.na(split_dfs[[i]]$response))) {
      next()
    } else {
      subqids <- split_dfs[[i]]$questionid
      naqids <- split_dfs[[i]]$questionid[which(is.na(split_dfs[[i]]$response))]
      if (verbose) print(paste0(c("NAs for questionids: ", naqids), collapse = " "))
      if (length(unique(split_dfs[[i]]$response[!(split_dfs[[i]]$questionid %in% naqids)])) == 1) {
        # if all non-na values are the same, replace with that non-na number
        dat$response[dat$questionid %in% naqids] <- unique(split_dfs[[i]]$response[!(split_dfs[[i]]$questionid %in% naqids)])
      } else {
        if (random) {
          dat$response[dat$questionid %in% naqids] <- sample(0:1,
            length(naqids),
            replace = TRUE
          )
        }
      }
    }
  }
  return(dat[order(as.numeric(dat$questionid)), c(2, 1, 3)])
}


#' Calculate proportion of SIR/SS responses at each k value
#'
#' @param dat Dataframe (longform) with subjectid, questionid, and response
#' (0 for SIR/SS and 1 for LDR/LL)
#'
#' @return Dataframe with proportion of SIR/SS responses at each k rank
#' @export
#'
#' @examples prop_ss(mcq27)
prop_ss <- function(dat) {

  # bring in lookup table
  dat <- merge(dat, lookup, by.x = "questionid",
               by.y = "questionid", all.x = TRUE)
  # order df
  dat <- dat[match(lookup$questionid, dat$questionid), ]

  prop_ss_tbl <- dplyr::group_by(dat, k_rank) |>
    dplyr::summarise(prop_ss = sum(response == 0, na.rm = TRUE) / 3) |>
    dplyr::ungroup() |>
    dplyr::mutate(prop_ss = round(prop_ss, 2))

  if (any(is.na(dat$response))) {
    warning("Missing data found and ignored. Consider imputing missing data.")
  }
  class(prop_ss_tbl) <- c("prop_ss_output", class(prop_ss_tbl))
  return(prop_ss_tbl)

}

#' Provide a summary of the results from the MCQ ouutput table.
#'
#' @param res Dataframe with MCQ results (output from the `calc_mcq` function)
#' @param na.rm Boolean whether to remove NAs from the calculation
#'
#' @return Dataframe with summary statistics
#' @export
#'
#' @examples summarize_mcq(score_mcq27(mcq27))
summarize_mcq <- function(res, na.rm = TRUE) {
  sum_tab <- res %>%
    dplyr::summarise(
      dplyr::across(dplyr::contains("overall_k"):composite_consistency, list(
        Mean = ~ mean(., na.rm = na.rm),
        SD = ~ sd(., na.rm = na.rm),
        SEM = ~ sd(., na.rm = na.rm) / sqrt(dplyr::n())
      ), .names = "{.col}-{.fn}")
    ) |>
    tidyr::pivot_longer(dplyr::everything(), names_to = c(".value", "Statistic"), names_sep = "-") %>%
    tidyr::pivot_longer(-Statistic, names_to = "Metric", values_to = "value") |>
    tidyr::pivot_wider(names_from = Statistic, values_from = value)

  return(sum_tab)
}


#' Get internal lookup table for the 27-item MCQ
#'
#' @return Dataframe with questionid, magnitude, and kindiff
#' @export
#'
#' @examples get_lookup_table()
get_lookup_table <- function() {
  return(lookup)
}


#' Plot Proportion of SIR/SS Choices by k Value
#'
#' This function creates a plot of the proportion of SIR/SS
#' choices by k value using the output of the `prop_ss` function.
#'
#' @param x Output from the `prop_ss` function
#' @param ... Additional arguments passed to `ggplot2::geom_point()`
#' @param pt_shape Shape of the points in the plot. Default is 21.
#' @param pt_fill Fill color of the points in the plot. Default is "white".
#' @param pt_size Size of the points in the plot. Default is 3.
#' @param title Title of the plot. Default is "Proportion of SIR/SS choices by k value".
#' @param xlab Label for the x-axis. Default is "k value rank".
#' @param ylab Label for the y-axis. Default is "Proportion of SS choices".
#'
#' @return A ggplot object.
#' @export
#'
#' @examples plot(prop_ss(mcq27))
plot.prop_ss_output <- function(
    x,
    ...,
    pt_shape = 21,
    pt_fill = "white",
    pt_size = 3,
    title = "Proportion of SIR/SS choices by k value",
    xlab = "k value rank",
    ylab = "Proportion of SS choices"
    ) {
  x |>
    dplyr::mutate(group = 1) |>
    ggplot2::ggplot(ggplot2::aes(x = factor(k_rank), y = prop_ss, group = group)) +
    ggplot2::geom_line() +
    ggplot2::geom_point(
      shape = pt_shape,
      fill = pt_fill,
      size = pt_size
    ) +
    ggplot2::labs(title = title,
         x = xlab,
         y = ylab) +
    ggplot2::theme_minimal() +
    ggplot2::scale_x_discrete(labels = c(
      "0.00016", "0.0004", "0.001",
      "0.0025", "0.006", "0.016",
      "0.041", "0.1", "0.25")
    )
}


#' Plot MCQ-27 Scores
#'
#' This function creates a plot of the MCQ-27 scores for
#' different metrics (small_k, medium_k, large_k, geomean_k, overall_k).
#' The function handles different logarithmic transformations of the k-values
#' and adjusts the y-axis label accordingly.
#'
#' @param x A data frame returned by the `score_mcq27` function.
#' @param ... Additional arguments passed to methods.
#' @param xlab Label for the x-axis. Default is "Metric".
#' @param alpha Transparency of the points in the plot. Default is 0.3.
#'
#' @return A ggplot object showing the boxplot of MCQ-27 scores.
#' @export
#'
#' @examples plot(score_mcq27(mcq27))
plot.score_mcq27_output <- function(x, ..., xlab = "Metric", alpha = 0.3) {
  target_levels <- c("small_k", "medium_k", "large_k", "geomean_k", "overall_k")
  tmp <- x |>
    dplyr::select(dplyr::contains(c("id", "_k"))) |>
    tidyr::pivot_longer(cols = dplyr::contains("_k"), names_to = "metric", values_to = "value")

  if (any(grepl("log10", tmp$metric))) {
    tmp <- tmp |>
      dplyr::mutate(
        metric = gsub("log10_", "", metric),
        metric = factor(metric, levels = target_levels)
      )
    ylab = "Log10(k) value"
  } else if (any(grepl("ln", tmp$metric))) {
    tmp <- tmp |>
      dplyr::mutate(
        metric = gsub("ln_", "", metric),
        metric = factor(metric, levels = target_levels)
      )
    ylab <- "Ln(k) value"
  } else {
    tmp <- tmp |>
      dplyr::mutate(metric = factor(metric, levels = target_levels))
    ylab <- "k value"
  }

  bplot <- tmp %>%
    ggplot2::ggplot(ggplot2::aes(x = metric, y = value)) +
    ggplot2::geom_boxplot() +
    ggplot2::geom_point(alpha = alpha) +
    ggplot2::theme_minimal() +
    ggplot2::labs(
      x = xlab,
      y = ylab
    )

  return(bplot)
}

Try the beezdiscounting package in your browser

Any scripts or data that you put into this service are public.

beezdiscounting documentation built on April 4, 2025, 4:44 a.m.