R/LikertEZ.R

Defines functions rii_weighted rank_items cronbach_alpha plot_item summary_table_all summarize

Documented in cronbach_alpha plot_item rank_items rii_weighted summarize summary_table_all

#' @import ggplot2
#' @importFrom stats chisq.test median na.omit sd var
#' @importFrom utils head
NULL

# Suppress global variable NOTES from R CMD check
utils::globalVariables(c("Count", "Response"))

#' Summarize a Likert item
#'
#' This function calculates summary statistics for a Likert item, including
#' mean, median, mode, and performs a chi-square test.
#'
#' @param responses Numeric vector of responses.
#' @param max_scale The maximum scale value.
#' @param exact If TRUE, use exact Monte Carlo method.
#' @param B Number of simulations for Monte Carlo.
#' @param tidy If TRUE, returns a tidy data frame.
#' @return A list or data.frame with summary statistics.
#' @export
#' @examples
#' responses <- c(1, 2, 3, 4, 5, 4, 3, 2, NA)
#' summarize(responses)
summarize <- function(responses, max_scale = 5, exact = TRUE, B = 10000, tidy = FALSE) {
  responses <- na.omit(as.numeric(responses))
  N <- length(responses)

  freq_table <- table(factor(responses, levels = 1:max_scale))
  percent_table <- round(100 * prop.table(freq_table), 2)
  mode_val <- as.numeric(names(freq_table)[which.max(freq_table)])
  expected <- rep(sum(freq_table) / max_scale, max_scale)

  if (exact && any(expected < 5)) {
    chi_test <- chisq.test(freq_table, p = rep(1 / max_scale, max_scale), simulate.p.value = TRUE, B = B)
  } else {
    chi_test <- suppressWarnings(chisq.test(freq_table, p = rep(1 / max_scale, max_scale)))
  }

  weights <- as.numeric(names(freq_table))
  fi <- as.numeric(freq_table)
  RII <- sum(weights * fi) / (max_scale * sum(fi))

  result <- list(
    Summary = list(
      Mean = mean(responses),
      Median = median(responses),
      SD = sd(responses),
      Min = min(responses),
      Max = max(responses),
      Mode = mode_val,
      Missing = sum(is.na(responses))
    ),
    Counts = as.list(freq_table),
    Percentages = as.list(percent_table),
    RII = RII,
    Chi_Square = list(
      Statistic = unname(chi_test$statistic),
      DF = unname(chi_test$parameter),
      P_Value = unname(chi_test$p.value),
      Method = chi_test$method
    )
  )

  if (tidy) {
    df <- as.data.frame(result$Summary)
    names(df) <- names(result$Summary)
    return(df)
  }

  return(result)
}

#' Create a tidy summary table of all items
#'
#' This function generates a tidy summary table for all ordinal items in a data.frame. The table includes statistics such as mean, median, standard deviation, counts, and percentages.
#'
#' @param data A data.frame of ordinal items.
#' @param max_scale Max value on the Likert scale (default: 5).
#' @param scale_labels Optional vector of labels for each scale point.
#' @param decimals Number of decimal places for percentages (default: 2).
#'
#' @return A data.frame with summary statistics for all items.
#' @export
#' @examples
#' dat <- data.frame(Q1 = c(1, 2, 3, 4, 5), Q2 = c(2, 2, 3, 4, NA))
#' summary_table_all(dat)
summary_table_all <- function(data, max_scale = 5, scale_labels = NULL, decimals = 2) {
  all_items <- lapply(names(data), function(name) {
    result <- summarize(data[[name]], max_scale = max_scale)
    summary_stats <- result$Summary
    counts <- result$Counts
    percents <- result$Percentages

    df <- data.frame(
      Item = name,
      Mean = summary_stats$Mean,
      Median = summary_stats$Median,
      SD = summary_stats$SD,
      Min = summary_stats$Min,
      Max = summary_stats$Max,
      Mode = summary_stats$Mode
    )

    for (i in 1:max_scale) {
      label <- if (!is.null(scale_labels) && i <= length(scale_labels)) scale_labels[i] else paste0("cat", i)
      col_label <- paste0(label, "_N(%)")
      count <- counts[[as.character(i)]]
      percent <- format(round(percents[[as.character(i)]], decimals), nsmall = decimals)
      df[[col_label]] <- paste0(count, " (", percent, "%)")
    }

    return(df)
  })

  summary_df <- do.call(rbind, all_items)
  rownames(summary_df) <- NULL
  return(summary_df)
}

#' Barplot with RII annotation
#'
#' This function generates a barplot showing the distribution of responses for a single item, with the Relative Importance Index (RII) annotated.
#'
#' @param responses Numeric vector of ordinal responses.
#' @param max_scale Max Likert scale value (default: 5).
#' @param scale_labels Optional vector of labels for each scale point.
#'
#' @return A ggplot2 bar plot with RII annotation.
#' @export
#' @examples
#' responses <- c(1, 2, 3, 4, 5, 3, 2, 1, NA)
#' plot_item(responses)
plot_item <- function(responses, max_scale = 5, scale_labels = NULL) {
  responses <- na.omit(as.numeric(responses))
  df <- as.data.frame(table(factor(responses, levels = 1:max_scale)))
  names(df) <- c("Response", "Count")
  rii_val <- round(sum(as.numeric(df$Response) * df$Count) / (max_scale * sum(df$Count)), 3)

  if (!is.null(scale_labels)) {
    df$Response <- factor(df$Response, labels = scale_labels)
  }

  ggplot(df, aes(x = Response, y = Count)) +
    geom_bar(stat = "identity", fill = "steelblue") +
    geom_text(aes(label = Count), vjust = -0.5) +
    labs(title = paste("Response Distribution (RII:", rii_val, ")"),
         x = "Response", y = "Count") +
    theme_minimal()
}

#' Cronbach Alpha for a set of ordinal items
#'
#' This function calculates the Cronbach Alpha for a set of ordinal items to assess their reliability or internal consistency.
#'
#' @param data A data.frame with the ordinal items. Each column represents an item.
#'
#' @return The Cronbach alpha value as a numeric value between 0 and 1.
#' @export
cronbach_alpha <- function(data) {
  data <- na.omit(data)
  k <- ncol(data)
  item_var <- apply(data, 2, var)
  total_var <- var(rowSums(data))
  alpha <- (k / (k - 1)) * (1 - sum(item_var) / total_var)
  return(round(alpha, 3))
}

#' Rank items by RII or Mean
#'
#' This function ranks items in the data based on either the Relative Importance Index (RII) or the mean of responses.
#'
#' @param data A data.frame of ordinal items.
#' @param method Method to rank items. Either "rii" (for Relative Importance Index) or "mean" (for mean response).
#' @param max_scale Max Likert scale value (default: 5).
#' @param n Number of top items to return (default: 5).
#' @param top Logical. If TRUE, returns the top items, otherwise returns the bottom items (default: TRUE).
#'
#' @return A vector of ranked items.
#' @export
rank_items <- function(data, method = "rii", max_scale = 5, n = 5, top = TRUE) {
  results <- sapply(data, function(col) {
    col <- as.numeric(col)
    if (method == "rii") {
      freq <- table(factor(col, levels = 1:max_scale))
      weights <- as.numeric(names(freq))
      fi <- as.numeric(freq)
      sum(weights * fi) / (max_scale * sum(fi))
    } else {
      mean(col, na.rm = TRUE)
    }
  })
  sorted <- sort(results, decreasing = top)
  return(head(sorted, n))
}

#' Weighted RII Calculation
#'
#' This function computes the weighted Relative Importance Index (RII) for a set of ordinal responses with associated weights.
#'
#' @param responses Numeric vector of ordinal responses.
#' @param weights Numeric vector of weights for each response.
#' @param max_scale Max Likert scale value (default: 5).
#'
#' @return The weighted RII as a numeric value.
#' @export
rii_weighted <- function(responses, weights, max_scale = 5) {
  responses <- as.numeric(responses)
  weights <- as.numeric(weights)
  valid <- !is.na(responses) & !is.na(weights)
  responses <- responses[valid]
  weights <- weights[valid]
  weighted_sum <- sum(responses * weights)
  max_weighted <- sum(max_scale * weights)
  return(weighted_sum / max_weighted)
}

Try the LikertEZ package in your browser

Any scripts or data that you put into this service are public.

LikertEZ documentation built on April 3, 2025, 11:27 p.m.