R/item.scores.R
In misty: Miscellaneous Functions 'T. Yanagida'

Documented in item.scores

#' Scale Scores
#'
#' This function computes (prorated) scale scores by averaging the (available)
#' items that measure a single construct by default.
#'
#' Prorated mean scale scores are computed by averaging the available items, e.g.,
#' if a participant answers 4 out of 8 items, the prorated scale score is the
#' average of the 4 responses. Averaging the available items is equivalent to
#' substituting the mean of a participant's own observed items for each of the
#' participant's missing items, i.e., \emph{person mean imputation} (Mazza, Enders
#' & Ruehlman, 2015) or \emph{ipsative mean imputation} (Schafer & Graham, 2002).
#'
#' Proration may be reasonable when (1) a relatively high proportion of the items
#' (e.g., 0.8) and never fewer than half are used to form the scale score, (2)
#' means of the items comprising a scale are similar and (3) the item-total
#' correlations are similar (Enders, 2010; Graham, 2009; Graham, 2012). Results
#' of simulation studies indicate that proration is prone to substantial bias
#' when either the item means or the inter-item correlation vary (Lee, Bartholow,
#' McCarthy, Pederson & Sher, 2014; Mazza et al., 2015).
#'
#' @param data     a data frame with numeric vectors.
#' @param ...      an expression indicating the variable names in \code{data},
#'                 e.g., \code{item.scores(dat, x1, x2, x3)}. Note that the
#'                 operators \code{.}, \code{+}, \code{-}, \code{~}, \code{:},
#'                 \code{::}, and \code{!} can also be used to select variables,
#'                 see 'Details' in the \code{\link{df.subset}} function.
#' @param fun      a character string indicating the function used to compute
#'                 scale scores, default: \code{"mean"}.
#' @param prorated logical: if \code{TRUE} (default), prorated scale scores are
#'                 computed (see 'Details'); if \code{FALSE}, scale scores of
#'                 only complete cases are computed.
#' @param p.avail  a numeric value indicating the minimum proportion of available
#'                 item responses needed for computing a prorated scale score for
#'                 each case, e.g. \code{p.avail = 0.8} indicates that scale scores
#'                 are only computed for cases with at least 80\% of item responses
#'                 available. By default prorated scale scores are computed for
#'                 all cases with at least one item response. Note that either
#'                 argument \code{p.avail} or \code{n.avail} is used to specify
#'                 the proration criterion.
#' @param n.avail  an integer indicating the minimum number of available item
#'                 responses needed for computing a prorated scale score for each
#'                 case, e.g. \code{n.avail = 2} indicates that scale scores are
#'                 only computed for cases with item responses on at least 2 items.
#'                 By default prorated scale scores are computed for all cases
#'                 with at least one item response. Note that either argument
#'                 \code{p.avail} or \code{n.avail} is used to specify the proration
#'                 criterion.
#' @param append   logical: if \code{TRUE} (default), a variable with scale scores
#'                 is appended to the data frame specified in the argument \code{data}.
#' @param name     a character string indicating the names of the variable appended
#'                 to the data frame specified in the argument \code{data} when
#'                 \code{append = TRUE}. By default, the variable is named \code{scores}.
#' @param as.na    a numeric vector indicating user-defined missing values,
#'                 i.e. these values are converted to \code{NA} before conducting
#'                 the analysis.
#' @param check    logical: if \code{TRUE} (default), argument specification is checked.
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{cluster.scores}}, \code{\link{item.alpha}}, \code{\link{item.cfa}},
#' \code{\link{item.omega}},
#'
#' @references
#' Enders, C. K. (2010). \emph{Applied missing data analysis}. New York, NY: Guilford
#' Press.
#'
#' Graham, J. W. (2009). Missing data analysis: Making it work in the real world.
#' \emph{Annual Review of Psychology, 60}, 549-576.
#' https://doi.org/10.1146/annurev.psych.58.110405.085530
#'
#' Graham, J. W. (2012). Missing data: Analysis and design. New York, NY: Springer
#'
#' Lee, M. R., Bartholow, B. D., McCarhy, D. M., Pederson, S. L., & Sher, K. J. (2014).
#' Two alternative approaches to conventional person-mean imputation scoring of the
#' self-rating of the effects of alcohol scale (SRE). \emph{Psychology of Addictive Behaviors, 29},
#' 231-236. https://doi.org/10.1037/adb0000015
#'
#' Mazza, G. L., Enders, C. G., & Ruehlman, L. S. (2015). Addressing item-level missing
#' data: A comparison of proration and full information maximum likelihood estimation.
#' \emph{Multivariate Behavioral Research, 50}, 504-519.
#' https://doi.org/10.1080/00273171.2015.1068157
#'
#' Schafer, J. L., & Graham, J. W. (2002). Missing data: Our view of the state of
#' the art. \emph{Psychological Methods, 7}, 147-177.' https://doi.org/10.1037/1082-989X.7.2.147
#'
#' @return
#' Returns a numeric vector with the same length as \code{nrow(x)} containing (prorated)
#' scale scores.
#'
#' @export
#'
#' @examples
#' dat <- data.frame(item1 = c(3,  2,  4, 1,  5, 1,  3, NA),
#'                   item2 = c(2,  2, NA, 2,  4, 2, NA,  1),
#'                   item3 = c(1,  1,  2, 2,  4, 3, NA, NA),
#'                   item4 = c(4,  2,  4, 4, NA, 2, NA, NA),
#'                   item5 = c(3, NA, NA, 2,  4, 3, NA,  3))
#'
#' # Example 1: Prorated mean scale scores
#' item.scores(dat)
#'
#' # Example 2: Prorated standard deviation scale scores
#' item.scores(dat, fun = "sd")
#'
#' # Example 3: Sum scale scores without proration
#' item.scores(dat, fun = "sum", prorated = FALSE)
#'
#' # Example 4: Prorated mean scale scores,
#' # minimum proportion of available item responses = 0.8
#' item.scores(dat, p.avail = 0.8)
#'
#' # Example 5: Prorated mean scale scores,
#' # minimum number of available item responses = 3
#' item.scores(dat, n.avail = 3)
item.scores <- function(data, ..., fun = c("mean", "sum", "median", "var", "sd", "min", "max"),
                        prorated = TRUE, p.avail = NULL, n.avail = NULL,
                        append = TRUE, name = "scores", as.na = NULL, check = TRUE) {

  #_____________________________________________________________________________
  #
  # Initial Check --------------------------------------------------------------

  # Check if input 'data' is missing
  if (isTRUE(missing(data))) { stop("Please specify a data frame for the argument 'data'", call. = FALSE) }

  # Check if input 'data' is NULL
  if (isTRUE(is.null(data))) { stop("Input specified for the argument 'data' is NULL.", call. = FALSE) }

  #_____________________________________________________________________________
  #
  # Data -----------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data using the argument 'data' ####

  if (isTRUE(!missing(...))) {

    # Extract data and convert tibble into data frame or vector
    x <- as.data.frame(data[, .var.names(..., data = data)])

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data without using the argument 'data' ####

  } else {

    # Data frame
    x <- as.data.frame(data)

  }

  #_____________________________________________________________________________
  #
  # Input Check ----------------------------------------------------------------

  # Check inputs
  .check.input(logical = c("prorated", "append"),
               numeric = list(p.avail = 1L, n.avail = 1L), character = list(name = 1L),
               s.character = list(fun = c("mean", "sum", "median", "var", "sd", "min", "max")), envir = environment(), input.check = check)

  # Additional checks
  if (isTRUE(check)) {

    # Check input 'data'
    if (isTRUE(!is.matrix(x) && !is.data.frame(x))) { stop("Please specify a data frame for the argument 'data'.", call. = FALSE) }

    # Check input 'data'
    if (isTRUE(any(apply(x, 2L, function(y) !is.numeric(y))))) { stop("Please specify a data frame with numeric vectors for the argument 'data'.", call. = FALSE) }

    # Check argument p.avail
    if (isTRUE(!is.null(p.avail) && !is.null(n.avail))) { stop("Please specify either argument 'p.avail' or 'n.avail', but not both arguments.", call. = FALSE) }

    # Check argument p.avail and prorated
    if (isTRUE(!prorated && (!is.null(p.avail) | !is.null(n.avail)))) { warning("Argument specification 'prorated = FALSE' is equivalent to 'p.avail = 1' or 'n.avail = ncol(x).", call. = FALSE) }

   }

  #_____________________________________________________________________________
  #
  # Arguments ------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Function used to compute scale scores ####

  fun <- ifelse(all(c("mean", "sum", "median", "var", "sd", "min", "max") %in% fun), "mean", fun)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Number of item responses ####

  x.miss <- apply(x, 1L, function(y) sum(!is.na(y)))

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Proration ####

  if (isTRUE(prorated)) {

    # avail = NULL, n.avail = NULL
    if (isTRUE(is.null(p.avail) && is.null(n.avail))) {

      # Thresdhold for proraton = p.avail
      n.items <- 1L

    }


    if (isTRUE(!is.null(p.avail) && is.null(n.avail))) {

      # Thresdhold for proraton = p.avail
      n.items <- ceiling(p.avail * ncol(x))

    }


    if (isTRUE(is.null(p.avail) && !is.null(n.avail))) {

      # Thresdhold for proraton = n.avail
      n.items <- n.avail

    }

  } else {

    # Thresdhold for proration = 1
    n.items <- ncol(x)

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Convert user-missing values into NA ####

  if (isTRUE(!is.null(as.na))) { x <- .as.na(x, na = as.na) }

  #_____________________________________________________________________________
  #
  # Main Function --------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Mean ####
  switch(fun, mean = {

    object <- misty::as.na(rowMeans(x, na.rm = TRUE), na = "NaN", check = FALSE)
    object[which(x.miss < n.items)] <- NA

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Sum ####
  }, sum = {

    object <- misty::as.na(rowMeans(x, na.rm = TRUE)*ncol(x), na = "NaN", check = FALSE)
    object[which(x.miss < n.items)] <- NA

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Median ####
  }, median = {

    object <- apply(x, 1L, function(y) ifelse(length(na.omit(y)) > 0L, median(y, na.rm = TRUE), NA))
    object[which(x.miss < n.items)] <- NA

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Variance ####
  }, var = {

    object <- apply(x, 1L, function(y) ifelse(length(na.omit(y)) > 0L, var(y, na.rm = TRUE), NA))
    object[which(x.miss < n.items)] <- NA

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Standard deviation ####
  }, sd = {

    object <- apply(x, 1L, function(y) ifelse(length(na.omit(y)) > 0L, sd(y, na.rm = TRUE), NA))
    object[which(x.miss < n.items)] <- NA

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Minimum ####
  }, min = {

    object <- apply(x, 1L, function(y) ifelse(length(na.omit(y)) > 0L, min(y, na.rm = TRUE), NA))
    object[which(x.miss < n.items)] <- NA

  #-----------------------------------------
  # Maximum
  }, max = {

    object <- apply(x, 1L, function(y) ifelse(length(na.omit(y)) > 0L, max(y, na.rm = TRUE), NA))
    object[which(x.miss < n.items)] <- NA

  })

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Append ####

  if (isTRUE(!missing(...) && append)) { object <- data.frame(data,  setNames(as.data.frame(object), nm = name)) }

  #_____________________________________________________________________________
  #
  # Output ---------------------------------------------------------------------

  return(object)

}

#_______________________________________________________________________________