R/row_sums.R

Defines functions row_mids row_means.mids row_means.default total_mean.data.frame total_mean row_means row_sums.mids row_sums.default row_sums

Documented in row_means row_means.default row_means.mids row_sums row_sums.default row_sums.mids total_mean

#' @title Row sums and means for data frames
#' @name row_sums
#'
#' @description \code{row_sums()} and \code{row_means()} compute row sums or means
#'    for at least \code{n} valid values per row. The functions are designed
#'    to work nicely within a pipe-workflow and allow select-helpers
#'    for selecting variables.
#'
#' @param n May either be
#'          \itemize{
#'            \item a numeric value that indicates the amount of valid values per row to calculate the row mean or sum;
#'            \item a value between 0 and 1, indicating a proportion of valid values per row to calculate the row mean or sum (see 'Details').
#'            \item or \code{Inf}. If \code{n = Inf}, all values per row must be non-missing to compute row mean or sum.
#'          }
#'          If a row's sum of valid (i.e. non-\code{NA}) values is less than \code{n}, \code{NA} will be returned as value for the row mean or sum.
#' @param var Name of new the variable with the row sums or means.
#'
#' @inheritParams to_dummy
#' @inheritParams rec
#'
#' @return For \code{row_sums()}, a data frame with a new variable: the row sums from
#'    \code{x}; for \code{row_means()}, a data frame with a new variable: the row
#'    means from \code{x}. If \code{append = FALSE}, only the new variable
#'    with row sums resp. row means is returned. \code{total_mean()} returns
#'    the mean of all values from all specified columns in a data frame.
#'
#' @details For \code{n}, must be a numeric value from \code{0} to \code{ncol(x)}. If
#'    a \emph{row} in \code{x} has at least \code{n} non-missing values, the
#'    row mean or sum is returned. If \code{n} is a non-integer value from 0 to 1,
#'    \code{n} is considered to indicate the proportion of necessary non-missing
#'    values per row. E.g., if \code{n = .75}, a row must have at least \code{ncol(x) * n}
#'    non-missing values for the row mean or sum to be calculated. See 'Examples'.
#'
#' @examples
#' data(efc)
#' efc %>% row_sums(c82cop1:c90cop9, n = 3, append = FALSE)
#'
#' library(dplyr)
#' row_sums(efc, contains("cop"), n = 2, append = FALSE)
#'
#' dat <- data.frame(
#'   c1 = c(1,2,NA,4),
#'   c2 = c(NA,2,NA,5),
#'   c3 = c(NA,4,NA,NA),
#'   c4 = c(2,3,7,8),
#'   c5 = c(1,7,5,3)
#' )
#' dat
#'
#' row_means(dat, n = 4)
#' row_sums(dat, n = 4)
#'
#' row_means(dat, c1:c4, n = 4)
#' # at least 40% non-missing
#' row_means(dat, c1:c4, n = .4)
#' row_sums(dat, c1:c4, n = .4)
#'
#' # total mean of all values in the data frame
#' total_mean(dat)
#'
#' # create sum-score of COPE-Index, and append to data
#' efc %>%
#'   select(c82cop1:c90cop9) %>%
#'   row_sums(n = 1)
#'
#' # if data frame has only one column, this column is returned
#' row_sums(dat[, 1, drop = FALSE], n = 0)
#'
#' @export
row_sums <- function(x, ...) {
  UseMethod("row_sums")
}


#' @rdname row_sums
#' @export
row_sums.default <- function(x, ..., n, var = "rowsums", append = TRUE) {
  # evaluate arguments, generate data
  .dat <- get_dot_data(x, dplyr::quos(...))

  # remember original data, if user wants to bind columns
  orix <- x

  if (is.data.frame(x)) {

    # for Inf-values, use all columns
    if (is.infinite(n)) n <- ncol(.dat)

    # is 'n' indicating a proportion?
    digs <- n %% 1
    if (digs != 0) n <- round(ncol(.dat) * digs)

    # check if we have a data frame with at least two columns
    if (ncol(.dat) < 2) {
      message("No row-sums are returned, because `x` has only one column.", call. = TRUE)
      colnames(orix) <- var
      return(orix)
    }

    # n may not be larger as df's amount of columns
    if (ncol(.dat) < n) {
      warning("`n` must be smaller or equal to number of columns in data frame.", call. = TRUE)
      return(orix)
    }

    rs <- apply(.dat, 1, function(x) ifelse(sum(!is.na(x)) >= n, sum(x, na.rm = TRUE), NA))
  } else {
    stop("`x` must be a data frame.", call. = FALSE)
  }


  # to data frame, and rename variable
  rs <- as.data.frame(rs)
  colnames(rs) <- var

  # combine data
  if (append) rs <- dplyr::bind_cols(orix, rs)

  rs
}


#' @rdname row_sums
#' @export
row_sums.mids <- function(x, ..., var = "rowsums", append = TRUE) {
  rfun <- rowSums
  row_mids(x = x, ..., var = var, append = append, rfun = rfun)
}


#' @rdname row_sums
#' @export
row_means <- function(x, ...) {
  UseMethod("row_means")
}


#' @rdname row_sums
#' @export
total_mean <- function(x, ...) {
  UseMethod("total_mean")
}


#' @export
total_mean.data.frame <- function(x, ...) {
  # evaluate arguments, generate data
  .dat <- get_dot_data(x, dplyr::quos(...))
  sum(colSums(.dat, na.rm = TRUE)) / sum(apply(.dat, 1:2, function(x) !is.na(x)))
}


#' @rdname row_sums
#' @export
row_means.default <- function(x, ..., n, var = "rowmeans", append = TRUE) {
  # evaluate arguments, generate data
  .dat <- get_dot_data(x, dplyr::quos(...))

  # remember original data, if user wants to bind columns
  orix <- x

  if (is.data.frame(x)) {
    # for Inf-values, use all columns
    if (is.infinite(n)) n <- ncol(.dat)

    # is 'n' indicating a proportion?
    digs <- n %% 1
    if (digs != 0) n <- round(ncol(.dat) * digs)

    # check if we have a data frame with at least two columns
    if (ncol(.dat) < 2) {
      message("No row-means are returned, because `x` has only one column.", call. = TRUE)
      colnames(orix) <- var
      return(orix)
    }

    # n may not be larger as df's amount of columns
    if (ncol(.dat) < n) {
      warning("`n` must be smaller or equal to number of columns in data frame.", call. = TRUE)
      return(orix)
    }

    rm <- apply(.dat, 1, function(x) ifelse(sum(!is.na(x)) >= n, mean(x, na.rm = TRUE), NA))

  } else {
    stop("`x` must be a data frame.", call. = FALSE)
  }

  # to data frame, and rename variable
  rm <- as.data.frame(rm)
  colnames(rm) <- var

  # combine data
  if (append) rm <- dplyr::bind_cols(orix, rm)

  rm
}


#' @rdname row_sums
#' @export
row_means.mids <- function(x, ..., var = "rowmeans", append = TRUE) {
  rfun <- rowMeans
  row_mids(x = x, ..., var = var, append = append, rfun = rfun)
}


row_mids <- function(x, ..., var, append, rfun, count = NULL) {
  # check if suggested package is available
  if (!requireNamespace("mice", quietly = TRUE))
    stop("Package `mice` needed for this function to work. Please install it.", call. = FALSE)

  # check classes
  if (!inherits(x, "mids"))
    stop("`x` must be a `mids`-object, as returned by the `mice()`-function.", call. = FALSE)


  # quote dots and convert mids into long-data.frame

  vars <- dplyr::quos(...)
  long <- mice::complete(x, action = "long", include = TRUE)


  # group by imputation, so we can easily iterate each imputed dataset

  ndf <- long %>%
    dplyr::group_by(.data$.imp) %>%
    .nest()


  # select variable and compute rowsums. add this variable
  # to each imputed

  if (is.null(count)) {
    ndf$data <- purrr::map(ndf$data, ~ mutate(
      .x,
      rowsums = .x %>%
        dplyr::select(!!! vars) %>%
        rfun()
    ))
  } else {
    ndf$data <- purrr::map(ndf$data, ~ mutate(
      .x,
      rowsums = .x %>%
        dplyr::select(!!! vars) %>%
        rfun(count = count)
    ))
  }


  # rename new variable

  ndf$data <- purrr::map(ndf$data, function(.x) {
    colnames(.x)[ncol(.x)] <- var
    .x
  })


  # return mids-object. need to use "as.data.frame()",
  # because "as.mids()" can't cope with tibbles

  ndf %>%
    .unnest() %>%
    as.data.frame() %>%
    mice::as.mids()
}
strengejacke/sjmisc documentation built on June 29, 2023, 4:28 p.m.