R/prop_multi.R

Defines functions prop_multi

Documented in prop_multi

#' Compute the proportion of items for multiple non exclusive choices
#'
#' @description Uses an id variable and groupings to calculate proportions in a multiple-choice (checkbox) scenario.
#' I.e., answer proportions sum to more than 100% since more than a single option may be valid for the same user (id).
#'
#' @param x a tbl() to compute proportions, this can be a grouped tbl()
#' @param idvar the response id variable name as *string* (from which distinct
#'  respondents are to be computed)
#' @param ... Variables to group by.
#'
#' @return A tbl, grouped the same way as x, with the proportions computed out of the distinct
#'  respondents. If x is grouped before the ..., the total of the distinct respondents will be
#'  calculated out of level of the previous groupings and not from the whole x data.
#'
#' @details
#' The function uses the variables names specificed in ..., to conduct a grouping
#'
#' With a variable name in ...,  calculates proportions cases of variable out of the total n unique idvar
#' grouped by the variable.
#' with a variable name in ... and pre grouped data,
#'   calc proportions cases of variable out of the total n unique idvar in the pregrouped cathegories
#' grouped by the variable.
#'
#' @examples
#' multi_choice <- tribble(~id, ~choice, ~yr,
#'                         1, "A", 2017,
#'                         1, "B", 2017,
#'                         1, "A", 2018,
#'                         2, "A", 2019,
#'                         2, "C", 2019,
#'                         2, "A", 2017,
#'                         3, "A", 2017,
#'                         3, "A", 2018)
#'
#' # disregard the yr variable
#' multi_choice %>%
#'    prop_multi(choice, idvar = id)
#'
#' # add the yr variable as a pre-grouping variable and use it as well
#' multi_choice %>%
#'    group_by(yr) %>%
#'    prop_multi(choice, idvar = "id")
#'
#'
#' @seealso prop, add_prop
#' @importFrom magrittr %>%
#'
#' @export
prop_multi <- function(x, ..., idvar) {

  # make sure that there is a grouping in the function's arguments
  multy_args <- eval(substitute(alist(...)))
  if (length(multy_args)==0){
    stop("the function expects at least one grouping variable for which to calculate proportions")
  }

  # extract previous group_by on x
  prev_groups=groups(x)

  # As default use response ID as a grouping variable
  if (missing(idvar)){
    ID <- "response_id"
    idvar <- sym(ID)
  } else {
    ID <- idvar
    idvar <- enquo(ID)
  }


  distinct_people <- x %>%
    select(!!!prev_groups, !!idvar) %>%
    distinct() %>%
    count(name = "tot")

  if (is.null(prev_groups) | length(prev_groups)==0) {
    people <- distinct_people %>% pull(tot)

    x %>%
      select(!!!prev_groups, !!idvar, ...) %>%
      distinct() %>%
      group_by(...) %>%
      summarize(n=n()) %>%
      mutate(tot=people) %>%
      mutate(prop=n/tot)

  } else {

    s <- (paste(group_vars(x),collapse= ", "))
    warning(paste0("tot is calculated out of the pre-grouping levels of: \"", s,
                  "\" and \nnot from total unique values of \"",ID,"\""))

    x %>%
      select(!!!prev_groups, !!idvar, ...) %>%
      distinct() %>%
      group_by(!!!prev_groups,...) %>%
      summarize(n=n()) %>%
      left_join(distinct_people,by=group_vars(x)) %>%
      mutate(prop=n/tot)
  }
}
sarid-ins/saridr documentation built on Nov. 10, 2020, 9:07 p.m.