R/tabbook-additions.R

Defines functions extToContentType getCatalog tabBookWeightSpec tabBookMulti_crunchtabs tabBookResult download_result varFilter tabBookSingle_crunchtabs tabBook_crunchtabs resultsObject nonTabBookSummary.TextVariable nonTabBookSummary.DatetimeVariable nonTabBookSummary.NumericVariable nonTabBookSummary.default nonTabBookSummary reflowQuestionNumbers

Documented in nonTabBookSummary nonTabBookSummary.DatetimeVariable nonTabBookSummary.default nonTabBookSummary.NumericVariable nonTabBookSummary.TextVariable reflowQuestionNumbers resultsObject tabBook_crunchtabs tabBookWeightSpec

#' Reflow Question Numbers
#'
#' When we manually add summaries of
#' numeric, datetime or text variables
#' we must "reflow" the question numbers
#' so that they match dataset order.
#'
#' @param x A results object from within the \link{crosstabs} function.
reflowQuestionNumbers <- function(x) {
  for (i in seq_along(x)) {
    x[[i]]$number <- i
  }
  x
}

#' Prepare Summary Content
#'
#' Prepare summary content for toplines for classes that
#' are not covered by tabBook such as NumericVariable, DatetimeVariables
#' and TextVariable
#'
#' @param x A variable of class NumericVariable, DatetimeVariable or TextVariable
#' @param weighted Logical. Are these data weighted?
#' @param num The number of verbatim responses to present as a sample. Defaults to 10.
#' @param tz A timezone. Defaults to UTC.
#' @param ... Additional arguments passed to methods
#' @export
nonTabBookSummary <- function(x, ...) {
  UseMethod("nonTabBookSummary", x)
}

#' @rdname nonTabBookSummary
#' @export
nonTabBookSummary.default <- function(x, weighted = TRUE, num = 10, tz = "UTC", ...) {
  wrong_class_error(
    x,
    c(
      "CategoricalVariable",
      "CategoricalArrayVariable",
      "MultipleResponseVariable",
      "TextVariable",
      "NumericVariable",
      "DatetimeVariable"
    ), "nonTabBookSummary"
  )
}

#' Prepare Numeric Content
#'
#' \link[crunch]{tabBook} does not report an appropriate numeric summary
#' without being provided with a multitable. So we "fake" a numeric summary
#' by overwriting the structure of a categorical object.
#'
#' If data are weighted we display Weighted N instead of Unweighted
#' N
#'
#' @param x A variable of class \link[crunch]{NumericVariable}
#' @inheritParams nonTabBookSummary
#' @importFrom stats median quantile
#' @export
nonTabBookSummary.NumericVariable <- function(x, weighted = TRUE, num = 10, tz = "UTC", ...) { # nolint
  y <- as.vector(x)
  qt <- quantile(y, na.rm = TRUE)
  minima <- min(y, na.rm = TRUE)
  maxima <- max(y, na.rm = TRUE)
  half <- median(y, na.rm = TRUE)
  mu <- mean(y, na.rm = TRUE)
  firstq <- qt[2]
  thirdq <- qt[4]
  stdev <- sd(y, na.rm = TRUE)

  # Mock the content object

  obj <- resultsObject(
    x,
    top = NULL,
    weighted = weighted,
    body_values = c(minima, firstq, half, mu, thirdq, maxima, stdev),
    body_labels = c(
      "Minimum",
      "1st Quartile",
      "Median",
      "Mean",
      "3rd Quartile",
      "Maximum",
      "Standard Deviation"
    ),
    vector = y
  )

  obj
}

#' Prepare Datetime Content
#'
#' tabBook does not report an appropriate date time summary without
#' being provided with a multitable. So we "fake" a date time summary
#' by overwriting the structure of a categorical object.
#'
#' If data are weighted we display Weighted N instead of Unweighted
#' N
#'
#' @param x A variable of class \link[crunch]{DatetimeVariable}
#' @inheritParams nonTabBookSummary
#' @export
nonTabBookSummary.DatetimeVariable <- function(x, weighted = TRUE, num = 10, tz = "UTC", ...) { # nolint
  y <- as.POSIXct(as.vector(x))
  qt <- quantile(y, na.rm = TRUE)
  minima <- min(y, na.rm = TRUE)
  maxima <- max(y, na.rm = TRUE)
  half <- median(y, na.rm = TRUE)
  firstq <- qt[2]
  thirdq <- qt[4]

  # Mock the content object
  obj <- resultsObject(
    x,
    top = NULL,
    weighted = weighted,
    body_values = c(minima, firstq, half, thirdq, maxima),
    body_labels = c(
      "Minimum",
      "1st Quartile",
      "Median",
      "3rd Quartile",
      "Maximum"
    ),
    vector = y
  )

  obj
}

#' Prepare Text Content
#'
#' tabBook does not report an appropriate date time summary without
#' being provided with a multitable. So we "fake" a date time summary
#' by overwriting the structure of a categorical object.
#'
#' If data are weighted we display Weighted N instead of Unweighted
#' N
#'
#' @param x A variable of class \link[crunch]{TextVariable}
#' @inheritParams nonTabBookSummary
#' @export
nonTabBookSummary.TextVariable <- function(x, weighted = TRUE, num = 10, tz = "UTC", ...) { # nolint
  set.seed(42)
  y <- as.vector(x)
  z <- sort(sample(unique(y[!is.na(y)]), num, replace = FALSE))

  # Mock the content object
  obj <- resultsObject(
    x,
    weighted = weighted,
    body_values = rep("", length(z)),
    body_labels = z,
    vector = y
  )

  obj
}


#' Generic Results Object
#'
#' As \link[crunch]{tabBook} does not provide us with a way to  create summaries
#' for some variable types we are forced to create an object that bypasses
#' the reformatVar function. Our goal is to use as much of the
#' code infrastructure for theming purposes as possible while
#' allowing the creation of new topline summary objects
#'
#' @param x A dataset variable
#' @param top The top of the results object. NULL by default
#' @param weighted Logical. Are these data weighted?
#' @param body_values The values to present
#' @param body_labels The labels to present
#' @param vector The data vector
resultsObject <- function(x, top = NULL, weighted, body_values, body_labels, vector) {
  stopifnot(length(body_values) == length(body_labels))

  top <- top
  data_list <- list()
  data_list$body <- data.frame(
    x = body_values,
    row.names = body_labels
  )
  names(data_list$body) <- NA_character_

  # Presentation differences if data are
  # weighted or unweighted

  if (weighted) {
    data_list$weighted_n <- data.frame(
      x = sum(!is.na(vector)),
      row.names = "Weighted N"
    )
    names(data_list$weighted_n) <- NA_character_

    bottom <- c(weighted_n = "weighted_n")
    data_order <- c("body", weighted_n = "weighted_n")
  } else {
    data_list$unweighted_n <- data.frame(
      x = sum(!is.na(vector)),
      row.names = "Unweighted N"
    )
    names(data_list$unweighted_n) <- NA_character_

    bottom <- c(unweighted_n = "unweighted_n")
    data_order <- c("body", unweighted_n = "unweighted_n")
  }

  structure(
    list(
      alias = crunch::alias(x),
      name = crunch::name(x),
      description = ifelse(
        crunch::description(x) == "",
        crunch::name(x),
        crunch::description(x)
      ),
      notes = crunch::notes(x),
      type = class(x)[1],
      top = NULL,
      bottom = bottom,
      data_order = data_order,
      inserts = rep("Category", length(body_values)),
      data_list = data_list,
      min_cell_top = NULL,
      no_totals = TRUE,
      mean_median = FALSE,
      min_cell_body = matrix(rep(NA, length(body_values))),
      min_cell_bottom = matrix(FALSE),
      min_cell = FALSE,
      rownames = c(body_labels, ifelse(weighted, "Weighted N", "Unweighted N"))
    ),
    class = c("ToplineVar", "CrossTabVar")
  )
}

#' Compute a Tab Book
#'
#' This function allows you to generate a tab book from a multitable and data.
#' As with other functions, you can select the rows and columns you want to
#' work with by subsetting the `dataset` you pass into the function.
#'
#' By specifying a "json" `format`, instead of generating an Excel
#' workbook, you'll get a TabBookResult object, containing nested CrunchCube
#' results. You can then further format these and construct custom tab reports.
#'
#' Tabbook pages are organized in the order the variables are stored by the
#' server, unless complex weights are specified, in which case, the variables
#' are sorted in the order of the weight specification dataset (like the
#' one created by `tabBookWeightSpec()`).
#'
#' @param multitable a `Multitable` object
#' @param dataset CrunchDataset, which may be subset with a filter expression
#' on the rows, and a selection of variables to use on the columns.
#' @param weight a CrunchVariable that has been designated as a potential
#' weight variable for `dataset`, or `NULL` for unweighted results.
#' Default is the currently applied [`weight`]. Additionally, weights can be
#' set on a per variable basis for json export only. To do so, specify the weight
#' as either a list (which will be passed to `tabBookWeightSpec()`, or a data.frame
#' that mimics the structure. See [`tabBookWeightSpec()`] for more details.
#' generated from the `multitable`'s name if one is not supplied and the
#' "xlsx" format is requested. Not required for "json" format export.
#' of \code{\link{filters}} defined in the dataset.
#' @param append_default_wt passed to [`tabBookWeightSpec()`] if `weight` is a list
#' @return If "json" format is requested, the function returns an object of
#' class `TabBookResult`, containing a list of `MultitableResult`
#' objects, which themselves contain `CrunchCube`s. For single weight tabbook,
#' the variables are always sorted in the order the server stores them in,
#' but complex weights are sorted in the order of the data.frame given. If
#' "xlsx" is requested, the function invisibly returns the filename (`file`,
#' if specified, or the the autogenerated file name). If you request "json" and
#' wish to access the JSON data underlying the `TabBookResult`, pass in a path
#' for `file` and you will get a JSON file written there as well.
#' @examples
#' \dontrun{
#' m <- newMultitable(~ gender + age4 + marstat, data = ds)
#' tabBook(m, ds, format = "xlsx", file = "wealthy-tab-book.xlsx", filter = "wealthy")
#' book <- tabBook(m, ds) # Returns a TabBookResult
#' tables <- prop.table(book, 2)
#' }
#' @importFrom jsonlite fromJSON
#' @export
tabBook_crunchtabs <- function(multitable, dataset, weight = crunch::weight(dataset),
                               append_default_wt = TRUE) {
  if (is.null(weight) | is.variable(weight)) {
    return(tabBookSingle_crunchtabs(multitable, dataset, weight))
  } else if (is.list(weight) || is.data.frame(weight)) {
    return(tabBookMulti_crunchtabs(
      multitable,
      dataset,
      weight,
      append_default_wt
    ))
  } else {
    stop("weight must be NULL, a CrunchVariable or a list indicating a multi-weight spec")
  }
}

tabBookSingle_crunchtabs <- function(multitable, dataset, weight) {
  if (!is.null(weight)) {
    weight <- self(weight)
  }
  # filter <- standardize_tabbook_filter(dataset, filter)
  body <- list(
    filter = NULL,
    weight = weight,
    options = list(format = NULL)
  )

  body$where <- varFilter(dataset)

  tabbook_url <- crunch::shojiURL(multitable, "views", "export")

  ## POST the query, which (after progress polling) returns a URL to download
  result <- crunch::crPOST(tabbook_url,
    config = httr::add_headers(`Accept` = "application/json"),
    body = crunch::toJSON(body)
  )

  out <- download_result(result)
  TabBookResult <- utils::getFromNamespace("TabBookResult", "crunch")
  return(TabBookResult(out))
}


varFilter <- function(dataset) {
  variablesFilter <- utils::getFromNamespace("variablesFilter", "crunch")
  variablesFilter(dataset)
}

download_result <- function(result) {
  retry <- utils::getFromNamespace("retry", "crunch")
  retry(crunch::crGET(result), wait = 0.5) # For mocks
}

tabBookResult <- function(...) {
  TabBookResult <- utils::getFromNamespace("TabBookResult", "crunch")
  TabBookResult(...) # For mocks
}

#' @importFrom stats ave
#' @importFrom utils stack
tabBookMulti_crunchtabs <- function(
                                    multitable,
                                    dataset,
                                    weight_spec,
                                    append_default_wt) {
  if (length(weight_spec) == 0) {
    stop("Empty list not allowed as a weight spec, use NULL to indicate no weights")
  }

  if (is.data.frame(weight_spec) && !setequal(names(weight_spec), c("weight", "alias"))) {
    stop("if weight_spec is a data.frame it must have exactly two columns: 'weight' & 'alias'")
  }

  if (any(duplicated(weight_spec))) {
    stop("Found duplicate weight and alias combinations in weight_spec")
  }

  # We can't trust that weight variables are included in the dataset subset
  # that we are using for the tabbook, so we need to load the full variable list
  # NB: The `relative=on` is to get a cache hit, and might need to change
  # (comes from `variablesFilter()`)


  if (is.list(weight_spec)) weight_spec <- tabBookWeightSpec(
    dataset, weights = weight_spec, append_default_wt)

  wt_vars <- unique(weight_spec$weight)
  # Add a column that indicates what page the variable will be on
  # in the weight-specific tabbook
  weight_spec$page_num <- as.numeric(ave(weight_spec$weight, weight_spec$weight, FUN = seq_along))

  books <- lapply(wt_vars, function(wt) {
    page_vars <- weight_spec$alias[weight_spec$weight == wt]
    tabBookSingle_crunchtabs(
      multitable,
      dataset[page_vars],
      weight = dataset[[wt]]
    )
  })

  names(books) <- wt_vars

  # stitch together
  # Most of the objects should be the same because they come from the same multitable
  # But the analyses object contain an item per variable with the weight included
  # and then the pages section contains the cube results
  analyses <- mapply(
    weight = weight_spec$weight,
    page_num = weight_spec$page_num,
    FUN = function(weight, page_num) {
      books[[which(names(books) == weight)]]@.Data[[1]]$analyses[[page_num]]
    },
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  )
  pages <- mapply(
    weight = weight_spec$weight,
    page_num = weight_spec$page_num,
    FUN = function(weight, page_num) {
      books[[which(names(books) == weight)]]@.Data[[2]][[page_num]]
    },
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  )

  combined <- books[[1]] # start with first one for skeleton
  combined@.Data[[1]]$analyses <- analyses
  combined@.Data[[2]] <- pages
  combined
  return(combined)
}


#' Helper function for setting complex weights on a `tabbook`
#'
#' For json [`tabBook()`], you can specify a weight per variable in the
#' dataset, where each row in the data.frame indicates a weight and
#' alias to use for each page.
#'
#' @param dataset A `CrunchDataset`
#' @param weights A list where each item has a name that indicates the
#' weight's alias that should be use (no name indicates unweighted) and
#' each item is a vector of variable aliases to include as pages in the
#' `tabbook`.
#' @param append_default_wt Whether to append the dataset's default weight
#' (or unweighted pages if no weight is set) for all variables.
#'
#' @return A data.frame with two columns, `weight`, the alias of the weight to use,
#' and alias, the alias of the variable to use the weight on. If `append_default_wt`
#' is `TRUE`, the returned object is sorted in the order of aliases in the dataset,
#' and with the default weight first, followed by the weights specified in the `weights`
#' argument.
#' @export
#'
#' @examples
#' \dontrun{
#' ds <- newExampleDataset("pets")
#' mt <- newMultitable(~q1, ds)
#'
#' weight_spec <- tabBookWeightSpec(
#'   ds,
#'   list(wt1 = "gender", wt2 = "starttime", "gender")
#' )
#'
#' # Now can use the weight spec in `tabBook()`
#' tabbook <- tabBook(mt, ds, weight = weight_spec)
#' }
tabBookWeightSpec <- function(dataset, weights, append_default_wt = TRUE) {
  weight_df <- stack(weights)
  names(weight_df) <- c("alias", "weight")
  # stack does mostly what we want, but we don't want factor
  weight_df$weight <- as.character(weight_df$weight)

  # If we don't need to append the default weights, we're done
  if (!append_default_wt) {
    return(weight_df)
  }

  default_weight <- if (is.null(weight(dataset))) "" else alias(weight(dataset))
  default_weight_df <- data.frame(
    alias = names(dataset),
    weight = default_weight,
    stringsAsFactors = FALSE
  )

  # Combine, but reorder so that the variables are in the same order as they are in the
  # original dataset, with the default weight first and then the weights
  # from the list are ordered after in the order they came in
  default_weight_df$wt_pos <- 0
  weight_df$wt_pos <- seq_len(nrow(weight_df))

  out <- rbind(weight_df, default_weight_df)
  out <- out[order(match(out$alias, names(dataset)), out$wt_pos), ]
  out$wt_pos <- NULL
  row.names(out) <- NULL # Really just for testing purposes

  duplicated <- duplicated(out)
  if (any(duplicated)) {
    warning("Dropping duplicated alias & weight combinations")
    out <- out[!duplicated, ]
  }
  out
}


getCatalog <- function(dataset) {
  ShojiCatalog <- utils::getFromNamespace("ShojiCatalog", "crunch")
  ShojiCatalog(crGET(self(allVariables(dataset)), query = list(relative = "on")))
}


extToContentType <- function(ext) {
  mapping <- list(
    json = "application/json",
    xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
  )
  return(mapping[[ext]])
}
Crunch-io/crunchtabs documentation built on Jan. 31, 2023, 12:14 p.m.