R/tab-book.R
In crunch: Crunch.io Data Tools

Documented in tabBook

#' Compute a Tab Book
#'
#' This function allows you to generate a tab book from a multitable and data.
#' As with other functions, you can select the rows and columns you want to
#' work with by subsetting the `dataset` you pass into the function.
#'
#' By specifying a "json" `format`, instead of generating an Excel
#' workbook, you'll get a TabBookResult object, containing nested CrunchCube
#' results. You can then further format these and construct custom tab reports.
#' @param multitable a `Multitable` object
#' @param dataset CrunchDataset, which may be subset with a filter expression
#' on the rows, and a selection of variables to use on the columns.
#' @param weight a CrunchVariable that has been designated as a potential
#' weight variable for `dataset`, or `NULL` for unweighted results.
#' Default is the currently applied [`weight`].
#' @param output_format character export format: currently supported values are "json"
#' (default), "xlsx" and "csv".
#' @param file character local filename to write to. A default filename will be
#' generated from the `multitable`'s name if one is not supplied and the
#' "xlsx" format is requested. Not required for "json" format export.
#' @param filter a Crunch `filter` object or a vector of names
#' of \code{\link{filters}} defined in the dataset.
#' @param use_legacy_endpoint Logical, indicating whether to use a 'legacy'
#' endpoint for compatibility (this endpoint will be removed in the future).
#' Defaults to `FALSE`, but can be set in the function, or with the environment
#' variable `R_USE_LEGACY_TABBOOK_ENDPOINT` or R option
#' `use.legacy.tabbook.endpoint`.
#' @param ... Additional "options" passed to the tab book POST request.
#' More details can be found
#' [in the crunch API documentation](
#' https://crunch.io/api/reference/#post-/datasets/-dataset_id-/multitables/)
#' @return If "json" format is requested, the function returns an object of
#' class `TabBookResult`, containing a list of `MultitableResult`
#' objects, which themselves contain `CrunchCube`s. If "xlsx" or "csv", is requested,
#' the function invisibly returns the filename (`file`, if specified, or the
#' the autogenerated file name). If you request "json" and wish to access the
#' JSON data underlying the `TabBookResult`, pass in a path for `file`
#' and you will get a JSON file written there as well.
#' @examples
#' \dontrun{
#' # Excel export
#' m <- newMultitable(~ gender + age4 + marstat, data = ds)
#' tabBook(m, ds, format = "xlsx", file = "wealthy-tab-book.xlsx", filter = "wealthy")
#'
#' # csv export
#' tabBook(
#'     m,
#'     ds[c("q5a", "q8", "q2a_1", "q2a_2")],
#'     output_format = "csv",
#'     file = "tabbook.csv",
#'     format = list(
#'         pval_colors = FALSE,
#'         decimal_places = list(percentages = 0L, other = 2L),
#'         show_empty = FALSE
#'     ),
#'     sig_threshold = 0.05,
#'     doc_layout = list(toc = FALSE, variable_sheets = "one_sheet"),
#'     fields = c(
#'         "col_percent", "row_percent", "count_unweighted", "mean",
#'         "valid_count_weighted", "valid_count_unweighted"
#'     ),
#'     page_layout = list(
#'         rows = list(
#'             top = c("base_weighted", "base_unweighted"),
#'             bottom = c("scale_mean", "scale_median")
#'         ),
#'         measure_layout = "long"
#'     )
#' )
#'
#' # JSON export (loads into R)
#' book <- tabBook(m, ds)
#' tables <- prop.table(book, 2)
#'
#' }
#' @importFrom jsonlite fromJSON
#' @export
tabBook <- function(multitable, dataset, weight = crunch::weight(dataset),
                    output_format = c("json", "xlsx", "csv"), file, filter = NULL,
                    use_legacy_endpoint = envOrOption(
                        "use.legacy.tabbook.endpoint", FALSE, expect_lgl = TRUE
                    ),
                    ...) {
    dots <- list(...)
    if ("format" %in% names(dots) && is.character(dots$format)) {
        warning(
            "Passing string to `format` is deprecated in `tabBook()`. Use `output_format` instead."
        )
        fmt <- match.arg(dots$format, c("json", "xlsx", "csv"))
        dots$format <- NULL
    } else {
        fmt <- match.arg(output_format)
    }

    accept <- extToContentType(fmt)
    if (missing(file)) {
        if (fmt == "json") {
            ## We don't need a file.
            file <- NULL
        } else {
            ## Generate a reasonable filename in the current working dir
            file <- paste(name(multitable), fmt, sep = ".")
        }
    }

    if (!is.null(weight)) {
        weight <- self(weight)
    }

    filter <- standardize_tabbook_filter(dataset, filter)

    body <- list(
        filter = filter,
        weight = weight,
        options = dots
    )
    ## Add this after so that if it is NULL, the "where" key isn't present
    body$where <- variablesFilter(dataset)

    if (use_legacy_endpoint) {
        warning(
            "The legacy tabbook endpoint has been deprecated and will be removed in the future."
        )
        tabbook_url <- shojiURL(multitable, "views", "tabbook")
    } else {
        tabbook_url <- shojiURL(multitable, "views", "export")
    }

    ## POST the query, which (after progress polling) returns a URL to download
    result <- crPOST(tabbook_url,
        config = add_headers(`Accept` = accept),
        body = toJSON(body)
    )
    if (is.null(file)) {
        ## Read in the tab book content and turn it into useful objects
        out <- retry(crGET(result), wait = 0.5) #nocov
        return(TabBookResult(out))
    } else {
        file <- crDownload(result, file)
        ## (invisibly) return the filename
        invisible(file)
    }
}

extToContentType <- function(ext) {
    mapping <- list(
        csv = "text/csv",
        json = "application/json",
        xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    )
    return(mapping[[ext]])
}

# Possibly went a little overboard allowing different filter options in tabbook
# extract out the logic here
standardize_filter_list <- function(filter, dataset = NULL) {
    if (is.null(filter)) {
        return(NULL)
    } else if (all(is.character(filter))) {
        if (is.null(dataset)) {
            halt("Dataset unavailable, refer to filter names using `filters(ds)` instead.")
        }
        filter_name <- filter
        available <- filter_name %in% names(filters(dataset))
        if (any(!available)) {
            halt("Could not find filter named: ", paste(filter_name[!available], collapse = ", "))
        }
        return(standardize_filter_list(filters(dataset)[filter], dataset))
    } else if (inherits(filter, "FilterCatalog")) {
        return(lapply(urls(filter), function(x) {
            list(filter = x)
        }))
    } else if (inherits(filter, "CrunchFilter")) {
        return(list(list(filter = self(filter))))
    } else if (is.Expr(filter)) {
        return(list(zcl(filter)))
    } else if (is.list(filter)) {
        filter <- lapply(filter, standardize_filter_list, dataset = dataset)
        return(unlist(filter, recursive = FALSE))
    }
    halt("Unknown filter type") #nocov
}
standardize_tabbook_filter <- function(dataset, filter) {
    filter <- standardize_filter_list(filter, dataset)

    expr_filter <- activeFilter(dataset)
    if (is.CrunchExpr(expr_filter)) {
        expr_filter <- list(c(zcl(expr_filter), name = formatExpression(expr_filter)))
    }

    if(length(filter) > 0 && !is.null(expr_filter)) {
        filter <- unname(c(filter, expr_filter))
    } else if (!is.null(expr_filter)) {
        filter <- expr_filter
    }
    filter
}

#' TabBookResult and MultitableResult dimension
#'
#' @param x a TabBookResult or MultitableResult
#' @return Returns what you'd expect.
#' @name tabbook-dim
NULL

setMethod("initialize", "TabBookResult", function(.Object, ...) {
    .Object <- callNextMethod(.Object, ...)
    .Object$sheets <- lapply(.Object$sheets, MultitableResult)
    return(.Object)
})

#' @rdname crunch-extract
#' @export
setMethod("[[", c("TabBookResult", "numeric"), function(x, i, ...) {
    x$sheets[[i]]
})
#' @rdname crunch-extract
#' @export
setMethod("[[", c("TabBookResult", "character"), function(x, i, ...) {
    x$sheets[[match(i, names(x))]]
})

#' @rdname tabbook-dim
#' @export
setMethod("dim", "TabBookResult", function(x) {
    nrows <- length(x)
    ncols <- ifelse(nrows, length(x[[1]]), 0L)
    return(c(nrows, ncols))
})
#' @rdname describe-catalog
#' @export
setMethod("names", "TabBookResult", function(x) {
    unlist(lapply(x$meta$analyses, function(sheet) sheet$name))
})
#' @rdname describe-catalog
#' @export
setMethod("aliases", "TabBookResult", function(x) {
    unlist(lapply(x, function(mt) aliases(mt[[1]])[1]), use.names = FALSE)
})
#' @rdname describe-catalog
#' @export
setMethod("descriptions", "TabBookResult", function(x) {
    unlist(lapply(x, function(mt) descriptions(mt[[1]])[1]), use.names = FALSE)
})

setMethod("lapply", "TabBookResult", function(X, FUN, ...) {
    lapply(X$sheets, FUN, ...)
})

setMethod("initialize", "MultitableResult", function(.Object, ...) {
    .Object <- callNextMethod(.Object, ...)
    ## The first cube in the results list is the "total" column. It's a 1-D
    ## cube, add a second "dimension" so that it appears to be 2-D, n x 1
    .Object$result[[1]]$result$dimensions <- c(
        .Object$result[[1]]$result$dimensions,
        list(list(
            type = list(
                class = "enum",
                elements = list(
                    list(id = 0, value = "", missing = FALSE)
                )
            ),
            references = list(
                alias = "total",
                name = "Total"
            )
        ))
    )
    .Object$result <- lapply(.Object$result, function(cube) {
        cube <- CrunchCube(cube)
        cube <- rearrange3DTabbookDims(cube)
        cube <- rearrangeNumArrTabbookDims(cube)
        cube
    })

    return(.Object)
})

# Tabbook cubes can only have 3 dimensions if they have a cat/numeric array
# somewhere. At the time of writing, there is no validation on the template,
# so it is possible it could be in either the template or the row variables.
# This code preserves the legacy behavior where we have special logic to handle
# the situation where categorical arrays
# (see https://www.pivotaltracker.com/n/projects/2172644/stories/176401734)
# NB: Tabbooks have 2 variables per cube, so there cannot be a 3D cube with
# more than 1 selected dimension (2 MRs would be a 2D cube).
# The goal is to have the dimensions be rearranged so that it's c(2, 3, 1)
# but need to keep the MR together
rearrange3DTabbookDims <- function(cube) {
    if (length(dim(cube)) != 3L) return(cube)
    ## TODO: refactor with CrunchCubep-native methods (eg, dimensions<-, aperm)

    ## check if there is an MR, in which case there are actually 4 dims
    ## underlyingly, not 3 dims
    selecteds <- is.selectedDimension(cube@dims)
    if (!any(selecteds)) {
        ## If cubes are categorical array x multitable (non-array), aperm the
        ## cubes so that column is multitable var (3 -> 2), row is
        ## category of array (2 -> 1), subvar is "tab" (1 -> 3)
        dim_order <- c(2, 3, 1)
    } else if (any(which(selecteds) %in% c(3, 4))) {
        ## the selected dimension is in the second half of the cube, so
        ## the MR is in the multitable
        ## The "3rd dimension" is actually c(3, 4), so dim_order is:
        ## c(2, (3, 4), 1)
        dim_order <- c(2, 3, 4, 1)
    } else if (any(which(selecteds) %in% c(1, 2))) {
        ## the selected dimension is in the first half of the cube, so
        ## the array is in the multitable. (This shouldn't really be allowed)
        ## The "first" dimension is actually c(1, 2), so it would be:
        ## c(3, 4, (1, 2))
        ## However, to match legacy behavior, we do 4, 3, 1, 2,
        ## Greg isn't really sure why we do this, but since it only
        ## comes up when there's a cat array in the template,
        ## I just leave it as is.
        dim_order <- c(4, 3, 1, 2)
    }

    cube@dims <- CubeDims(cube@dims[dim_order])
    cube@arrays <- lapply(cube@arrays, function(x) {
        out <- aperm(x, perm = dim_order)
        attr(out, "measure_type") <- attr(x, "measure_type")
        out
    })
    return(cube)
}

# Numeric arrays have a "measure axis" rather than a dimension,
# and it gets put in the wrong order for display purposes.
# When we find a numeric array as the last dimension, reorder
# it so that numeric array is second-to-last
rearrangeNumArrTabbookDims <- function(cube) {
    dim_types <- getDimTypes(cube)
    if (dim_types[length(dim_types)] != "numarray_items") return(cube)

    ndims <- length(dim_types)
    if (ndims == 2) {
        dim_order <- c(2, 1)
    } else if (ndims == 3) { # MR (or CatArray) by numeric array
        dim_order <- c(3, 1, 2)
    } else {
        # Shouldn't ever happen, but don't want to error here
        return(cube)
    }
    cube@dims <- CubeDims(cube@dims[dim_order])
    cube@arrays <- lapply(cube@arrays, function(x) {
        out <- aperm(x, perm = dim_order)
        attr(out, "measure_type") <- attr(x, "measure_type")
        out
    })
    cube
}

#' @rdname crunch-extract
#' @export
setMethod("[[", "MultitableResult", function(x, i, ...) {
    x$result[[i]]
})
setMethod("lapply", "MultitableResult", function(X, FUN, ...) {
    lapply(X$result, FUN, ...)
})
#' @rdname describe-catalog
#' @export
setMethod("names", "MultitableResult", function(x) {
    unlist(lapply(x, function(cube) names(cube)[2]), use.names = FALSE)
})
#' @rdname describe-catalog
#' @export
setMethod("aliases", "MultitableResult", function(x) {
    unlist(lapply(x, function(cube) aliases(cube)[2]), use.names = FALSE)
})
#' @rdname describe-catalog
#' @export
setMethod("descriptions", "MultitableResult", function(x) {
    unlist(lapply(x, function(cube) descriptions(cube)[2]), use.names = FALSE)
})

#' @export
as.array.TabBookResult <- function(x, ...) lapply(x, as.array)

#' @export
as.array.MultitableResult <- function(x, ...) lapply(x, as.array)

#' @rdname cube-computing
#' @export
setMethod("prop.table", "MultitableResult", function(x, margin = NULL) {
    lapply(x, prop.table, margin = margin)
})

#' @rdname cube-computing
#' @export
setMethod("prop.table", "TabBookResult", function(x, margin = NULL) {
    lapply(x, function(x) {
        tryCatch(prop.table(x, margin = margin), error = function(e) NULL)
    })
})

#' @rdname cube-computing
#' @export
setMethod("bases", "TabBookResult", function(x, margin = NULL) {
    lapply(x, function(x) {
        tryCatch(bases(x, margin = margin), error = function(e) NULL)
    })
})

#' @rdname cube-computing
#' @export
setMethod("bases", "MultitableResult", function(x, margin = NULL) {
    lapply(x, bases, margin = margin)
})