R/format_tt.R

Defines functions format_quarto format_markdown format_math format_tt_lazy format_tt

Documented in format_tt

#' Format columns of a data frame
#'
#' This function formats the columns of a data frame based on the column type (logical, date, numeric). 
#' It allows various formatting options like significant digits, decimal points, and scientific notation.
#' It also includes custom formatting for date and boolean values.
#' If this function is applied several times to the same cell, the last transformation is retained and the previous calls are ignored, except for the `escape` argument which can be applied to previously transformed data.
#'
#' @param x A data frame or a vector to be formatted.
#' @param i Row indices where the formatting should be applied.
#' @param digits Number of significant digits or decimal places.
#' @param num_fmt The format for numeric values; one of 'significant', 'significant_cell', 'decimal', or 'scientific'.
#' @param num_zero Logical; if TRUE, trailing zeros are kept in "decimal" format (but not in "significant" format).
#' @param num_mark_big Character to use as a thousands separator.
#' @param num_mark_dec Decimal mark character. Default is the global option 'OutDec'.
#' @param num_suffix Logical; if TRUE display short numbers with `digits` significant digits and K (thousands), M (millions), B (billions), or T (trillions) suffixes.
#' @param date A string passed to the `format()` function, such as "%Y-%m-%d". See the "Details" section in `?strptime`
#' @param bool A function to format logical columns. Defaults to title case.
#' @param math Logical. If TRUE, wrap cell values in math mode `$..$`. This is useful for LaTeX output or with HTML MathJax `options(tinytable_html_mathjax=TRUE)`.
#' @param other A function to format columns of other types. Defaults to `as.character()`.
#' @param replace Logical, String or Named list of vectors
#' - TRUE: Replace `NA` by an empty string.
#' - FALSE: Print `NA` as the string "NA".
#' - String: Replace `NA` entries by the user-supplied string.
#' - Named list: Replace matching elements of the vectors in the list by theirs names. Example: 
#'      - `list("-" = c(NA, NaN), "Tiny" = -Inf, "Massive" = Inf)`
#' @param escape Logical or "latex" or "html". If TRUE, escape special characters to display them as text in the format of the output of a `tt()` table.
#' - If `i` and `j` are both `NULL`, escape all cells, column names, caption, notes, and spanning labels created by `group_tt()`.
#' @param markdown Logical; if TRUE, render markdown syntax in cells. Ex: `_italicized text_` is properly italicized in HTML and LaTeX.
#' @param fn Function for custom formatting. Accepts a vector and returns a character vector of the same length.
#' @param quarto Logical. Enable Quarto data processing and wrap cell content in a `data-qmd` span (HTML) or `\QuartoMarkdownBase64{}` macro (LaTeX). See warnings in the Global Options section below.
#' @param sprintf String passed to the `?sprintf` function to format numbers or interpolate strings with a user-defined pattern (similar to the `glue` package, but using Base R).
#' @param ... Additional arguments are ignored.
#' @inheritParams tt
#' @inheritParams style_tt
#' @template global_options
#'
#' @return A data frame with formatted columns.
#' @export
#' @examples
#' dat <- data.frame(
#'   a = rnorm(3, mean = 10000),
#'   b = rnorm(3, 10000))
#' tab <- tt(dat)
#' format_tt(tab,
#'  digits = 2,
#'  num_mark_dec = ",",
#'  num_mark_big = " ")
#'  
#' k <- tt(data.frame(x = c(0.000123456789, 12.4356789)))
#' format_tt(k, digits = 2, num_fmt = "significant_cell")
#'  
#' dat <- data.frame(
#'    a = c("Burger", "Halloumi", "Tofu", "Beans"),
#'    b = c(1.43202, 201.399, 0.146188, 0.0031),
#'    c = c(98938272783457, 7288839482, 29111727, 93945))
#' tt(dat) |>
#'  format_tt(j = "a", sprintf = "Food: %s") |>
#'  format_tt(j = 2, digits = 1, num_fmt = "decimal", num_zero = TRUE) |>
#'  format_tt(j = "c", digits = 2, num_suffix = TRUE)
#'  
#' y <- tt(data.frame(x = c(123456789.678, 12435.6789)))
#' format_tt(y, digits=3, num_mark_big=" ")
#'
#' x <- tt(data.frame(Text = c("_italicized text_", "__bold text__")))
#' format_tt(x, markdown=TRUE)
#'
#' tab <- data.frame(a = c(NA, 1, 2), b = c(3, NA, 5))
#' tt(tab) |> format_tt(replace = "-")
#'
#' dat <- data.frame(
#'    "LaTeX" = c("Dollars $", "Percent %", "Underscore _"),
#'    "HTML" = c("<br>", "<sup>4</sup>", "<emph>blah</emph>")
#' )
#' tt(dat) |> format_tt(escape = TRUE)   
#'
format_tt <- function(x,
                      i = NULL,
                      j = NULL,
                      digits = get_option("tinytable_format_digits", default = NULL),
                      num_fmt = get_option("tinytable_format_num_fmt", default = "significant"),
                      num_zero = get_option("tinytable_format_num_zero", default = FALSE),
                      num_suffix = get_option("tinytable_format_num_suffix", default = FALSE),
                      num_mark_big = get_option("tinytable_format_num_mark_big", default = ""),
                      num_mark_dec = get_option("tinytable_format_num_mark_dec", default = getOption("OutDec", default = ".")),
                      date = get_option("tinytable_format_date", default = "%Y-%m-%d"),
                      bool = get_option("tinytable_format_bool", default = function(column) tools::toTitleCase(tolower(column))),
                      math = get_option("tinytable_format_math", default = FALSE),
                      other = get_option("tinytable_format_other", default = as.character),
                      replace = get_option("tinytable_format_replace", default = TRUE),
                      escape = get_option("tinytable_format_escape", default = FALSE),
                      markdown = get_option("tinytable_format_markdown", default = FALSE),
                      quarto = get_option("tinytable_format_quarto", default = FALSE),
                      fn = get_option("tinytable_format_fn", default = NULL),
                      sprintf = get_option("tinytable_format_sprintf", default = NULL),
                      ...
                      ) {
    out <- x

    dots <- list(...)
    if ("replace_na" %in% names(dots)) {
        replace <- dots[["replace_na"]]
        warning("The `replace_na` argument was renamed `replace`.", call. = FALSE)
    }

    if (inherits(out, "tinytable")) {
        cal <- call("format_tt_lazy", 
            i = i,
            j = j,
            digits = digits,
            num_fmt = num_fmt,
            num_zero = num_zero,
            num_suffix = num_suffix,
            num_mark_big = num_mark_big,
            num_mark_dec = num_mark_dec,
            replace = replace,
            fn = fn,
            sprintf = sprintf,
            url = url,
            date = date,
            bool = bool,
            math = math,
            escape = escape,
            markdown = markdown,
            quarto = quarto,
            other = other)
        out@lazy_format <- c(out@lazy_format, list(cal))
    } else {

        out <- format_tt_lazy(out,
            i = i,
            j = j,
            digits = digits,
            num_fmt = num_fmt,
            num_zero = num_zero,
            num_suffix = num_suffix,
            num_mark_big = num_mark_big,
            num_mark_dec = num_mark_dec,
            replace = replace,
            fn = fn,
            sprintf = sprintf,
            url = url,
            date = date,
            bool = bool,
            math = math,
            other = other,
            escape = escape,
            quarto = quarto,
            markdown = markdown)
    }

    return(out)
}

format_tt_lazy <- function(x,
                           i = NULL,
                           j = NULL,
                           digits,
                           num_fmt = "significant",
                           num_zero = FALSE,
                           num_suffix = FALSE,
                           num_mark_big = "",
                           num_mark_dec = NULL,
                           replace = TRUE,
                           fn = NULL,
                           sprintf = NULL,
                           url = FALSE,
                           date = "%Y-%m-%d",
                           bool = identity,
                           math = FALSE,
                           escape = FALSE,
                           markdown = FALSE,
                           quarto = quarto,
                           other = as.character
                           ) {

    # format_tt() supports vectors
    if (isTRUE(check_atomic_vector(x))) {
        atomic_vector <- TRUE
        if (is.factor(x)) x <- as.character(x)
        ori <- out <- x <- data.frame(tinytable = x, stringsAsFactors = FALSE)
        j <- 1
    } else if (is.data.frame(x)) {
        atomic_vector <- FALSE
        ori <- out <- x
    } else if (inherits(x, "tinytable")){
        atomic_vector <- FALSE
        # if no other format_tt() call has been applied, we ctan have numeric values
        out <- x@table_dataframe
        ori <- x@data
    } else {
        stop("`x` must be a `tinytable` object, a data frame, or an atomic vector.", call. = FALSE)
    }

    assert_integerish(digits, len = 1, null.ok = TRUE)
    assert_integerish(i, null.ok = TRUE)
    assert_choice(num_fmt, c("significant", "significant_cell", "decimal", "scientific"))
    assert_flag(num_zero)
    assert_string(num_mark_big)
    assert_string(num_mark_dec)
    assert_string(date)
    assert_function(bool)
    assert_function(identity)
    assert_function(fn, null.ok = TRUE)
    assert_string(sprintf, null.ok = TRUE)
    assert_flag(markdown)
    assert_flag(quarto)
    replace <- sanitize_replace(replace)
    sanity_num_mark(digits, num_mark_big, num_mark_dec)

    i <- sanitize_i(i, x, pre_group_i = TRUE)
    j <- sanitize_j(j, x)
    ibody <- attr(i, "body")
    inull <- isTRUE(attr(i, "null"))
    jnull <- isTRUE(attr(j, "null"))

    # In sanity_tt(), we fill in missing NULL `j` in the format-specific versions,
    # because tabularray can do whole column styling. Here, we need to fill in
    # NULL for all formats since this is applied before creating the table.
    # nrow(out) because nrow(x) sometimes includes rows that will be added **in the lazy future** by group_tt()

    # format each column
    # Issue #230: drop=TRUE fixes bug which returned a character dput-like vector
    for (col in j) {
        # sprintf() is self-contained
        if (!is.null(sprintf)) {
            out[ibody, col] <- base::sprintf(sprintf, ori[ibody, col, drop = TRUE])

        } else {
            # logical
            if (is.logical(ori[ibody, col])) {
                out[ibody, col] <- bool(ori[ibody, col, drop = TRUE])

                # date
            } else if (inherits(ori[ibody, col], "Date")) {
                out[ibody, col] <- format(ori[ibody, col, drop = TRUE], date)

                # numeric
            } else if (is.numeric(ori[ibody, col, drop = TRUE])) {
                tmp <- format_numeric(ori[ibody, col], 
                    num_suffix = num_suffix, 
                    digits = digits, 
                    num_mark_big = num_mark_big, 
                    num_mark_dec = num_mark_dec, 
                    num_zero = num_zero, 
                    num_fmt = num_fmt)
                if (!is.null(tmp)) out[ibody, col] <- tmp

                # other
            } else {
                out[ibody, col] <- other(ori[ibody, col, drop = TRUE])
            }
        }

        for (k in seq_along(replace)) {
            idx <- ori[ibody, col, drop = TRUE] %in% replace[[k]]
            out[ibody, col][idx] <- names(replace)[[k]]
        }

    } # loop over columns

    # Custom functions overwrite all the other formatting, but is before markdown
    # before escaping
    if (is.function(fn)) {
        for (col in j) {
            out[ibody, col] <- fn(ori[ibody, col, drop = TRUE])
        }
    }

    if (isTRUE(math)) {
        for (row in ibody) {
            for (col in j) {
                out[row, col] <- format_math(out[row, col], math)
            }
        }
        if (inull && jnull) {
            x@caption <- format_math(x@caption, math)
            colnames(x) <- format_math(colnames(x), math)
            for (idx in seq_along(x@notes)) {
                n <- x@notes[[idx]]
                if (is.character(n) && length(n) == 1) {
                    x@notes[[idx]] <- format_math(n, math = math)
                } else if (is.list(n) && "text" %in% names(n)) {
                    n$text <- format_math(n$text, math = math)
                    x@notes[[idx]] <- n
                }
            }
            for (idx in seq_along(x@lazy_group)) {
                g <- x@lazy_group[[idx]]
                if (!is.null(g$j)) {
                    names(g$j) <- format_math(names(g$j), math = math)
                }
                if (!is.null(g$i)) {
                    names(g$i) <- format_math(names(g$i), math = math)
                }
                x@lazy_group[[idx]] <- g
            }
        }
    }


    # escape latex characters
    if (!isFALSE(escape)) {
        if (isTRUE(escape == "latex")) {
            o <- "latex"
        } else if (isTRUE(escape == "html")) {
            o <- "html"
        } else if (isTRUE(escape == "typst")) {
            o <- "typst"
        } else if (inherits(x, "tinytable")) {
            o <- x@output
        } else {
            o <- FALSE
        }

        # body
        for (row in ibody) {
            for (col in j) {
                out[row, col] <- escape_text(out[row, col], output = o)
            }
        }

        # column names
        if (0 %in% i) {
            colnames(x) <- escape_text(colnames(x), output = o)
        }

        # caption & groups: if i and j are both null
        if (inull && jnull) {
            if (inherits(x, "tinytable")) {
                x@caption <- escape_text(x@caption, output = o)

                for (idx in seq_along(x@notes)) {
                    n <- x@notes[[idx]]
                    if (is.character(n) && length(n) == 1) {
                        x@notes[[idx]] <- escape_text(n, output = o)
                    } else if (is.list(n) && "text" %in% names(n)) {
                        n$text <- escape_text(n$text, output = o)
                        x@notes[[idx]] <- n
                    }
                }

                for (idx in seq_along(x@lazy_group)) {
                    g <- x@lazy_group[[idx]]
                    if (!is.null(g$j)) {
                        names(g$j) <- escape_text(names(g$j), output = o)
                    }
                    if (!is.null(g$i)) {
                        names(g$i) <- escape_text(names(g$i), output = o)
                    }
                    x@lazy_group[[idx]] <- g
                }
            }
        }
    }

    # markdown and quarto at the very end
    for (col in j) {
        if (isTRUE(markdown)) {
            assert_dependency("markdown")
            out <- format_markdown(out = out, i = i, col = col, x = x)
        }

        if (isTRUE(quarto)) {
            tmp <- format_quarto(out = out, i = i, col = col, x = x)
            out <- tmp$out
            x <- tmp$x
        }
    }

    if (inull && jnull && isTRUE(markdown)) {
        colnames(x) <- format_markdown(colnames(x), x = x)
        if (inherits(x, "tinytable")) {
            for (k in seq_along(x@lazy_group)) {
                g <- x@lazy_group[[k]]
                if (!is.null(g$j)) {
                    names(g$j) <- format_markdown(names(g$j), x = x)
                }
                if (!is.null(g$i)) {
                    names(g$i) <- format_markdown(names(g$i), x = x)
                }
                x@lazy_group[[k]] <- g
            }
        }
    }

    # output
    if (isTRUE(atomic_vector)) {
        return(out[[1]])
    } else if (!inherits(x, "tinytable")) {
        return(out)
    } else {
        x@table_dataframe <- out
        return(x)
    }
}


format_math <- function(out, math) {
    if (isTRUE(math)) {
        out <- sprintf("$%s$", out)
    }
    return(out)
}

format_markdown <- function(out, i = NULL, col = NULL, x) {
    tmpfun_html <- function(k) {
        k <- trimws(markdown::mark_html(text = k, template = FALSE))
        k <- sub("<p>", "", k, fixed = TRUE)
        k <- sub("</p>", "", k, fixed = TRUE)
        return(k)
    }

    tmpfun_latex <- function(k) {
        k <- trimws(markdown::mark_latex(text = k, template = FALSE))
        return(k)
    }

    if (inherits(out, "data.frame")) {
        ipos <- i[i > 0]
        if (length(ipos) > 0) {
            if (inherits(x, "tinytable_bootstrap")) {
                out[ipos, col] <- sapply(out[ipos, col], function(k) tmpfun_html(k))
            } else if (inherits(x, "tinytable_tabularray")) {
                out[ipos, col] <- sapply(out[ipos, col], function(k) tmpfun_latex(k))
            }
        }
    } else {
        if (inherits(x, "tinytable_bootstrap")) {
            out <- sapply(out, function(k) tmpfun_html(k))
        } else if (inherits(x, "tinytable_tabularray")) {
            out <- sapply(out, function(k) tmpfun_latex(k))
        } 
    }

    return(out)
}


format_quarto <- function(out, i, col, x) {
    if (isTRUE(x@output == "html")) {
        fun <- function(z) {
            z@table_string <- sub("data-quarto-disable-processing='true'",
                "data-quarto-disable-processing='false'",
                z@table_string,
                fixed = TRUE)
            return(z)
        }
        x <- style_tt(x, finalize = fun)
        out[i, col] <- sprintf('<span data-qmd="%s"></span>', out[i, col, drop = TRUE])
    } else if (isTRUE(x@output == "latex")) {
        assert_dependency("base64enc")
        tmp <- sapply(out[i, col, drop = TRUE], function(z) base64enc::base64encode(charToRaw(z)))
        out[i, col] <- sprintf("\\QuartoMarkdownBase64{%s}", tmp)
    }

    return(list("out" = out, "x" = x))
}

Try the tinytable package in your browser

Any scripts or data that you put into this service are public.

tinytable documentation built on Oct. 5, 2024, 5:06 p.m.