rawr: rawr miscellaneous

Documented in abbr binconr c2h case catlist color_pval combine_table combine_table2 countr describeDate describeDateBy describeFuns dmy h2c html_align inject_div inl_anova inl_ca inl_chisq inl_cuzick inl_fisher inl_jt inl_kruskal inl_kw inl_logrank inl_t inl_wilcox intr intr2 iprint match_ctc n2w N2W num2char pvalr pvalr2 roundr roundr.data.frame roundr.default roundr.matrix show_html show_markdown show_math tabler tabler_by tabler_by2 tabler.coxph tabler.default tabler.glm tabler.lm tabler_resp tabler_stat tabler_stat2 tabler.survfit tox_worst writeftable write_htmlTable

### formatting, knitr, html-related, misc utils
# show_html, show_markdown, show_math, roundr, roundr.default, roundr.matrix,
# roundr.data.frame, intr, pvalr, pvalr2, color_pval, catlist, binconr,
# num2char, n2w, N2W, iprint, writeftable, tabler, tabler.default,
# tabler.lm, tabler.glm, tabler.survfit, tabler_by, tabler_by2, tabler_stat,
# describeDate, describeDateBy, tabler_stat2, tabler_resp, match_ctc, tox_worst,
# countr, dmy, combine_table, combine_table2, inject_div, case, write_htmlTable,
# html_align, abbr, html2char, h2c, c2h
#
# S3 methods:
# roundr, tabler
#
## in-line stats
# anova, chisq, cuzick, fisher, jt, kruskal, ca, kw, t, wilcox, logrank
#
# unexported:
# getPvalCAtest, getPvalCuzick, getPvalJTtest, getPvalKruskal, getPvalKWtest,
# getPvalLogrank, getPvalTtest, getPval_, guess_test, describeConfint,
# describeFactors, describeSurv, tabler_stat_list, tabler_stat_html,
# guess_digits, get_tabler_stat_n, resp1, r_or_better1
###


#' In-line stats
#'
#' Convenience functions to summarize statistical tests.
#'
#' @param x numeric or factor-like variable, table, or a list depending on
#'   the statistic
#' @param y (optional) group or stratification variable
#' @param ... additional arguments passed to the stat function
#' @param paired logical; if \code{TRUE}, performs a paired test
#' @param details logical; if \code{FALSE}, only the p-value is printed; if
#'   \code{TRUE}, additional details depending on the test (e.g., the test
#'   statistic, degrees of freedom depending, etc.) are printed
#' @param digits number of digits past the decimal point to keep
#' @param object for \code{inl_logrank}, a \code{\link[survival]{survdiff}}
#'   object or formula to be passed to \code{survdiff}
#'
#' @examples
#' x <- mtcars$vs
#' y <- mtcars$am
#' 
#' ## t-test (two-sample)
#' t.test(x ~ y)
#' inl_t(split(x, y))
#' inl_t(x, y)
#' 
#' ## t-test (paired)
#' t.test(x, y, paired = TRUE)
#' inl_t(x, y, paired = TRUE)
#' inl_t(list(x, y), paired = TRUE)
#' inl_t(c(x, y), factor(rep(1:2, each = length(x))), paired = TRUE)
#' 
#' 
#' ## fisher's exact
#' fisher.test(x, y)
#' inl_fisher(x, y)
#' inl_fisher(table(x, y))
#' 
#' 
#' ## chi-squared test for count data
#' chisq.test(table(x, y))
#' inl_chisq(x, y)
#' inl_chisq(table(x, y))
#' 
#' 
#' ## wilcoxon rank-sum test
#' wilcox.test(mtcars$mpg ~ y)
#' inl_wilcox(mtcars$mpg, y)
#' inl_wilcox(split(mtcars$mpg, y))
#' 
#' ## wilcoxon signed-rank test
#' wilcox.test(x, y, paired = TRUE)
#' inl_wilcox(x, y, paired = TRUE)
#' inl_wilcox(list(x, y), paired = TRUE)
#' inl_wilcox(c(x, y), factor(rep(1:2, each = length(x))), paired = TRUE)
#' 
#' 
#' ## cuzick's trend test
#' cuzick.test(mpg ~ gear, mtcars)
#' inl_cuzick(mtcars$mpg, mtcars$gear)
#' inl_cuzick(split(mtcars$mpg, mtcars$gear))
#' 
#' 
#' ## jonckheere-terpstra test
#' jt.test(table(mtcars$gear, mtcars$cyl))
#' inl_jt(mtcars$gear, mtcars$cyl)
#' inl_jt(table(mtcars$gear, mtcars$cyl))
#' 
#' 
#' ## cochran-armitage test for trend
#' ca.test(table(mtcars$vs, mtcars$cyl))
#' inl_ca(mtcars$vs, mtcars$cyl)
#' inl_ca(table(mtcars$vs, mtcars$cyl))
#' 
#' 
#' ## test for trend in proportions
#' kw.test(table(mtcars$gear, mtcars$cyl))
#' inl_kw(mtcars$gear, mtcars$cyl)
#' inl_kw(table(mtcars$gear, mtcars$cyl))
#' 
#' 
#' ## log-rank test
#' library('survival')
#' s1 <- survdiff(Surv(time, status) ~ sex, colon)
#' s2 <- survdiff(Surv(time, status) ~ sex + strata(age), colon)
#' inl_logrank(s1)
#' inl_logrank(s2)
#' inl_logrank(Surv(time, status) ~ sex, data = colon)
#'
#' @name inline_stats
NULL

#' @rdname inline_stats
#' @export
inl_anova <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  res <- anova(lm(y ~ x, ...))

  if (!details)
    pvalr(res[['Pr(>F)']][[1L]], show.p = TRUE)
  else {
    sprintf(
      'F: %s (%s df), one-way ANOVA p-value: %s',
      roundr(res[['F value']][[1L]], digits), res$Df[[1L]],
      pvalr(res[['Pr(>F)']][[1L]])
    )
  }
}

#' @rdname inline_stats
#' @export
inl_ca <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  tbl <- if (!is.null(dim(x)))
    x else table(x, y)
  
  suppressWarnings({
    res <- ca.test(tbl, ...)
  })
  
  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      '&chi;<sup>2</sup>: %s (%s df), Cochran-Armitage p-value: %s',
      roundr(res$statistic, digits), res$parameter, pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_chisq <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  tbl <- if (!is.null(dim(x)))
    x else table(x, y)

  suppressWarnings({
    res <- chisq.test(tbl, ...)
  })

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      '&chi;<sup>2</sup>: %s (%s df), chi-squared p-value: %s',
      roundr(res$statistic, digits), res$parameter, pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_cuzick <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  if (inherits(x, 'list')) {
    x <- rbindlist(x)
    y <- x[, 1L]
    x <- x[, 2L]
  }
  res <- cuzick.test(x ~ as.factor(y), ...)

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      'z: %s (%s ordered groups), Cuzick trend p-value: %s',
      roundr(res$statistic, digits), length(res$estimate), pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_fisher <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  tbl <- if (!is.null(dim(x)))
    x else table(x, y)
  res <- fisher.test(tbl, ...)

  if (!details || any(dim(tbl) > 2L))
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      'OR: %s (%s%% CI: %s - %s), Fisher\'s exact p-value: %s',
      roundr(res$estimate, digits), attr(res$conf.int, 'conf.level') * 100,
      roundr(res$conf.int[1L], digits), roundr(res$conf.int[2L], digits),
      pvalr(res$p.value, show.p = FALSE)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_jt <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  res <- jt.test(x, y, ...)

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      'z: %s, Jonckheere-Terpstra p-value: %s',
      roundr(res$statistic, digits), pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_kruskal <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  if (inherits(x, 'list')) {
    x <- rbindlist(x)
    y <- x[, 1L]
    x <- x[, 2L]
  }
  res <- kruskal.test(x ~ as.factor(y), ...)

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      '&chi;<sup>2</sup>: %s (%s df), Kruskal-Wallis rank-sum p-value: %s',
      roundr(res$statistic, digits), res$parameter, pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_kw <- function(x, y = NULL, ..., details = TRUE, digits = 2L) {
  tbl <- if (!is.null(dim(x)))
    x else table(x, y)

  suppressWarnings({
    res <- kw.test(tbl, ...)
  })

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      '&chi;<sup>2</sup>: %s (%s df), test for trend in proportions p-value: %s',
      roundr(res$statistic, digits), res$parameter, pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_logrank <- function(object, ..., details = TRUE, digits = 2L) {
  res <- lr_pval(object, ..., details = TRUE)
  str <- !is.null(object$strata)

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      '%slog-rank test p-value: %s',
      ifelse(str, 'stratified ', ''), pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_t <- function(x, y = NULL, ..., paired = FALSE,
                  details = TRUE, digits = 2L) {
  if (inherits(x, 'list')) {
    if (length(x) != 2L)
      stop('grouping factor must have exactly 2 levels')
    
    if (paired) {
      y <- x[[2L]]
      x <- x[[1L]]
    } else {
      x <- rbindlist(x)
      y <- x[, 1L]
      x <- x[, 2L]
    }
  }

  res <- if (paired) {
    if (is.factor(y)) {
      y <- split(x, y)
      x <- y[[1L]]
      y <- y[[2L]]
    }
    t.test(x, y, paired = TRUE)
  } else t.test(x ~ as.factor(y), ...)

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      't: %s, %s p-value: %s',
      roundr(res$statistic, digits), res$method, pvalr(res$p.value)
    )
  }
}

#' @rdname inline_stats
#' @export
inl_wilcox <- function(x, y = NULL, ..., paired = FALSE,
                       details = TRUE, digits = 2L) {
  if (inherits(x, 'list')) {
    if (paired) {
      y <- x[[2L]]
      x <- x[[1L]]
    } else {
      x <- rbindlist(x)
      y <- x[, 1L]
      x <- x[, 2L]
    }
  }

  res <- suppressWarnings({
    if (paired) {
      if (is.factor(y)) {
        y <- split(x, y)
        x <- y[[1L]]
        y <- y[[2L]]
      }
      wilcox.test(x, y, paired = TRUE, ...)
    } else wilcox.test(x ~ as.factor(y), ...)
  })

  if (!details)
    pvalr(res$p.value, show.p = TRUE)
  else {
    sprintf(
      'w: %s, Wilcoxon %s p-value: %s',
      roundr(res$statistic, digits),
      if (paired) 'signed-rank' else 'rank-sum', pvalr(res$p.value)
    )
  }
}

#' Show HTML
#'
#' Render html in rstudio viewer or browser.
#'
#' @param ... one or more character strings
#' @param use_viewer logical; if \code{TRUE}, attempts to use
#'   \code{rstudioapi::viewer} or opens in default browser on error
#'
#' @return
#' The html code (invisibly) as a character string.
#'
#' @seealso
#' \code{\link{show_math}}, \code{\link{show_markdown}}
#'
#' @examples
#' \dontrun{
#' show_html("
#' <div align = center><h1>A heading<sup>&dagger;</sup><h1></div>
#' <font size = 1><sup>&dagger;</sup>That was the heading</font>
#' ")
#'
#' library('htmlTable')
#' show_html(htmlTable(mtcars, output = FALSE), use_viewer = FALSE)
#' }
#'
#' @export

show_html <- function(..., use_viewer = !is.null(getOption('viewer'))) {
  x <- c(...)
  if (is.null(x))
    return(invisible(NULL))

  htmlFile <- tempfile(fileext = '.html')
  writeLines(x, con = htmlFile)

  if (use_viewer)
    tryCatch(
      rstudioapi::viewer(htmlFile),
      error = function(e) {
        message('Viewer not available - opening in browser.\n',
                'In RStudio, try installing the \'rstudio\' package.',
                domain = NA)
        browseURL(htmlFile)
      })
  else browseURL(htmlFile)

  invisible(x)
}

#' Show markdown
#'
#' Render markdown to html in rstudio viewer or default browser.
#'
#' @param ... one or more character strings
#' @param use_viewer logical; if \code{TRUE}, attempts to use
#'   \code{rstudioapi::viewer} or opens in default browser on error
#' @param markArgs a list of addition arguments passed to
#'   \code{\link[markdown]{markdownToHTML}}
#'
#' @return
#' The html code (invisibly) as a character string.
#'
#' @seealso
#' \code{\link{show_html}}, \code{\link{show_math}}
#'
#' @examples
#' \dontrun{
#' library('markdown')
#'
#' ## separate strings of markdown
#' show_markdown('## this is a header','here is some *plain* text  ',
#'               '<font color=red>ahhh</font>')
#'
#'
#' ## or as a single character string
#' mkd <- "
#' 1. Here is some markdown I want to test
#' 2. This is a list
#'   + a sub-point
#'   + another sub-point
#'
#' ```
#' This text is displayed verbatim / preformatted
#' ```
#'
#' and this text will be formatted using r syntax
#'
#' ```r
#' mean(1:5)
#' ```
#'
#' And now I will make a table:
#'
#' First Header  | Second Header
#' ------------- | -------------
#' Content Cell  | Content Cell
#' Content Cell  | Content Cell
#'
#' ## ta-daaa!
#'
#' > here's a picture with a [hyperlink](www.google.com):
#'
#' <div align=center>
#'     <a href='http://xkcd.com/710/'>
#'         <img src='http://imgs.xkcd.com/comics/collatz_conjecture.png'
#'              style='display: block; margin: auto;' />
#'     </a>
#' </div>
#' "
#'
#' show_markdown(mkd)
#'
#'
#' ## the default stylesheet is
#' getOption('markdown.HTML.stylesheet')
#'
#' ## apply another style sheet by setting the option or passing
#' ## arguments directly to markdown::markdownToHTML
#'
#' ## here are four css files in this package
#' list.files(system.file(package = 'rawr', 'styles'))
#' kcss <- system.file(package = 'rawr', 'styles', 'knitr.css')
#' gcss <- system.file(package = 'rawr', 'styles', 'github.css')
#' ## file.show(gcss)
#'
#'
#' show_markdown(mkd, markArgs = list(stylesheet = gcss))
#' show_markdown(mkd, markArgs = list(stylesheet = kcss),
#'               use_viewer = FALSE)
#' }
#'
#' @export

show_markdown <- function(..., use_viewer = !is.null(getOption('viewer')),
                          markArgs = list()) {
  txt <- list(text = c(...))
  mdk <- do.call(markdown::markdownToHTML, c(txt, markArgs))
  show_html(mdk, use_viewer = use_viewer)
}

#' Show math equations
#'
#' Displays math equations in \code{rstudioapi::viewer} or browser using the
#' \href{http://www.mathjax.org}{MathJax} javascript engine.
#'
#' @param ... one or more character strings
#' @param css optional css formatting
#' @param use_viewer logical; if \code{TRUE}, attempts to use
#'   \code{rstudioapi::viewer} or opens in default browser on error
#'
#' @return
#' The html code (invisibly) as a character string.
#'
#' @seealso
#' \code{\link{show_html}}, \code{\link{show_markdown}},
#' \href{http://detexify.kirelabs.org/classify.html}{draw math symbols},
#' \url{https://stackoverflow.com/q/31193843/2994949}
#'
#' @examples
#' \dontrun{
#' form1 <- '$$A=\\frac{B}{C}$$'
#'
#' form2 <- '$$
#'   \\frac{1}{\\displaystyle 1+
#'   \\frac{1}{\\displaystyle 2+
#'   \\frac{1}{\\displaystyle 3+x}}} +
#'   \\frac{1}{1+\\frac{1}{2+\\frac{1}{3+x}}}
#' $$'
#'
#' form3 <- '\\frac{d}{dx}\\left( \\int_{0}^{x} f(u)\\,du\\right)=f(x)'
#'
#' form4 <- "
#'   \\forall a,b,c \\in \\mathbb{R} \\\\
#'   \\begin{align}
#'                         a + b &= c \\\\
#'                (a + b)(a - b) &= c(a - b) \\\\
#'                     a^2 - b^2 &= ca - cb \\\\
#'                      a^2 - ca &= b^2 - cb \\\\
#'     a^2 - ca + \\frac{c^2}{4} &= b^2 - cb + \\frac{c^2}{4} \\\\
#'          (a - \\frac{c}{2})^2 &= (b - \\frac{c}{2})^2 \\\\
#'              a - \\frac{c}{2} &= b - \\frac{c}{2} \\\\
#'                             a &= b \\qquad \\qquad \\blacksquare \\\\
#'    \\end{align}
#' "
#'
#' show_math(form1)
#' cat(show_math(form4))
#'
#' ## use default browser
#' show_math(form2, use_viewer = FALSE)
#'
#' ## concatenate multiple expressions
#' show_math(form1, form2, form3, css = 'color: red; font-size: 15px;')
#' }
#'
#' @export

show_math <- function(..., css = '', use_viewer = !is.null(getOption('viewer'))) {
  mj <- "
  <script>
  (function () {
    var script = document.createElement('script');
    script.type = 'text/javascript';
    script.src  = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
    document.getElementsByTagName('head')[0].appendChild(script);
  })();
  </script>
  "

  ## use \[ expr \] instead of $$ expr $$
  check_expr <- function(x) {
    sprintf('\\[ %s \\]', gsub('\\\\\\[|\\\\]', '', gsub('^\\$+|\\$+$', '', x)))
  }

  x <- paste(sapply(c(...), check_expr), collapse = '<br />')

  if (!nzchar(x))
    return(invisible(NULL))

  show_html(
    sprintf('<span class="math" style="font-size: 24px; %s;">\n', css),
    x, '\n</span>\n', mj, use_viewer = use_viewer
  )
}

#' roundr
#'
#' Improved rounding formatter.
#'
#' Uses \code{\link[base]{sprintf}} to round numeric value while keeping
#' trailing zeros.
#'
#' @param x numeric vector, matrix, or data frame
#' @param digits number of digits past the decimal point to keep
#' @param format logical; if \code{TRUE}, large numbers will be formatted
#'   with commas
#' @param check logical; if \code{TRUE}, formatted strings are checked for
#'   consistency with \code{\link[base]{round}} and warned if not identical
#' @param scipen see \code{\link{options}}
#'
#' @return
#' An object having the same class as \code{x}.
#'
#' @seealso
#' \code{\link{round}}; \code{\link{Round}}; \code{\link{sprintf}};
#' \code{\link{round_to}}; \code{\link{pvalr}}
#'
#' @examples
#' ## compare
#' round(0.199, 2)
#' roundr(0.199, 2)
#' 
#' round(1e9)
#' roundr(1e9, 0)
#' roundr(1e9, 0, scipen = 10)
#'
#' ## drops negative when x rounds to 0, eg, case 1:
#' roundr(c(1000, 1, -0.0002, 0.0002, 0.5, -0.5, -0.002), digits = 3)
#'
#' ## for matrices or data frames (including factors and/or characters)
#' roundr(matrix(1:9, 3))
#'
#' dd <- within(head(mtcars), {
#'   mpg <- as.character(mpg)
#'   cyl <- factor(cyl, labels = LETTERS[1:3])
#' })
#' roundr(dd)
#'
#' @export

roundr <- function(x, digits = 1L, format = TRUE, check = TRUE,
                   scipen = getOption('scipen')) {
  UseMethod('roundr')
}

#' @rdname roundr
#' @export
roundr.default <- function(x, digits = 1L, format = TRUE, check = TRUE,
                           scipen = getOption('scipen')) {
  if (!is.numeric(x) || is.complex(x))
    stop('non-numeric argument to mathematical function')
  
  oo <- getOption('scipen')
  options(scipen = scipen)
  on.exit(options(scipen = oo))
  
  fmt <- paste0('%.', digits, 'f')
  res <- sprintf(fmt, round(x, digits))
  
  if (format) {
    res[is.na(x)] <- NA
    res <- sapply(res, function(xx) {
      if (is.na(xx) || as.numeric(xx) < 1e3)
        return(xx)
      int <- trunc(as.numeric(xx))
      if (grepl('\\.', xx))
        gsub('[0-9]+(?=\\.)', format(int, big.mark = ','), xx, perl = TRUE)
      else format(int, big.mark = ',')
    })
  }

  ## drop leading '-' if value is 0.00...
  zero <- sprintf(fmt, 0)
  res[res == paste0('-', zero) & !is.na(res)] <- zero[!is.na(res)]

  res[is.na(x)] <- NA
  
  if (check) {
    current <- type.convert(gsub(',|\\.0+$', '', res), as.is = TRUE)
    target <- c(round(x, digits))
    if (any(current != target, na.rm = TRUE))
      warning(
        'current != target\ncurrent: ', toString(current),
        '\ntarget: ', toString(target)
      )
  }

  setNames(res, names(x))
}

#' @rdname roundr
#' @export
roundr.matrix <- function(x, digits = 1L, format = TRUE, check = TRUE,
                          scipen = getOption('scipen')) {
  if (!is.numeric(x) || is.complex(x))
    stop(deparse(substitute(x)), ' is not numeric')
  x[] <- roundr.default(x, digits, format, scipen)
  x
}

#' @rdname roundr
#' @export
roundr.data.frame <- function(x, digits = 1L, format = TRUE, check = TRUE,
                              scipen = getOption('scipen')) {
  x[] <- lapply(x, function(xx)
    if (is.numeric(xx) || is.complex(xx))
      roundr.default(xx, digits, format, check, scipen) else xx)
  x
}

#' Interval formatter
#'
#' Calculate summary statistic with range or confidence interval.
#'
#' @param ... numeric vector or string of numeric vectors
#' @param fun summary stat function, usually \code{\link{mean}} or
#'   \code{\link{median}}
#' @param conf width of confidence interval in \code{[0,1]}; if \code{NULL}
#'   (default), returns min and max of \code{...}
#' @param digits number of digits (includes trailing 0s)
#' @param na.rm logical; if \code{TRUE}, any \code{\link{NA}} and \code{NaN}
#'   are removed from \code{...} before \code{FUN} and \code{\link{quantile}}
#'   are computed
#'
#' @seealso
#' \code{\link[rawr]{roundr}}
#'
#' @examples
#' x <- 1:10
#' intr(x)
#' intr(x, conf = 0.95)
#'
#' ## inner quartile range
#' cbind(
#'   'median (IQR)' = lapply(mtcars, intr, conf = 0.5, digits = 2),
#'   'mean (range)' = lapply(mtcars, intr, fun = mean, digits = 2)
#' )
#' 
#' ## compare to
#' summary(mtcars)
#'
#' @export

intr <- function(..., fun = median, conf = NULL, digits = 0L, na.rm = FALSE) {
  lst <- list(...)
  if (is.null(conf) || conf == 0 ||
      findInterval(conf, 0:1, rightmost.closed = FALSE) != 1L)
    conf <- 1L

  sapply(lst, function(x) {
    bounds <- quantile(x, c((1 - conf) / 2 * c(1, -1) + 0:1), na.rm = na.rm)
    bounds <- roundr(bounds, digits)
    val <- roundr(fun(x, na.rm = na.rm), digits)

    if (!conf %in% 0:1)
      sprintf('%s (%s%% CI: %s - %s)', val, conf * 100,
              bounds[1L], bounds[2L])
    else sprintf('%s (range: %s - %s)', val, bounds[1L], bounds[2L])
  })
}

#' @param conf.int for \code{intr2}, the confidence level if
#'   \code{conf = TRUE}; default is \code{0.95}
#' @param range,ci,sd,mad,iqr logical; for \code{intr2} if \code{TRUE},
#'   the range (default), confidence interval, standard deviation, median
#'   absolute deviation, and/or inner quartile range are shown
#' 
#' @examples
#' x <- mtcars$mpg
#' intr2(x)
#' intr2(x, fun = mean, sd = TRUE, ci = TRUE, conf.int = 0.9)
#' 
#' ## compare
#' x <- list(mtcars$mpg, mtcars$wt)
#' intr2(x, range = TRUE, iqr = TRUE, digits = 2)
#' summary(mtcars[, c('mpg', 'wt')])
#' 
#' @rdname intr
#' @export

intr2 <- function(..., fun = median, na.rm = FALSE, digits = 0L, conf.int = 0.95,
                  range = TRUE, ci = FALSE, sd = FALSE, mad = FALSE, iqr = FALSE) {
  lst <- if (islist(..1))
    c(...) else list(...)
  
  sapply(lst, function(x) {
    pr <- c((1 - conf.int[1L]) / 2 * c(1, -1) + 0:1)
    l1 <- roundr(quantile(x, pr, na.rm = TRUE), digits)
    l2 <- roundr(quantile(x, c(0.25, 0.75), na.rm = TRUE), digits)
    l3 <- roundr(range(x, na.rm = TRUE), digits)
    
    val <- roundr(fun(if (na.rm) na.omit(x) else x), digits)
    
    lbl <- c(
      lim = sprintf('range: %s - %s', l3[1L], l3[2L]),
      ci  = sprintf('%s%% CI: %s - %s', conf.int[1L] * 100, l1[1L], l1[2L]),
      sd  = sprintf('SD: %s', roundr(sd(x), digits)),
      mad = sprintf('MAD: %s', roundr(mad(x), digits)),
      iqr = sprintf('IQR: %s - %s', l2[1L], l2[2L])
    )
    lbl <- lbl[c(range, ci, sd, mad, iqr)]
    
    sprintf('%s (%s)', val, paste(lbl, collapse = '; '))
  })
}

#' p-value formatter
#' 
#' @description
#' Formats several cases of p-values; see details.
#' 
#' Note that a previous \code{pvalr} function had additional arguments. For
#' this version, see \code{rawr2::pvalr}.
#' 
#' @details
#' \code{pvalr} rounds numeric values to a specified number of significant
#' \code{digits} and add (if necessary) trailing 0s. The result will be a
#' character string to prevent loss of significant digits.
#' 
#' Note that negative values are treated as small p-values (similar to
#' \code{format.pval}), but unlike \code{format.pval}, values greater than 1
#' are formatted as \code{> 1 - 1 / 10 ^ digits}
#'
#' \code{pvalr2} deals with common cases of character string p-values which are
#' not ideal (0.000, 1.00, etc.) and will leave others unchanged.
#'
#' @param pv for \code{pvalr}, a numeric value or vector of p-values; for
#'   \code{pvalr2}, a vector of p-values as character strings
#' @param sig.limit lower bound for precision; smaller values will be shown as
#'   \code{< sig.limit}
#' @param digits number of significant digits
#' @param scientific see \code{\link[base]{format}}
#' @param html logical; if \code{TRUE}, uses HTML entities for \code{<}
#'   and \code{>}
#' @param show.p logical; if \code{TRUE}, inserts \code{p = }, \code{p < }, or
#'   \code{p > } where appropriate
#' @param ... ignored
#'
#' @seealso
#' \code{\link[rawr]{roundr}}; \code{\link[base]{format.pval}};
#' \code{rawr:::signif2}
#'
#' @examples
#' pv <- c(-1, 0.001, 0.0001, 0.0042, 0.0601, 0.1335, 0.4999, 0.9, 0.997, 1, 2)
#' format.pval(pv, eps = 0.001)
#'
#' pvalr(pv)
#' pvalr(pv, digits = 3)
#' pvalr(pv, show.p = TRUE, html = TRUE)
#' 
#' \dontrun{
#' ## older version of pvalr in rawr2 package
#' ## note differences in digits used
#' rawr2::pvalr(pv)
#' rawr2::pvalr(pv, journal = FALSE)
#' rawr2::pvalr(pv, limits = 0.1)
#' }
#'
#' @export

pvalr <- function(pv, sig.limit = 0.001, digits = 2L, scientific = FALSE,
                  html = FALSE, show.p = FALSE, ...) {
  stopifnot(
    sig.limit > 0,
    sig.limit < 1
  )
  
  pstr <- c('', 'p ')[show.p + 1L]
  high <- 1 - 1 / 10 ^ digits
  
  sapply(pv, function(x) {
    if (is.na(x) | !nzchar(x) | !is.numeric(x))
      NA
    else if (x > high)
      paste0(pstr, c('> ', '&gt; ')[html + 1L], high)
    else if (x < sig.limit)
      paste0(pstr, c('< ', '&lt; ')[html + 1L],
             format.pval(sig.limit, scientific = scientific))
    else paste0(c('', 'p = ')[show.p + 1L], signif2(x, digits))
  })
}

signif2 <- function(x, digits = 6L) {
  sapply(x, function(xx) {
    s <- signif(xx, digits = digits)
    formatC(s, digits = digits, format = 'fg', flag = '#')
  })
}

#' @rdname pvalr
#' @examples
#' pv <- c('0.00000', '< 0.001', '0.0', '0.123', '0.6', '1', '1.0', '1.000')
#' pvalr2(pv)
#'
#' @export

pvalr2 <- function(pv, html = FALSE, show.p = FALSE) {
  x <- gsub('^1$', '1.0', pv)
  x <- gsub('(?:^1\\.|\\G)\\K0(?=0*$)', '9', x, perl = TRUE)
  x <- gsub('^1\\.', '> 0.', x)
  x <- gsub('^(0\\.0*)0$', '< \\11', x)

  if (html) {
    x <- gsub('>', '&gt;', x)
    x <- gsub('<', '&lt;', x)
  }

  if (show.p)
    ifelse(grepl('[<>]', pvalr2(pv)), paste0('p ', x), paste0('p = ', x))
  else x
}

#' @rdname pvalr
#'
#' @param breaks,cols a numeric vector defining breaks in \code{(0,1)} (passed
#'   to \code{\link{findInterval}}) and the corresponding colors
#' @param format_pval logical; if \code{TRUE}, p-values will be formatted
#'   using \code{\link{pvalr}}; alternatively, a function may by used which
#'   will be applied to each p-value
#' @param na a string used for \code{NA} p-values (default is \code{"-"})
#'
#' @examples
#' \dontrun{
#' pv <- c(0, 0.00001, 0.03, 0.06, 0.11, 0.49, 0.51, 0.89, 0.9, 1)
#' show_html(color_pval(pv))
#' show_html(color_pval(pv, format_pval = format.pval))
#' show_html(iprint(color_pval(pv, show.p = TRUE)))
#' }
#'
#' @export

color_pval <- function(pv, breaks = c(0, 0.01, 0.05, 0.1, 0.5, 1),
                       cols = colorRampPalette(2:1)(length(breaks)),
                       sig.limit = 0.001, digits = 2L, show.p = FALSE,
                       format_pval = TRUE, na = '-', ...) {
  if (!is.numeric(pv))
    return(pv)
  pvn <- pv

  stopifnot(length(breaks) == length(cols))

  pv <- if (isTRUE(format_pval))
    pvalr(pvn, sig.limit, digits, scientific = FALSE, html = TRUE, show.p)
  else if (identical(format_pval, FALSE))
    pv else format_pval(pv)

  pvc <- cols[findInterval(pvn, breaks)]
  res <- sprintf('<font color=\"%s\">%s</font>', pvc, pv)

  replace(res, grepl('>NA<', res, fixed = TRUE), na)
}

#' Concatenate list for output
#'
#' Print a \code{list} (usually named) as a character vector or string.
#'
#' @param x a list to concatenate
#' @param sep,collapse passed to \code{\link{paste}} controlling the string
#'   to separate name from value and list elements, respectively; if
#'   \code{collapse} is a non character string, the result will be a vector
#'   of strings
#'
#' @seealso
#' \code{\link{iprint}}
#'
#' @examples
#' l <- list(a = 1:3, b = 2, '4')
#' catlist(l)
#' catlist(l, collapse = FALSE)
#' cat(catlist(par()[1:5], sep = ':\n  ', collapse = '\n'))
#'
#' @export

catlist <- function(x, sep = ' = ', collapse = ', ') {
  res <- paste(names(x), x, sep = sep)

  idx <- !nzchar(names(x))
  res[idx] <- gsub(sprintf('^%s', sep), '', res)[idx]

  if (is.character(collapse))
    paste(res, collapse = collapse) else res
}

#' \code{bincon} formatter
#'
#' @description
#' Binomial confidence interval (\code{\link{bincon}}) formatter.
#'
#' Note that this function will also calculate confidence intervals for
#' two-stage designs by \code{method="two-stage"}. If both \code{r} and
#' \code{n} are length 2, a two-stage design is assumed. If \code{r} and
#' \code{n} are not both length 1 or 2, the function will fail. For vector
#' inputs, see \code{\link{bincon}}.
#'
#' @param r number of responses (successes)
#' @param n number of observations (trials)
#' @param conf level of confidence
#' @param digits number of digits
#' @param est logical; if \code{TRUE}, includes the point estimate
#' @param frac logical; if \code{TRUE}, includes the fraction \code{r/n}
#' @param show_conf logical; if \code{TRUE} includes the confidence level
#' @param pct.sign logical; if \code{TRUE}, percent sign is shown; otherwise,
#'   percents are shown without sign (this does not affect the confidence text)
#' @param method method to use (default is exact); see \code{\link{bincon}}
#' @param percent logical; if \code{TRUE} (default), estimates and intervals
#'   are returned as percentages
#'
#' @seealso
#' \code{\link{bincon}}; \code{\link[Hmisc]{binconf}}
#'
#' @examples
#' binconr(5, 10)
#' binconr(5, 10, percent = FALSE)
#'
#' binconr(5, 10, 0.90, est = FALSE)
#' binconr(45, 53, digits = 1, conf = 0.975)
#' binconr(45, 53, show_conf = FALSE, frac = TRUE)
#'
#' ## length 2 vectors assume two-stage confidence intervals
#' binconr(c(15, 45), c(20, 33), show_conf = FALSE, frac = TRUE)
#' ## compare
#' rawr:::twocon(20, 33, 15, 45)
#'
#' @export

binconr <- function(r, n, conf = 0.95, digits = 0L, est = TRUE, frac = FALSE,
                    show_conf = TRUE, pct.sign = TRUE, method = 'exact',
                    percent = TRUE) {
  lr <- length(r)
  ln <- length(n)
  xx <- if (!percent) {
    pct.sign <- FALSE
    digits <- ifelse(digits == 0, 2L, pmax(1L, digits))
    1
  } else 100

  method <- if (lr == 2L & ln == 2L)
    'two-stage' else {
      stopifnot(lr == 1L, ln == 1L)
      match.arg(method, c('exact', 'wilson', 'asymptotic'))
    }

  bc <- bincon(r, n, alpha = 1 - conf, method = method)
  stopifnot(nrow(bc) == 1L)

  tmp <- roundr(bc * xx, digits)
  res <- sprintf('%s%% CI: %s - %s%%', conf * 100, tmp[4L], tmp[5L])

  if (!show_conf)
    res <- gsub('.*% CI: ', '', res)
  if (est)
    res <- sprintf('%s%% (%s)', tmp[3L], res)
  if (!pct.sign)
    res <- gsub('%(?= \\()|%(?=\\))', '', res, perl = TRUE)
  if (frac)
    res <- sprintf('%s/%s, %s', tail(r, 1L), sum(n), res)

  structure(res, method = method)
}

#' Convert numbers to words
#'
#' Convert whole or decimal numbers to word equivalent. \code{N2W} is
#' equivalent to \code{n2w}/\code{num2char} but defaults to capitalizing
#' the first word.
#' 
#' Whole numbers twenty-one through ninety-nine are hyphenated when they are
#' written out whether used alone or as part of a larger number; for example:
#' "twenty-one" or "one million twenty-one."
#'
#' Informal and formal case differ only by the use of "and" to separate
#' 1-99: "one hundred one" is the formal case, and "one hundred and one" is
#' the informal case.
#' 
#' @param x numeric vector, less than `1e15`
#' @param cap logical; if \code{TRUE}, capitalizes the first word
#' @param hyphen logical; insert hyphen when the number is between 21 and
#'   99 (except 30, 40, etc)
#' @param informal,and logical; insert "and" between hundreds and tens
#' 
#' @references
#' \url{http://dictionary.reference.com/help/faq/language/g80.html}
#' 
#' \url{https://www.grammarbook.com/numbers/numbers.asp}
#' 
#' @seealso
#' \code{\link{case}}; adapted from \code{xfun::numbers_to_words}
#' 
#' @examples
#' n2w(1e15 - 1)
#' n2w(0:121, and = TRUE)
#' n2w(123.456)
#' N2W(1e6)
#' 
#' x <- c(-1000, 100, 52052, 3922, 3012, 201, -152, 1002, 91070432)
#' cbind(x, informal = num2char(x, informal = TRUE), formal = num2char(x, FALSE))
#' 
#' @export

num2char <- function(x, cap = TRUE, informal = FALSE, hyphen = TRUE, and = FALSE) {
  if (!is.numeric(x) || any(abs(x) >= 1e15))
    return(x)
  
  oo <- options(scipen = 15L, OutDec = '.')
  on.exit(options(oo), add = TRUE)
  informal <- informal | and
  and <- informal | and
  
  zero_to_19 <- setNames(
    c('zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
      'nine', 'ten', 'eleven', 'twelve',
      paste0(c('thir', 'four', 'fif', 'six', 'seven', 'eigh', 'nine'), 'teen')),
    0:19
  )
  tens <- setNames(
    c('twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'),
    2:9 * 10
  )
  marks <- c('', 'thousand,', 'million,', 'billion,', 'trillion,')
  
  ## 0 - 9
  convert_1 <- function(x) {
    zero_to_19[x]
  }
  
  ## 10 - 99
  convert_2 <- function(x) {
    sp <- strsplit(x, '')[[1L]]
    if (sp[1L] == 1)
      zero_to_19[x]  # 10 - 19
    else if (sp[2L] == 0)
      tens[x]  # 20, 30, 40, ...
    # 21, 22, etc.
    else paste(tens[as.integer(sp[1L]) - 1L], convert_1(sp[2L]),
               sep = if (hyphen) '-' else ' ')
  }
  
  ## 100 - 999
  convert_3 <- function(x) {
    sp <- strsplit(x, '')[[1L]]
    n_hundreds <- paste(convert_1(sp[1L]), 'hundred')
    res <- if (sp[2L] == '0') {
      if (sp[3L] == '0')
        return(n_hundreds) # x00
      convert_1(sp[3L]) # x0x
    } else convert_2(paste(sp[2:3], collapse = '')) # xxx
    paste(n_hundreds, res, sep = if (and) ' and ' else ' ')
  }
  
  convert_le3 <- function(x) {
    x <- gsub('^0+', '', x)
    switch(nchar(x) + 1L, '', convert_1(x), convert_2(x), convert_3(x))
  }
  
  convert_one <- function(x) {
    neg <- if (x >= 0) '' else {
      x <- abs(x)
      'negative '
    }
    res <- if (x == 0) {
      ## convert_le3 removes all 0s
      'zero'
    } else {
      sp  <- strsplit(format(floor(x), big.mark = ','), ',')[[1L]] ## eg, 123,456,789
      res <- vapply(sp, convert_le3, character(1L)) ## group by 3 digits
      sp2 <- marks[length(sp):1L] ## units?
      sp2[which(res == '')] <- '' ## eg, 4,000,123, 000, remove millions
      paste(res, sp2, collapse = ' ')
    }
    res <- paste0(neg, res)
    res <- gsub('^ *|,? *$', '', res)
    res <- gsub(' {2,}', ' ', res)
    if (cap)
      res <- sub('^([a-z])', '\\U\\1', res, perl = TRUE)
    if (x - floor(x) > 0) {
      frac <- sub('^[0-9]+[.]', '', as.character(x))
      frac <- convert_1(strsplit(frac, '')[[1L]])
      res <- paste(c(res, 'point', frac), collapse = ' ')
    }
    res
  }
  
  if (length(x) > 1L)
    vapply(x, convert_one, character(1L)) else convert_one(x)
}

#' @rdname num2char
#' @export
n2w <- function(x, cap = FALSE, informal = FALSE, hyphen = TRUE, and = FALSE) {
  num2char(x, cap = cap, informal = FALSE, hyphen = TRUE, and = FALSE)
}

#' @rdname num2char
#' @export
N2W <- function(x, cap = TRUE, informal = FALSE, hyphen = TRUE, and = FALSE) {
  num2char(x, cap = cap, informal = FALSE, hyphen = TRUE, and = FALSE)
}

#' In-line printing
#'
#' Modified \code{\link[pander]{p}} function from the \pkg{pander} package.
#'
#' @param ... one or more numeric or character elements to be converted into
#'   character vectors
#' @param wrap character string to wrap each term
#' @param sep character string to separate the terms
#' @param copula character string to separate last two terms
#' @param digits number of digits past the decimal point to keep; see
#'   \code{\link{roundr}}
#'
#' @seealso
#' \code{\link[pander]{p}}; \code{\link{roundr}}; \code{\link{countr}}
#'
#' @examples
#' iprint(rnorm(2))
#' iprint(-0.000, 0.100)
#'
#' ## compare numeric and integer default printing
#' iprint(1, 2, 3)
#' iprint(1:3)
#'
#' iprint('fee', 'fi', 'fo', 'fum')
#' iprint(LETTERS[1:5], copula = ', and the letter ')
#' iprint('Thelma', 'Louise', copula = ' & ')
#'
#' @export

iprint <- function(..., wrap = '', sep = ', ', copula = ', and ',
                   digits = if (is.integer(x)) 0L else 2L) {
  x <- c(...)
  if (!(len <- length(x)))
    return(character(1L))

  f <- function(x, wrap = '"') {
    sprintf('%s%s%s', wrap, x, wrap)
  }

  if (len == 2L)
    copula <- sub(',', '', copula)
  if (is.numeric(x))
    x <- roundr(x, digits = digits)

  if (len == 1L)
    f(x, wrap)
  else if (len == 2L)
    paste(f(x, wrap), collapse = copula)
  else paste0(paste(f(head(x, -1L), wrap = wrap), collapse = sep),
              copula, f(tail(x, 1L), wrap = wrap))
}

#' Write \code{ftable}
#'
#' Write an \code{\link{ftable}} as a matrix.
#'
#' @param x an object of class \code{ftable}
#' @param quote,digits see \code{\link{format.ftable}}
#'
#' @examples
#' ## basic usage
#' writeftable(ftable(1))
#' writeftable(ftable(mtcars$gear))
#' 
#' ## tables write nicer with names
#' writeftable(ftable(gear = mtcars$gear))
#' writeftable(ftable(Titanic, row.vars = 1:3))
#'
#' @export

writeftable <- function(x, quote = FALSE, digits = getOption('digits')) {
  stopifnot(inherits(x, 'ftable'))
  xx <- trimws(format(x, quote, digits))
  cn <- locf(xx)[2L, ]
  colnames(xx) <- replace(cn, is.na(cn), '')
  xx[-(1:2), , drop = FALSE]
}

#' Tabler
#'
#' Extracts coefficients, standard errors, odds ratios, confidence intervals,
#' p-values, etc. from model fits.
#'
#' @param x an object of class \code{\link{lm}}, \code{\link{glm}},
#'   \code{\link[survival]{survfit}}, \code{\link[survival]{coxph}}
#' @param digits number of digits printed
#' @param level confidence level; default is \code{0.95}
#' @param exp logical; if \code{TRUE}, estimates and confidence intervals are
#'   exponentiated (for \code{glm} or \code{coxph} methods only)
#' @param add_reference logical; if \code{TRUE}, adds row for each reference
#'   group if applicable
#' @param ... additional arguments passed to or from other methods
#'
#' @family tabler
#'
#' @seealso
#' \code{\link{surv_table}}
#'
#' @examples
#' lmfit <- lm(mpg ~ hp + disp + wt, mtcars)
#' tabler(lmfit)
#'
#' glmfit <- glm(vs ~ drat + factor(gear), mtcars, family = 'binomial')
#' tabler(glmfit)
#' tabler(glmfit, add_reference = TRUE)
#' tabler(glmfit, exp = FALSE)
#'
#' library('survival')
#' sfit <- survfit(Surv(time, status) ~ 1, cancer, conf.int = 0.9)
#' tabler(sfit)
#'
#' cphfit <- coxph(Surv(time, status) ~ factor(sex) + age, cancer)
#' tabler(cphfit)
#' tabler(cphfit, add_reference = TRUE)
#'
#' @export

tabler <- function(x, ...) {
  UseMethod('tabler')
}

#' @rdname tabler
#' @export
tabler.default <- function(x, ...) {
  summary(x, ...)
}

#' @rdname tabler
#' @export
tabler.lm <- function(x, digits = 3L, add_reference, ...) {
  res <- data.frame(summary(x, ...)$coefficients, check.names = FALSE)
  pv <- grep('^Pr', names(res))
  res[, pv] <- pvalr(res[, pv], ...)
  res[, -pv] <- lapply(res[, -pv], round, digits = digits)
  res
}

#' @rdname tabler
#' @export
tabler.glm <- function(x, digits = 3L, level = 0.95, exp = TRUE,
                       add_reference = FALSE, ...) {
  res <- data.frame(summary(x, ...)$coefficients, check.names = FALSE)
  pv <- grep('^Pr', names(res))
  res[, pv] <- pvalr(res[, pv], ...)

  suppressMessages({
    res <- data.frame(
      exp(cbind(coef(x), confint(x, level = level))), res[, pv],
      stringsAsFactors = FALSE
    )
  })

  if (!exp)
    res[, 1:3] <- log(res[, 1:3])

  fmt <- sprintf('%%.0%1$sf (%%.0%1$sf, %%.0%1$sf)', digits)
  res <- data.frame(
    res,
    sprintf(fmt, res[, 1L], res[, 2L], res[, 3L]),
    stringsAsFactors = FALSE
  )

  res <- setNames(
    res,
    c(if (exp) 'OR' else 'Est',
      paste0(c('L', 'U'), level * 100), 'Pr(>|z|)',
      sprintf('OR (%s%% CI)', level * 100))
  )
  res[, 1:3] <- lapply(res[, 1:3], round, digits = digits)
  
  if (!is.null(xl <- x$xlevels) && add_reference) {
    for (ii in seq_along(xl)) {
      lev <- paste0(names(xl)[ii], xl[[ii]])
      idx <- which(rownames(res) %in% lev)[1L] - 1L
      res <- if (idx == 0L)
        rbind(NA, res) else rbind(head(res, idx), NA, tail(res, -idx))
      rownames(res)[idx + 1L] <- lev[1L]
    }
  }

  res
}

#' @rdname tabler
#' @export
tabler.survfit <- function(x, add_reference, ...) {
  surv_table(x, ...)
}

#' @rdname tabler
#' @export
tabler.coxph <- function(x, digits = 3L, level = 0.95, exp = TRUE,
                         add_reference = FALSE, ...) {
  res <- data.frame(summary(x, ...)$coefficients, check.names = FALSE)
  pv <- grep('^Pr', names(res))
  res[, pv] <- pvalr(res[, pv], ...)

  suppressMessages({
    res <- data.frame(
      exp(cbind(coef(x), confint(x, level = level))), res[, pv],
      stringsAsFactors = FALSE
    )
  })

  if (!exp)
    res[, 1:3] <- log(res[, 1:3])

  fmt <- sprintf('%%.0%1$sf (%%.0%1$sf, %%.0%1$sf)', digits)
  res <- data.frame(
    res,
    sprintf(fmt, res[, 1L], res[, 2L], res[, 3L]),
    stringsAsFactors = FALSE
  )

  res <- setNames(
    res,
    c(ifelse(exp, 'HR', 'Est'),
      paste0(c('L', 'U'), level * 100), 'Pr(>|z|)',
      sprintf('%s (%s%% CI)', ifelse(exp, 'HR', 'Est'), level * 100))
  )
  res[, 1:3] <- lapply(res[, 1:3], round, digits = digits)
  
  if (!is.null(xl <- x$xlevels) && add_reference) {
    for (ii in seq_along(xl)) {
      lev <- paste0(names(xl)[ii], xl[[ii]])
      idx <- which(rownames(res) %in% lev)[1L] - 1L
      res <- if (idx == 0L)
        rbind(NA, res) else rbind(head(res, idx), NA, tail(res, -idx))
      rownames(res)[idx + 1L] <- lev[1L]
    }
  }
  
  res
}

#' tabler_by
#'
#' @description
#' This function is helpful for making simple, formatted tables similar to
#' the functionality of \code{\link[tables]{tabular}}.
#'
#' \code{tabler_by} creates simple tables, and \code{tabler_by2} is a wrapper
#' which can create stratified tables (\code{tabler_by} can also achieve this
#' but requires additional steps).
#'
#' @details
#' \code{varname} and \code{byvar} should be factors, and the levels will
#' appear in the output as they occur in \code{levels(x)}.
#'
#' \code{n} is used to calculate the percent. If missing, the output will
#' only show counts in the table. If given, \code{length(n)} should be one or
#' equal to the number of levels of \code{byvar}.
#'
#' If one \code{n} is given, \code{tabler_by} assumes that this is the total
#' population for a subgroup, e.g., if creating a table for a subset of the
#' data, it is only necessary to provide the total \code{n} for that group.
#'
#' If more than one \code{n} is given, \code{tabler_by} assumes that the
#' entire data set is given as \code{data} and will use the corresponding
#' \code{n} for percents.
#'
#' @param data a data frame; variables \code{varname} and \code{byvar} should
#'   be factors
#' @param varname subgroup variable name (rows)
#' @param byvar stratification variable name (columns)
#' @param n number in each group; see details
#' @param order logical; order the result by decreasing frequency
#' @param zeros optional character string replacement for cells which have
#'   zero counts; will appear as \code{0 (0\%)} if \code{TRUE}
#' @param pct logical; if \code{TRUE} (and \code{n} is not missing), percents
#'   are shown
#' @param pct.column logical; if \code{TRUE}, percents are separated into new
#'   columns
#' @param pct.total logical; if \code{TRUE}, adds percents for total column
#' @param pct.sign logical; if \code{TRUE}, percent sign is shown; otherwise,
#'   percents are shown in parens without sign
#' @param drop logical; for \code{tabler_by} if \code{TRUE}, rows or columns
#'   with zero total counts will be removed (default); the \code{FALSE} case is
#'   useful when merging multiple \code{tabler_by} tables (e.g., this is how
#'   \code{tabler_by2} aligns stratified tables)
#' @param stratvar for \code{tabler_by2}, a factor-like variable used to
#'   stratify observations into mutually exclusive groups for which
#'   \code{tabler_by} will be performed on each subset
#' @param collapse_varname logical; for \code{tabler_by2}, combine multiple
#'   \code{varname}s into a single column with indents and extra rows for
#'   headers
#' @param collapse_format format strings for headers and normal rows,
#'   respectively, passed to \code{\link{sprintf}}
#'
#' @family tabler
#'
#' @seealso
#' \code{\link{tox_worst}}; \code{\link{match_ctc}}
#'
#' @examples
#' mt <- within(mtcars, {
#'   am   <- factor(am)
#'   gear <- factor(gear)
#'   vs   <- factor(vs, 0:2)
#'   vs2  <- factor(vs, 2:0)
#'   carb <- factor(carb)
#' })
#'
#' tabler_by(mt, 'vs', 'gear')
#' tabler_by(mt, c('vs2', 'carb'), 'gear', order = FALSE)
#' tabler_by(mt, c('vs2', 'carb'), 'gear', order = TRUE)
#'
#'
#' ## when length(n) > 1, each column uses a different n for percents
#' tabler_by(mt, 'vs', 'gear', n = 32, pct = TRUE)
#' tabler_by(mt, 'vs', 'gear', n = table(mt$gear), pct = TRUE)
#' tabler_by(mt, 'vs', 'gear', n = table(mt$gear), zeros = '-',
#'           pct = TRUE, pct.column = TRUE, pct.total = TRUE)
#'
#'
#' ## use tabler_by2 to create a stratified table
#' t1 <- tabler_by2(mt, c('vs', 'carb'), 'gear', stratvar = 'am', order = TRUE)
#'
#' ## or tabler_by to do this in several steps
#' t2 <- cbind(
#'   tabler_by(mt, varname = c('vs', 'carb'), byvar = 'gear',
#'             drop = FALSE)[, c('carb', 'Total')],
#'   tabler_by(mt[mt$am == 0, ], varname = c('vs', 'carb'), byvar = 'gear',
#'             drop = FALSE)[, -1],
#'   tabler_by(mt[mt$am == 1, ], varname = c('vs', 'carb'), byvar = 'gear',
#'             drop = FALSE)[, -1]
#' )
#'
#' ## order, drop extra rows/columns, set rownames
#' rownames(t2) <- locf(rownames(t2))
#' t2 <- t2[order(locf(rownames(t2)), -xtfrm(t2[, 2])), ]
#' t2 <- t2[!t2[, 'Total'] %in% '0', ]
#' t2 <- t2[, apply(t2, 2, function(x) !all(x %in% '0'))]
#' rownames(t2)[duplicated(rownames(t2))] <- ''
#'
#' stopifnot(identical(t1, t2))
#'
#'
#' ## example workflow
#' set.seed(1)
#' f <- function(x, ...) sample(x, 100, replace = TRUE, ...)
#' tox <- data.frame(
#'   id = rep(1:10, 10), phase = 1:2,
#'   code = f(rawr::ctcae_v4$tox_code[1:100]),
#'   grade = f(1:3, prob = c(0.6, 0.3, 0.1))
#' )
#'
#' tox <- cbind(tox, match_ctc(tox$code)[, c('tox_cat', 'tox_desc')])
#'
#' ## get worst toxicities by id, by grade
#' n <- colSums(table(tox$id, tox$phase) > 0)
#' tox[] <- lapply(tox, factor)
#' tox <- tox_worst(tox, desc = 'tox_desc')
#'
#' out <- tabler_by2(tox, 'tox_desc', 'grade', stratvar = 'phase', zeros = '-')
#' colnames(out)[1] <- sprintf('Total<br /><font size=1>n = %s</font>', sum(n))
#' cgroup <- c(
#'   '',
#'   sprintf('Phase I<br /><font size=1>n = %s</font>', n[1]),
#'   sprintf('Phase II<br /><font size=1>n = %s</font>', n[2])
#' )
#'
#' htmlTable::htmlTable(
#'   out, ctable = TRUE, cgroup = cgroup, n.cgroup = c(1, 4, 4),
#'   caption = 'Table 1: Toxicities<sup>&dagger;</sup> by phase and grade,
#'             sorted by total.',
#'   col.columns = rep(c('grey97', 'none', 'grey97'), times = c(1, 4, 4)),
#'   col.rgroup = rep(rep(c('none', 'grey97'), each = 5), 10),
#'   tfoot = paste0('<font size=1><sup>&dagger;</sup>Percents represent ',
#'            'proportion of patients out of respective phase total.</font>')
#' )
#' 
#'
#' ## same as above but add level of stratification, sort by total within group
#' out2 <- tabler_by2(tox, c('tox_cat', 'tox_desc'), 'grade', order = TRUE,
#'                    stratvar = 'phase', zeros = '-', n = c(5, 5), pct = TRUE)
#' stopifnot(
#'   identical(
#'     sort(unname(out[, grep('Total', colnames(out))[1]])),
#'     sort(unname(out2[, grep('Total', colnames(out2))[1]]))
#' ))
#'
#' colnames(out2)[1:2] <- c(
#'   'Description', sprintf('Total<br /><font size=1>n = %s</font>', sum(n))
#' )
#'
#' cgroup <- c(
#'   '', '',
#'   sprintf('Phase I<br /><font size=1>n = %s</font>', n[1]),
#'   sprintf('Phase II<br /><font size=1>n = %s</font>', n[2])
#' )
#' 
#' htmlTable::htmlTable(
#'   out2, align = 'lc', cgroup = cgroup, n.cgroup = c(1, 1, 4, 4),
#'   caption = 'Table 1: Toxicities<sup>&dagger;</sup> by category, phase,
#'   grade.'
#' )
#' 
#' 
#' ## collapse varname columns into single column with indents/extra rows
#' out3 <- tabler_by2(
#'   tox, c('tox_cat', 'tox_desc'), 'grade', stratvar = 'phase', zeros = '-',
#'   n = c(5, 5), pct = TRUE, pct.sign = FALSE,
#'   collapse_varname = TRUE, collapse_format = c('<i>%s</i>', '&emsp;%s')
#' )
#' 
#' htmlTable::htmlTable(
#'   out3, align = 'c', cgroup = cgroup[-1], n.cgroup = c(1, 1, 4, 4)[-1],
#'   caption = 'Table 1: Toxicities<sup>&dagger;</sup> by category, phase,
#'   grade.'
#' )
#' 
#' @export

tabler_by <- function(data, varname, byvar, n = NULL, order = FALSE, zeros = TRUE,
                      pct = FALSE, pct.column = FALSE, pct.total = FALSE,
                      pct.sign = FALSE, drop = TRUE) {
  stopifnot(
    length(byvar) == 1L,
    length(varname) <= 2L
  )

  ## helpers
  rm_p <- function(x) {
    gsub(' \\(.*\\)$', '', x)
  }
  ord <- function(...) {
    order(..., decreasing = TRUE)
  }

  if (!all(wh <- sapply(data[, c(varname, byvar)], is.factor))) {
    warning(sprintf('coercing %s to factor',
                    iprint(shQuote(idx <- c(varname, byvar)[!wh]))),
            domain = NA)
    data[, idx] <- lapply(data[, idx, drop = FALSE], as.factor)
  }
  if (pct.column & is.null(n))
    warning('\'n\' must be given when \'pct.column = TRUE\'', domain = NA)

  ## use ftbl format later, ttbl for counts, ptbl for percents
  ftbl <- ftable(data[, c(varname, byvar)])
  ttbl <- res <- cbind(Total = rowSums(ftbl), ftbl)
  cn <- c('Total', unlist(attr(ftbl, 'col.vars')))
  nr <- nrow(ttbl)
  nc <- ncol(ttbl)

  ## add percents, eg "N (x%)", to each column
  if (is.null(n) & length(varname) == 1L)
    n <- c(table(data[, byvar]))
  if (is.null(n) & any(pct, pct.column, pct.total))
    warning('\'n\' must be given if pct = TRUE', domain = NA)

  if (pct & !is.null(n)) {
    ## if length(n) == 1L, use same n for all strat levels (assume subgroup)
    ## else, map each n to each strat level (assume total)
    if ((ln <- length(n)) == 1L)
      n <- rep.int(n, ncol(ttbl) - 1L)
    if (length(n) != nlevels(data[, byvar]))
      stop('\'n\' should be length 1 or nlevels(data[, byvar])')

    ## add percents, make them sum to 100 by column
    ## if recursive error in rawr::Round, skip to regular round

    ## pct based on counts
    ptbl <- ttbl / matrix(rep(c(sum(ttbl[, 1L]), n), each = nr), nr) * 100
    ## pct based on n
    ptbl <- ttbl / matrix(rep(c(sum(n[seq.int(ln)]), n), each = nr), nr) * 100

    ptbl <- tryCatch(
      apply(ptbl, 2L, Round, 100),
      error = function(e) apply(ptbl, 2L, round, digits = 0L)
    )
    res <- matrix(sprintf('%s (%s%%)', ttbl, ptbl), nrow = nr, ncol = nc)
    res[] <- gsub('0 (NaN%)', '0 (0%)', res, fixed = TRUE)

    if (!pct.sign)
      res[] <- gsub('%', '', res, fixed = TRUE)

    ## split percents into individual columns
    if (pct.column) {
      res <- gsub('[^0-9 ]', '', apply(res, 1L, paste0, collapse = ' '))
      res <- as.matrix(read.table(text = res, colClasses = 'character'))
      cn <- interleave(cn, rep_len('%', nc))
      if (!pct.total) {
        cn <- cn[-2L]
        res <- res[, -2L]
      }
    } else {
      if (!pct.total)
        res[, 1L] <- rm_p(res[, 1L])
    }
  }

  ## use ftable formatting, replace table with new one
  ftbl <- writeftable(ftbl)
  ftbl <- as.matrix(locf(as.data.frame(ftbl)))

  ## column that separates labels from counts
  ftbl[is.na(ftbl)] <- ''
  idx <- which(colSums(apply(ftbl, 2L, Negate(nzchar))) == nr)
  colnames(res) <- cn
  res <- cbind(ftbl[, -(idx:ncol(ftbl)), drop = FALSE], res)

  ## order by group variable (if given) and total
  o <- if (order) {
    o <- data.frame(res[, c(varname, 'Total')], stringsAsFactors = FALSE)
    o <- within(locf(o), Total <- as.numeric(Total))
    i <- as.integer(o[, 'Total'])
    if (length(varname) == 1L)
      ord(i)
    else if (length(varname) == 2L)
      order(factor(o[, 1L], unique(o[, 1L])), i)
    else ord(-xtfrm(o[, 1L]), i)
  } else seq.int(nrow(res))
  res <- res[o, ]

  ## remove rows with 0 total since not dropped in ftable
  if (drop)
    res <- res[!grepl('^0', res[, 'Total']), ]
  if (length(varname) != 1L)
    res[, 1L] <- ifelse(duplicated(res[, 1]), '', res[, 1])

  if (!isTRUE(zeros)) {
    if (identical(FALSE, zeros))
      zeros = ''
    idx <- idx:ncol(res)
    res[, idx] <- `[<-`(res[, idx], gsub('^0.*', zeros, res[, idx]))
  }
  
  rownames(res) <- res[, 1L]
  
  res[, -1L]
}

#' @rdname tabler_by
#' @export
tabler_by2 <- function(data, varname, byvar, n = NULL, order = FALSE,
                       stratvar = NULL, zeros = TRUE,
                       pct = FALSE, pct.column = FALSE,
                       pct.total = FALSE, pct.sign = TRUE, drop = TRUE,
                       collapse_varname = FALSE, 
                       collapse_format = c('<b>%s</b>', '%s')) {
  ## helpers
  rm_p <- function(x) {
    gsub(' \\(.*\\)$', '', x)
  }
  ord  <- function(...) {
    order(..., decreasing = TRUE)
  }

  stopifnot(
    length(byvar) == 1L,
    (is.null(stratvar) || length(stratvar) == 1L),
    (ln <- length(varname)) <= 2L
  )

  data[] <- lapply(data, as.factor)
  data$`_strat_var_` <- if (is.null(stratvar))
    factor(1L) else data[, stratvar]
  bylvl <- levels(data[, '_strat_var_'])

  if (pct && is.null(n)) {
    warning('\'n\' must be given if pct = TRUE', domain = NA)
    pct <- FALSE
  } else if (!is.null(n))
    names(n) <- bylvl

  ## get (second varname if ln == 2L and) overall total column(s)
  o1 <- tabler_by(
    data, varname, '_strat_var_', n, FALSE, zeros,
    pct, pct.column, pct.total, pct.sign, FALSE
  )
  o1 <- o1[, seq.int(ln + (pct.column & pct.total)), drop = FALSE]

  ## get groups of columns for each level of byvar
  o2 <- lapply(bylvl, function(x) {
    tabler_by(
      data[data[, '_strat_var_'] == x, ], varname, byvar, n[x],
      FALSE, zeros, pct, pct.column, pct.total, pct.sign, FALSE
    )
  })

  res <- do.call('cbind', c(if (length(bylvl) == 1L) NULL else list(o1), o2))
  rownames(res) <- locf(rownames(res))

  ## remove duplicate columns, rows with 0 total, order using varname input
  res <- res[, !(duplicated(colnames(res)) & colnames(res) %in% varname)]
  if (drop) {
    res <- res[, apply(res, 2L, function(x)
      !(all(grepl('^\\s*0', x)) | all(x %in% as.character(zeros)))),
      drop = FALSE]
    res <- res[!(grepl('^\\s*0', res[, ln]) |
                   res[, ln] %in% as.character(zeros)), , drop = FALSE]
  }
  res <- res[if (!order)
    seq.int(nrow(res)) else {
      if (ln == 1L)
        ord(as.numeric(rm_p(res[, 1L]))) else
          ord(-xtfrm(rownames(res)), as.numeric(rm_p(res[, ln])))
    }, , drop = FALSE]
  rownames(res)[duplicated(rownames(res))] <- ''
  
  if (collapse_varname) {
    orn <- rownames(res)
    idx <- which(nzchar(orn))
    res <- insert(res, row = idx)
    rownames(res) <- ifelse(
      is.na(res[, 1L]),
      sprintf(collapse_format[1L], orn[idx]),
      sprintf(collapse_format[2L], res[, 1L])
    )
    res <- res[, -1L]
  }

  res
}

#' Description statistics \code{tabler}
#'
#' Wrapper function to create table of description statistics with optional
#' tests of association.
#'
#' If \code{FUN} is \code{FALSE}, no test will be performed but a column for
#' p-values will still be added; \code{FUN = NA} will prevent both the test
#' from being performed and the column from being added to the output.
#'
#' For \code{FUN = NULL} (default), the correct test will be guessed based on
#' the row and column data. The current options are \code{\link{fisher.test}},
#' \code{\link{wilcox.test}}, and \code{\link{kruskal.test}}. If the row data
#' is continuous, one of the latter two tests will be used based on the number
#' of unique values in the column data.
#'
#' For special cases, the function is not always guessed correctly (e.g., if
#' the row data contains few unique values, a Fisher test may be used where
#' not appropriate). One of the default tests can be given explicitly with a
#' character string, one of \code{"fisher"}, \code{"wilcoxon"}, \code{"ttest"},
#' \code{"kruskal"}, \code{"chisq"}, \code{"anova"}, \code{"cuzick"},
#' \code{"jt"}, \code{"ca"}, or \code{"kw"} (can be abbreviated).
#'
#' If \code{FUN} is a function, it must take two vector arguments: the row
#' variable vector, \code{data$varname}, and the column variable vector,
#' \code{data$byvar}, in this order and return a numeric p-value only.
#'
#' @param data a matrix or data frame with variables \code{varname} and
#'   \code{byvar}
#' @param varname,byvar the row and column variable, respectively
#' @param digits number of digits past the decimal point to keep
#' @param FUN a function performing the test of association between
#'   \code{varname} and \code{byvar}; \code{FALSE} will suppress the test but
#'   keep a column for p-values; \code{NA} will suppress the test and drop the
#'   column for p-values; or a character string; see details
#' @param format_pval logical; if \code{TRUE}, p-values will be formatted
#'   using \code{\link{pvalr}}; alternatively, a function may by used which will
#'   be applied to each p-value
#' @param color_pval logical; if \code{TRUE}, p-values will be colored
#'   by significance; see \code{\link{color_pval}}; alternatively, a vector of
#'   colors passed to \code{\link{color_pval}})
#' @param color_missing logical; if \code{TRUE}, rows summarizing missing
#'   values will be shown in light grey; alternatively, a color string can be
#'   used for a custom color
#' @param dagger logical or a character string giving the character to
#'   associate with \code{FUN}; if \code{FALSE}, none are used; if \code{TRUE},
#'   the defaults are used (\code{"*"} is used if \code{FUN} is given)
#' @param color_cell_by apply a color gradient to each cell (for html output);
#'   one of \code{"none"} for no coloring, \code{"value"} to color by numeric
#'   summary (e.g., for continuous variables), or \code{"pct"} to color by
#'   proportions (e.g., for factors)
#' @param cell_color a vector of colors used for \code{color_cell_by}
#' @param confint logical or \code{varname}; if \code{TRUE} (or \code{varname})
#'   rows will be formatted as confidence intervals; see \code{\link{binconr}}
#' @param survmedian,survtime,time survival object options - median survival or
#'   survival time
#' @param include_na_in_prop logical; if \code{TRUE} (default), the number of
#'   missing values is included when calculating proportions for factor levels;
#'   if \code{FALSE}, only non-missing levels count towards proportions
#' @param iqr logical; if \code{TRUE}, the interquartile range is used
#'   instead of the full range (default) for continuous variables
#' @param total logical; if \code{TRUE}, total column will be shown
#' @param continuous_fn,factor_fn functions to describe continuous and factor-like
#'   variables (default is to show median and range for continuous); see
#'   \code{\link[Gmisc]{getDescriptionStatsBy}}
#' @param ... additional arguments passed to
#'   \code{\link[Gmisc]{getDescriptionStatsBy}}
#'
#' @return
#' A matrix with additional attributes:
#'
#' \item{\code{attr(,"FUN")}}{the test passed to \code{FUN} or the test
#'   selected based on \code{varname} and \code{byvar} if \code{FUN = NULL}}
#' \item{\code{attr(,"p.value")}}{the numeric p-value returned by \code{FUN}}
#' \item{\code{attr(,"fnames")}}{a vector of the default \code{FUN} options
#'   with names to match the appropriate \code{dagger} character; see examples;
#'   if \code{FUN} is given, the function name will be added with a new dagger
#'   symbol (\code{"*"} by default or \code{dagger} if given)}
#' \item{\code{attr(,"tfoot")}}{a footnote for the table using each dagger
#'   and corresponding test name}
#'
#' @family tabler
#'
#' @seealso
#' \code{\link{fisher.test}}; \code{\link{wilcox.test}}; \code{\link{t.test}};
#' \code{\link{kruskal.test}}; \code{\link{chisq.test}}; \code{\link{anova}};
#' \code{\link{cuzick.test}}; \code{\link{jt.test}}; \code{\link{kw.test}};
#' \code{\link{ca.test}}
#'
#' @examples
#' tabler_stat(mtcars, 'mpg', 'cyl')                 # picks kruskal-wallis
#' tabler_stat(mtcars, 'mpg', 'cyl', FUN = NA)       # no test, no p-value column
#' tabler_stat(mtcars, 'mpg', 'cyl', FUN = FALSE)    # no test, p-value column
#' tabler_stat(mtcars, 'mpg', 'cyl', FUN = 'fisher') # force fisher test
#' tabler_stat(mtcars, 'mpg', 'cyl', FUN = 'anova')  # force anova test
#'
#' ## use of a custom function - see ?rawr::cuzick.test
#' tabler_stat(mtcars, 'mpg', 'cyl',
#'   continuous_fn = Gmisc::describeMean,
#'   FUN = function(x, y)
#'     cuzick.test(x ~ y, data.frame(x, y))$p.value)
#'
#' ## "cuzick" is also an option for FUN
#' tabler_stat(mtcars, 'mpg', 'cyl', FUN = 'cuzick')
#'
#'
#' ## typical usage
#' mt <- within(mtcars, {
#'   mpg[1:5] <- carb[1:5] <- drat[1:20] <- NA
#'   carb <- factor(carb, ordered = TRUE)
#'   cyl  <- factor(cyl)
#' })
#'
#' tbl <- lapply(names(mt)[-10L], function(x)
#'   tabler_stat(mt, x, 'gear', percentage_sign = FALSE,
#'               color_cell_by = ifelse(is.factor(mt[, x]), 'pct', 'none'),
#'               continuous_fn = Gmisc::describeMean))
#'
#' ht <- htmlTable::htmlTable(
#'   do.call('rbind', tbl),
#'   cgroup = c('', 'Gear', ''), n.cgroup = c(1, 3, 1),
#'   rgroup = names(mt)[-10L], n.rgroup = sapply(tbl, nrow),
#'   tfoot = toString(unique(unlist(strsplit(sapply(seq_along(tbl), function(ii)
#'     attr(tbl[[ii]], 'tfoot')), ', '))))
#' )
#' structure(ht, class = 'htmlTable')
#' 
#' 
#' ## survival object (median, 95% CI, log-rank test)
#' library('survival')
#' mt <- within(mt, {
#'   surv <- Surv(wt, vs)
#' })
#' tabler_stat(mt, 'surv', 'gear')
#' tabler_stat(mt, 'surv', 'gear', survtime = TRUE, time = 10)
#' 
#'
#' ## use the tabler_stat2 wrapper for convenience
#' tabler_stat2(mt, names(mt)[-10L], 'gear')
#'
#' tabler_stat2(mt, names(mt)[-10L], 'gear', FUN = c(cyl = 'jt'))
#'
#' mt$gear <- factor(mt$gear, ordered = TRUE)
#' tabler_stat2(mt, names(mt)[-10L], 'gear',
#'   format_pval = function(x) format.pval(x, digits = 3))
#'
#' @export

tabler_stat <- function(data, varname, byvar = NULL, digits = 0L, FUN = NULL,
                        format_pval = TRUE, color_pval = TRUE,
                        color_missing = TRUE, dagger = TRUE,
                        color_cell_by = c('none', 'value', 'pct'),
                        cell_color = palette()[1:2],
                        confint = FALSE, survmedian = FALSE,
                        survtime = FALSE, time = 0,
                        include_na_in_prop = TRUE, iqr = FALSE, total = TRUE,
                        continuous_fn = function(...)
                          Gmisc::describeMedian(..., iqr = iqr),
                        factor_fn = if (!include_na_in_prop)
                          describeFactors else Gmisc::describeFactors,
                        ...) {
  fun <- deparse(substitute(FUN))
  nof <- identical(FUN, FALSE)
  color_missing <- if (isTRUE(color_missing))
    'lightgrey'
  else if (identical(color_missing, FALSE))
    NULL else color_missing
  pcol <- if (isTRUE(color_pval))
    palette()[2:1]
  else if (!identical(color_pval, FALSE)) {
    pcol <- color_pval
    color_pval <- TRUE
    pcol
  } else NULL

  if (is.null(byvar)) {
    FUN   <- NA
    byvar <- '_by_var_'
    data[, byvar] <- factor(1L, 1:2)
  }
  
  if (length(byvar) > 1L) {
    data[, '_by_var_n_'] <- interaction(data[, byvar])
    byvar <- '_by_var_n_'
  }

  x <- if (is.character(x <- data[, varname]))
    as.factor(x) else x
  y <- as.factor(data[, byvar])
  n <- length(unique(na.omit(y)))
  
  oy <- is.ordered(y)
  if (oy & nlevels(y) < 3L) {
    warning(sprintf('%s is ordered has < 3 unique values', shQuote(byvar)))
    class(y) <- setdiff(class(y), 'ordered')
    oy <- FALSE
  }
  ox <- is.ordered(x)
  if (ox & nlevels(x) < 3L) {
    warning(sprintf('%s is ordered has < 3 unique values', shQuote(varname)))
    class(x) <- setdiff(class(x), 'ordered')
    ox <- FALSE
  }

  if (identical(survtime, FALSE) & inherits(x, 'Surv'))
    survmedian <- varname
  confint <- if (isTRUE(confint))
    varname else if (identical(confint, FALSE)) NULL else confint
  survmedian <- if (isTRUE(survmedian))
    varname else if (identical(survmedian, FALSE)) NULL else survmedian
  survtime <- if (isTRUE(survtime))
    varname else if (identical(survtime, FALSE)) NULL else survtime
  time <- time[1L]

  res <- if (inherits(x, c('Date', 'POSIXct', 'POSIXt'))) {
    color_cell_by <- 'none'
    FUN <- if (length(FUN) && is.na(FUN))
      NA else FALSE
    describeDateBy(x, y, ...)
  } else if (varname %in% confint) {
    describeConfint(x, y, ...)
  } else if (inherits(x, 'Surv') && varname %in% survmedian) {
    describeSurv(x, y, drop = is.null(byvar) | identical(byvar, '_by_var_'),
                 digits = digits, ...)
  } else if (inherits(x, 'Surv') && varname %in% survtime) {
    describeSurv(x, y, times = time, drop = is.null(byvar) | identical(byvar, '_by_var_'),
                 digits = digits, ...)
  } else {
    Gmisc::getDescriptionStatsBy(
      x, y, digits = digits, html = TRUE, add_total_col = TRUE, ...,
      show_all_values = TRUE, statistics = FALSE, useNA.digits = 0L,
      continuous_fn = continuous_fn, factor_fn = factor_fn
    )
  }
  class(res) <- 'matrix'
  ## sub out ' ( - )' pattern (all NA in continuous var)
  res[] <- gsub('\\s*\\(\\s*\\-\\s*\\)', '-', res)

  ## recolor missing
  if (is.character(color_missing)) {
    wh <- rownames(res) %in% 'Missing'
    rownames(res)[wh] <- sprintf('<font color=%s><em>%s</em></font>',
                                 color_missing, rownames(res)[wh])
    res[wh, ] <- sprintf('<font color=%s><em>%s</em></font>',
                         color_missing, res[wh, ])
  }

  ## color cells of by variable by proportion or value
  ## assume prop is in parens and value starts string
  ## eg, -value (proportion%)
  color_cell_by <- match.arg(color_cell_by)
  if (color_cell_by %ni% 'none') {
    wh <- !wh
    pp <- switch(color_cell_by,
                 value = '(^[^(]+)|.',
                 pct   = '\\(.*?([0-9.]+).*?\\)|.')
    pp <- gsub(pp, '\\1', res[wh, -1L], perl = TRUE)

    res[wh, -1L] <-
      sprintf('<font color="%s">%s</font>',
              col_scaler(as.numeric(pp), cell_color), res[wh, -1L])
  }
  
  if (identical(total, FALSE))
    res <- res[, -1L, drop = FALSE]

  ## no stat fn, no pvalue column
  if (identical(NA, FUN))
    return(
      structure(if (byvar == '_by_var_')
        res[, 1L, drop = FALSE] else res, FUN = FALSE, tfoot = '')
    )

  ## add pvalue column using stat fn
  pvn <- tryCatch(
    getPval_(x, y, FUN),
    error = function(e) {
      message(sprintf(
        '\nAn error occurred for %s\n\t%s\nSkipping test for %s\n',
        shQuote(varname), e$message, shQuote(varname))
      )
      NULL
    }
  )
  if (is.null(pvn))
    nof <- TRUE

  dags  <- c('&dagger;', '&Dagger;', '&#94;', '&sect;', 'v')
  dags1 <- dags[c(1L, 3L, 2L)]
  dags2 <- dags[c(3L, 4L, 1L)]
  dags3 <- dags[c(1L, 1L, 5L)]
  fname <- attr(pvn, 'FUN') %||% fun
  attr(fname, 'tnames') <- attr(pvn, 'name')

  fnamel <- list(
    unordered = structure(
      setNames(paste0(c('wilcox', 'kruskal', 'fisher'), '.test'), dags1),
      tnames = c('Wilcoxon rank-sum test', 'Kruskal-Wallis rank-sum test',
                 'Fisher\'s exact test')
    ),
    ordered1 = structure(
      setNames(paste0(c('ca', 'kw', 'cuzick'), '.test'), dags2),
      tnames = c('Cochran-Armitage test', 'Test for trend in proportions',
                 'Cuzick\'s trend test')
    ),
    ordered2 = structure(
      setNames(paste0(c('cuzick', 'cuzick', 'jt'), '.test'), dags3),
      tnames = c('Cuzick\'s trend test', 'Cuzick\'s trend test',
                 'Jonckheere-Terpstra test')
    )
  )
  fnames <- fnamel[[1L + oy + ox]]

  ## no fun - null out return
  ## user input fns get * identifier
  ## else guess/construct footnote string
  if (nof) {
    fname   <- FALSE
    p.value <- NULL
    fnames  <- NULL
  } else if (!is.null(FUN)) {
    fname <- tryCatch({
      # FUN <- match.arg(FUN, unique(unlist(fnamel)))
      fn <- lapply(fnamel, function(x) {
        idx <- match(fname, x)[1L]
        # idx <- tail(match(fname, x), 1L)
        if (!is.na(idx))
          structure(x[idx], tnames = attr(x, 'tnames')[idx])
        else NULL
      })
      Filter(Negate(is.null), fn)[[1L]]
    }, error = function(e) fname
    )
    if (!is.character(dagger))
      dagger <- names(fname) %||% '&#42;'
    fnames <- setNames(attr(fname, 'tnames'), dagger)
  } else {
    dagger <- if (inherits(x, 'Surv'))
      '*'
    else if (isTRUE(dagger))
      names(fnames)[match(fname, fnames, 3L)]
    else if (is.character(dagger))
      dagger else ''
    fnames <- setNames(attr(fnames, 'tnames'), names(fnames))
    fnames <- fnames[!duplicated(fnames)]

    ## for fnamel$unordered, only return one of wilcox/kruskal based on byvar
    if (!oy & !ox) {
      idx <- gsub('^kw\\.$', 'krus', substr(tolower(fname), 1L, 3L))
      idx <- pmatch(c(idx, 'fish'), unique(tolower(fnames)))
      if (anyNA(idx))
        idx <- c((n > 2L) + 1L, na.omit(idx))
      fnames <- fnames[sort(idx)]
      fnames <- fnames[!duplicated(fnames)]
    }
  }

  pvc <- if (is.null(pvn) || is.na(pvn))
    NULL else {
      if (color_pval)
        color_pval(pvn, cols = colorRampPalette(pcol)(6L),
                   format_pval = format_pval)
      else if (isTRUE(format_pval))
        sprintf('<em>%s</em>', pvalr(pvn, html = TRUE))
      else if (identical(format_pval, FALSE))
        pvn else format_pval(pvn)
    }

  m <- matrix('', nrow(res)) 
  m[1L, 1L] <- if (!length(pvc))
    '-' else sprintf('<em>%s</em><sup>%s</sup>', pvc, dagger)
  
  if (inherits(x, 'Surv'))
    fnames <- c(fnames, '*' = 'Log-rank test')

  structure(
    cbind(res, m), FUN = unname(fname), p.value = pvn, fnames = fnames,
    tfoot = if (nof)
      '' else toString(sprintf('<sup>%s</sup>%s', names(fnames), fnames))
  )
}

getPvalCAtest <- function(x, by) {
  ca.test(by, x)$p.value
}

getPvalCuzick <- function(x, by) {
  cuzick.test(x ~ by)$p.value
}

getPvalJTtest <- function(x, by) {
  jt.test(x, by)$p.value
}

getPvalKruskal <- function(x, by) {
  ## Gmisc:::getPvalKruskal fails if by is a char
  kruskal.test(x ~ as.factor(by))$p.value
}

getPvalKWtest <- function(x, by) {
  kw.test(x, by)$p.value
}

getPvalLogrank <- function(x, by) {
  data <- data.frame(x, by)
  lr_pval(survfit(x ~ by), data = data)
}

getPvalTtest <- function(x, by) {
  t.test(x ~ by, alternative = 'two.sided')$p.value
}

getPval_ <- function(x, y, FUN, n_unique_x = 10L) {
  if (identical(FUN, FALSE))
    return(NULL)

  if (is.numeric(FUN))
    return(FUN)

  if (is.function(FUN))
    return(
      structure(FUN(x, y), FUN = gsub('\\(.*', '', deparse(FUN)[2L]),
                name = deparse(FUN)[2L])
    )

  sFUN <- tryCatch(
    match.arg(FUN, c('anova', 'ca', 'chisq', 'cuzick', 'fisher', 'jt',
                     'kruskal', 'kw', 'ttest', 'wilcoxon', 'logrank')),
    error = function(e)
      if (grepl('one of', e$message))
        TRUE else stop(e$message)
  )

  if (isTRUE(sFUN))
    return(FUN)

  if (is.character(FUN))
    return(
      switch(
        sFUN,
        anova    = structure(
          Gmisc::getPvalAnova(x, y),  FUN = 'anova',
          name = 'ANOVA F-test'),
        ca       = structure(
          getPvalCAtest(x, y),        FUN = 'ca.test',
          name = 'Cochran-Armitage test for trend'),
        chisq    = structure(
          Gmisc::getPvalChiSq(x, y),  FUN = 'chisq.test',
          name = 'Pearson\'s chi-squared test'),
        cuzick   = structure(
          getPvalCuzick(x, y),        FUN = 'cuzick.test',
          name = 'Cuzick\'s trend test'),
        fisher   = structure(
          Gmisc::getPvalFisher(x, y), FUN = 'fisher.test',
          name = 'Fisher\'s exact test'),
        jt       = structure(
          getPvalJTtest(x, y),        FUN = 'jt.test',
          name = 'Jonckheere-Terpstra test'),
        kruskal  = structure(
          getPvalKruskal(x, y),       FUN = 'kruskal.test',
          name = 'Kruskal-Wallis rank-sum test'),
        kw       = structure(
          getPvalKWtest(x, y),        FUN = 'kw.test',
          name = 'Chi-squared test for trend in proportions'),
        logrank  = structure(
          getPvalLogrank(x, y),       FUN = 'survdiff',
          name = 'Log-rank test'),
        ttest    = structure(
          getPvalTtest(x, y),         FUN = 't.test',
          name = 'Unpaired T-test'),
        wilcoxon = structure(
          Gmisc::getPvalWilcox(x, y), FUN = 'wilcox.test',
          name = 'Wilcoxon rank-sum test')
      )
    )

  if (is.null(FUN))
    guess_test(x, y, n_unique_x) else NULL
}

guess_test <- function(x, y, n_unique_x = 10L) {
  ## guess stat test for table var (x) using stratification var (y)
  ## x with >= n_unique_x unique values is assumed continuous
  ## dbl/int with many (?) unique values uses rank-sum tests
  ## otherwise assume contingency table
  ox <- is.ordered(x)
  oy <- is.ordered(y)
  nx <- lunique(x, na.rm = TRUE)
  ny <- lunique(y, na.rm = TRUE)
  
  if (ox && nx <= 2L) {
    class(x) <- setdiff(class(x), 'ordered')
    ox <- FALSE
  }
  if (oy && ny <= 2L) {
    class(y) <- setdiff(class(y), 'ordered')
    oy <- FALSE
  }
  
  if (inherits(x, 'Surv'))
    return(structure(getPvalLogrank(x, y), FUN = 'survdiff',
                     name = 'Log-rank test'))

  if (!is.character(x) && !is.factor(x) && nx >= n_unique_x) {
    if (ny > 2L) {
      if (oy)
        structure(getPvalCuzick(x, y), FUN = 'cuzick.test',
                  name = 'Cuzick\'s trend test')
      else
        structure(getPvalKruskal(x, y),  FUN = 'kruskal.test',
                  name = 'Kruskal-Wallis rank-sum test')
    } else {
      structure(Gmisc::getPvalWilcox(x, y), FUN = 'wilcox.test',
                name = 'Wilcoxon rank-sum test')
    }
  } else {
    if (ox & oy & nx > 2L & ny > 2L)
      structure(getPvalJTtest(x, y), FUN = 'jt.test',
                name = 'Jonckheere-Terpstra test')
    else if ((nx == 2L & oy & ny > 2L) || (ny == 2L & ox & nx > 2L)) {
      if (ox)
        structure(getPvalCAtest(x, y), FUN = 'ca.test',
                  name = 'Cochran-Armitage test for trend')
      else if (oy)
        structure(getPvalCAtest(y, x), FUN = 'ca.test',
                  name = 'Cochran-Armitage test for trend')
      else stop('check ca')
    } else if ((nx > 2L & !ox & oy & ny > 2L) ||
               (ny > 2L & !oy & ox & nx > 2L)) {
      if (oy)
        structure(getPvalKWtest(x, y), FUN = 'kw.test',
                  name = 'Chi-squared test for trend in proportions')
      else if (ox)
        structure(getPvalKWtest(y, x), FUN = 'kw.test',
                  name = 'Chi-squared test for trend in proportions')
      else stop('check kw')
    }
    else
      structure(Gmisc::getPvalFisher(x, y), FUN = 'fisher.test',
                name = 'Fisher\'s exact test')
  }
}

#' Describe a date
#'
#' A function that returns the range of a date object.
#'
#' @param x,by vectors of dates and the variable to split \code{x} by
#' @param format the date format; see \code{\link{strptime}}
#' @param copula character string to separate the range
#' @param FUN a function to summarize \code{x}, usually \code{range} or
#'   \code{min}
#' @param add_total_col logical, \code{"first"}, or \code{"last"}; adds
#'   the total column to the output
#' @param useNA how to handle missing values, one of \code{"ifany"} (default),
#'   \code{"no"}, or \code{"always"}
#' @param useNA.digits number of digits to use for missing percentages
#' @param percentage_sign logical; if \code{TRUE}, percent signs are added
#'
#' @seealso
#' \code{\link[Gmisc]{getDescriptionStatsBy}}
#'
#' @examples
#' x <- c(1:4, NA, 5)
#' d <- as.Date(x, origin = Sys.Date())
#' y <- c(1, 1, 2, 2, 1, 3)
#' describeDate(x)
#' describeDateBy(d, y, copula = ' - ', format = '%d %B')
#' describeDateBy(d, y, percentage_sign = TRUE, useNA.digits = 2, FUN = min)
#'
#' dd <- data.frame(x, y, d)
#' tabler_stat(dd, 'd', 'y')
#' tabler_stat2(dd, c(Integer = 'x', Date = 'd'), c(Byvar = 'y'))
#'
#' @export

describeDate <- function(x, format = '%b %d, %Y', copula = ' to ', FUN = range) {
  x <- x[!is.na(x)]
  FUN <- match.fun(FUN)
  res <- format(FUN(x), format = format)

  if (length(res) == 1L)
    res else sprintf('%s%s%s', res[1L], copula, res[2L])
}

#' @rdname describeDate
#' @export
describeDateBy <- function(x, by, format = '%b %d, %Y', copula = ' to ',
                           FUN = range, add_total_col = TRUE,
                           useNA = c('ifany', 'no', 'always'),
                           useNA.digits = 0L, percentage_sign = FALSE) {
  nax <- is.na(x)
  nay <- is.na(by)

  if (anyNA(by)) {
    warning(
      '\n Your \'by\' variable has ', sum(nay), ' missing values\n',
      '  The corresponding \'x\' and \'by\' variables are automatically removed'
    )
    x <- x[!nay]
    by <- by[!nay]
  }

  by_y <- by(x, by, function(x)
    describeDate(x, format = format, copula = copula, FUN = FUN))
  by_y <- matrix(unlist(by_y), ncol = length(by_y),
                 dimnames = list('Date', names(by_y)))
  by_t <- describeDate(x, format = format, copula = copula, FUN = FUN)
  by_t <- matrix(by_t, dimnames = list('Date', 'Total'))

  res <- if (isTRUE(add_total_col) || add_total_col %in% 'first')
    cbind(by_t, by_y)
  else if (add_total_col %in% 'last')
    cbind(by_y, by_t)
  else by_y

  useNA <- match.arg(useNA)

  if (useNA %in% 'always' || (anyNA(x) & !useNA %in% 'no')) {
    mis <- c(list(x), split(x, by))
    mis <- sapply(mis, function(m) {
      s <- sum(is.na(m))
      sprintf('%s (%s%%)', s, if (s == 0)
        '0' else roundr(s / length(m) * 100, useNA.digits))
    })
    if (!percentage_sign)
      mis <- gsub('%', '', mis)

    res <- rbind(res, Missing = mis)
  }

  res
}

describeConfint <- function(x, y, include_NA = TRUE, percent = TRUE,
                            digits = ifelse(percent, 0L, 2L),
                            add_total_col = TRUE, useNA.digits = 0L,
                            conf = 0.95, frac = TRUE, ...) {
  # rawr:::describeConfint(mtcars$vs, mtcars$am)
  if (!include_NA) {
    na <- is.na(x) | is.na(y)
    x <- x[!na]
    y <- y[!na]
  }

  x <- as.factor(x)
  sp <- split(x, y)
  sp <- c(Total = list(x), sp)

  res <- lapply(sp, function(xx) {
    n <- table(xx)
    x <- sapply(n, function(r)
      binconr(r, sum(n), show_conf = FALSE, frac = frac, conf = conf,
              percent = percent, digits = digits))
    matrix(x, dimnames = list(names(n), NULL))
  })

  res <- do.call('cbind', res)
  colnames(res) <- names(sp)

  if (add_total_col)
    res else res[, -1L, drop = FALSE]
}

describeFactors <- function(..., useNA, exclude_na_prop = TRUE) {
  res <- Gmisc::describeFactors(..., useNA = 'always')
  nr <- nrow(Gmisc::describeFactors(..., useNA = 'ifany'))
  
  if (exclude_na_prop) {
    tmp <- rbind(Gmisc::describeFactors(..., useNA = 'no'), '')
    tmp <- tmp[!grepl('^Missing$', rownames(tmp)), , drop = FALSE]
    res[] <- trimws(paste(gsub(' .*', '', res), gsub('.* ', '', tmp)))
  }
  
  res[seq.int(nr), , drop = FALSE]
}

describeSurv <- function(x, y, include_NA = TRUE, times = NULL, percent = TRUE,
                         digits = ifelse(percent, 0L, 2L), drop = FALSE,
                         add_total_col = TRUE, useNA.digits = 0L,
                         conf = 0.95, conf.type = 'log', show_conf = TRUE, ...) {
  # describeSurv(Surv(mtcars$mpg, mtcars$vs), mtcars$gear)
  s0 <- survfit(x ~ 1, conf.int = conf, conf.type = conf.type)
  s1 <- survfit(x ~ y, conf.int = conf, conf.type = conf.type)
  
  nr <- function(x) {
    gsub('NA', 'NR', x)
  }
  label <- if (!is.null(times))
    ifelse(percent, 'Percent', 'Probability') else 'Median'
  
  res <- matrix(
    c(if (add_total_col) {
      if (is.null(times)) {
        surv_median(s0, ci = TRUE, digits = digits, show_conf = FALSE)
      } else {
        surv_prob(s0, times[1L], ci = TRUE, digits = digits,
                  show_conf = FALSE, percent = percent)
      }
    } else NULL,
    nr(if (is.null(times))
      surv_median(s1, ci = TRUE, digits = digits, show_conf = FALSE, print = FALSE)
      else unlist(surv_prob(s1, times[1L], which = NULL, ci = TRUE,
                            percent = percent, digits = digits,
                            show_conf = FALSE, print = FALSE)))
    ), nrow = 1L,
    dimnames = list(
      sprintf('%s (%s%% CI)', label, conf * 100),
      c(if (add_total_col) 'Total' else NULL,
        levels(if (drop) droplevels(as.factor(y)) else as.factor(y)))
    )
  )
  
  if (!show_conf) {
    res <- gsub(' .*', '', res)
    rownames(res) <- label
  }
  
  res
}

#' \code{tabler_stat} wrappers
#'
#' Helper functions for using \code{\link{tabler_stat}}.
#'
#' @param data a matrix or data frame with variables \code{varname} and
#'   \code{byvar}
#' @param varname one or more variables in \code{data} to calculate
#'   statistics by \code{byvar}; the rows variable(s) of the table
#' @param byvar a stratification variable; the column variable of the table
#' @param varname_label,byvar_label optional labels for each \code{varname}
#'   and \code{byvar}
#'
#'   note that duplicated \code{varname_label}s will be processed individually
#'   then grouped; this is useful if one variable is shown as continuous and
#'   categorical, for example; grouped variables should be consecutive, see
#'   examples
#' @param digits \code{NULL} or a vector of digits past the decimal point to
#'   keep for each \code{varname}; if \code{NULL}, these will be guessed using
#'   \code{rawr:::guess_digits}; if length 1, all will be rounded to
#'   \code{digits}
#'
#'   alternatively, to set \code{digits} for a single \code{varname} manually,
#'   a \emph{named} vector of \code{digits} corresponding to one or more
#'   \code{varname} variables can be used; see examples
#' @param FUN \code{NULL} or a list of functions performing the test of
#'   association between each \code{varname} and \code{byvar}; see
#'   \code{\link{tabler_stat}}
#' @param confint optional vector of \code{varname}(s) to summarize as
#'   confidence intervals
#' @param survmedian,survtime,time survival object options - median survival or
#'   survival time
#' @param total logical; if \code{TRUE}, total column will be shown
#' @param n (optional) the sample size for each column used to calculate
#'   percents; if length 1, recycled as necessary; if length > 1, length must
#'   be the same as the number of columns in the table including total
#' @param include_na_in_prop logical; if \code{TRUE} (default), the number of
#'   missing values is included when calculating proportions for factor levels;
#'   if \code{FALSE}, only non-missing levels count towards proportions
#' @param iqr logical; if \code{TRUE}, the interquartile range is used
#'   instead of the full range (default) for continuous variables
#' @param format_pval logical; if \code{TRUE}, p-values will be formatted
#'   using \code{\link{pvalr}}; alternatively, a function may by used which
#'   will be applied to each p-value
#' @param color_pval,color_missing,dagger \code{NULL} or vectors, recycled as
#'   needed for each \code{varname}; see \code{\link{tabler_stat}}
#' @param correct logical or one of \code{\link{p.adjust.methods}}; if p-value
#'   correction is desired, a column is added to the table with the corrected
#'   p-values
#' @param group a \emph{named} vector of variables (either index of or a
#'   value in \code{varname}) to begin a new group (see \code{tspanner} in
#'   \code{\link[htmlTable]{htmlTable}})
#' @param color_cell_by,cell_color apply a color gradient (\code{cell_color})
#'   to each cell (for html output); one of \code{"none"}, \code{"value"}, or
#'   \code{"pct"}; see \code{\link{tabler_stat}}
#' @param statArgs a named list of additional arguments passed to
#'   \code{\link[Gmisc]{getDescriptionStatsBy}}
#' @param align,rgroup,cgroup,tfoot optional arguments passed to
#'   \code{\link[htmlTable]{htmlTable}}; if \code{cgroup = FALSE}, no column
#'   spanners will be shown
#' @param tfoot2 optional footnote(s) appended to \code{tfoot}
#' @param htmlArgs a named list of additional arguments passed to
#'   \code{\link[htmlTable]{htmlTable}}
#' @param zeros a character string used in place of zero cells (non-character
#'   value keeps cells as-is)
#' @param clean_daggers logical or one of \code{"letters"}, \code{"numbers"};
#'   values other than \code{FALSE} will replace default dagger characters
#'
#' @family tabler
#'
#' @seealso
#' \code{\link{tabler_stat}}; \code{rawr:::get_tabler_stat_n}
#'
#' \code{rawr:::guess_digits}; \code{rawr:::name_or_index}
#'
#' \code{rawr:::tabler_stat_list}; \code{rawr:::tabler_stat_html}
#'
#' @examples
#' sapply(mtcars[1:6], rawr:::guess_digits)
#' Map(rawr::roundr, mtcars[1:6], sapply(mtcars[1:6], rawr:::guess_digits))
#'
#' rawr:::get_tabler_stat_n(mtcars$gear)
#'
#'
#' ## typical usage
#' mt <- within(mtcars, {
#'   cyl  <- factor(cyl)
#'   mpg2 <- factor(+(mpg > 20), 0:1, c('&le; 20', '&gt; 20'))
#' })
#'
#' tabler_stat2(mt, c('mpg', 'cyl', 'wt'))
#' tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'vs')
#' tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'vs', group = 'wt')
#' tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'vs',
#'   group = c(group1 = 1, 'Weight' = 3))
#' tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'vs',
#'   FUN = list('kruskal', 'fisher', FALSE))
#'
#' tabler_stat2(
#'  mt, c('mpg', 'cyl', 'wt'), 'vs',
#'  cgroup = c('', 'V/S engine', ''),
#'  rgroup = c('Miles/gallon', 'No. cylinders', 'Weight (1000 lbs)')
#' )
#'
#' tabler_stat2(
#'   mt,
#'   varname = c(MPG = 'mpg', MPG = 'mpg2', Cylinders = 'cyl', Weight = 'wt'),
#'   byvar = c('V/S engine' = 'vs'),
#'   confint = c('mpg2', 'cyl'),
#'   zeros = NULL,
#'   digits = c(wt = 2)
#'   # digits = c('3' = 2) ## equivalently
#' )
#' 
#' 
#' ## multiple column variables
#' tabler_stat2(mt, c('mpg', 'cyl', 'wt'), c('vs', 'gear'), FUN = NA)
#' 
#' 
#' \dontrun{
#' ## experimental -- row- or column-merge tables
#' h1 <- tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'vs')
#' h2 <- tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'am')
#' h3 <- tabler_stat2(mt, c('mpg', 'cyl', 'wt'), 'gear')
#' 
#' rawr:::combine_tabler_stat2(list(h1, h2), how = 'rbind')
#' rawr:::combine_tabler_stat2(list(table1 = h1, table2 = h2), how = 'rbind')
#' 
#' rawr:::combine_tabler_stat2(list(h1, h2, h3), how = 'cbind')
#' rawr:::combine_tabler_stat2(list(t1 = h1, t2 = h2, t3 = h3), how = 'cbind')
#' }
#'
#' @export

tabler_stat2 <- function(data, varname, byvar = NULL,
                         varname_label = names(varname), byvar_label = names(byvar),
                         digits = NULL, FUN = NULL,
                         confint = FALSE, survmedian = FALSE,
                         survtime = FALSE, time = 0,
                         total = TRUE, n = NULL,
                         include_na_in_prop = TRUE, iqr = FALSE,
                         format_pval = TRUE, color_pval = TRUE, correct = FALSE,
                         color_missing = TRUE, dagger = TRUE,
                         group = NULL, color_cell_by = 'none',
                         cell_color = palette()[1:2], statArgs = NULL,
                         align = NULL, rgroup = NULL, cgroup = NULL,
                         tfoot = NULL, tfoot2 = NULL, htmlArgs = NULL,
                         zeros = '-', clean_daggers = FALSE) {
  data <- as.data.frame(data)
  nv   <- length(varname)
  
  nm <- names(varname)
  if (!is.null(nm))
    names(varname)[!nzchar(nm)] <- varname[!nzchar(nm)]
  varname_label <- varname_label %||% varname
  byvar_label   <- byvar_label %||% byvar

  if (!all(c(varname, byvar) %in% names(data))) {
    stop(
      sprintf('%s not found in data',
              toString(shQuote(setdiff(c(varname, byvar), names(data)))))
    )
  }
  
  surv <- varname[sapply(varname, function(x) inherits(data[, x], 'Surv'))]
  survmedian <- if (length(surv) & !all(surv %in% c(survmedian, survtime)))
    surv else survmedian
  survmedian <- setdiff(survmedian, survtime)

  stopifnot(
    # length(byvar) %in% 0:1,
    byvar %in% names(data),
    nv == length(varname_label),
    is.null(FUN) ||
      !is.null(names(FUN)) ||
      is.null(names(FUN)) & length(FUN) == 1L ||
      nv == length(FUN)
  )

  if (is.character(group)) {
    ## need to shift group indices up for duplicated/combined varnames
    dup <- which(duplicated(varname_label))
    grp <- match(group, varname, nomatch = NA)
    grp <- sapply(grp, function(x) x - sum(dup < x))
    group <- na.omit(setNames(grp, names(group)))
  }

  l <- tabler_stat_list(
    data, varname, byvar, varname_label, byvar_label, digits, FUN,
    format_pval, color_pval, color_missing, dagger, color_cell_by,
    cell_color, confint, survmedian, survtime, time,
    include_na_in_prop, iqr, statArgs, total
  )

  tabler_stat_html(
    l, align, rgroup, cgroup, tfoot, tfoot2, htmlArgs,
    zeros, group, correct, format_pval, clean_daggers, total, n
  )
}

tabler_stat_list <- function(data, varname, byvar, varname_label = varname,
                             byvar_label = byvar, digits = NULL, FUN = NULL,
                             format_pval = TRUE, color_pval = TRUE,
                             color_missing = TRUE, dagger = TRUE,
                             color_cell_by = 'none', cell_color = NULL,
                             confint = NULL, survmedian = NULL,
                             survtime = NULL, time = 0,
                             include_na_in_prop = TRUE,
                             iqr = FALSE, statArgs = NULL, total = TRUE) {
  cg <- ncg <- NULL
  obv <- byvar
  
  if (is.null(byvar)) {
    FUN   <- NA
    byvar <- '_by_var_'
    data[, byvar] <- factor(1L, 1:2)
  }
  
  if (length(byvar) > 1L) {
    sep <- '__xxx__'
    obv <- byvar
    data[, '_by_var_n_'] <- interaction(data[, byvar], drop = TRUE, sep = sep)
    byvar <- '_by_var_n_'
  }
  
  nv <- length(varname)
  byvar <- byvar[1L]

  odata <- data
  data <- data[, c(varname, byvar)]
  data[, byvar] <- as.factor(data[, byvar])
  .data <- data

  dig <- sapply(data[, -ncol(data), drop = FALSE], guess_digits)
  digits <- if (is.null(digits))
    dig
  else {
    if (length(digits) == 1L & is.null(names(digits)))
      rep_len(digits, nv)
    else replace(dig, name_or_index(names(digits), varname), digits)
  }

  fun <- setNames(vector('list', nv), varname)
  FUN <- if (is.null(FUN))
    fun
  else if (is.null(names(FUN)) & length(FUN) == nv)
    FUN
  else {
    if (length(FUN) == 1L & is.null(names(FUN)))
      rep_len(list(FUN), nv)
    else replace(fun, name_or_index(names(FUN), varname), FUN)
  }

  cell_color <- if (is.null(cell_color))
    list(palette()[1:2])
  else if (islist(cell_color))
    cell_color else list(cell_color)
  cell_color <- rep_len(cell_color, nv)

  color_cell_by <- rep_len(color_cell_by, nv)

  data <- rep_len(list(data), nv)
  pval <- any(!is.na(FUN))
  
  cf <- list(statArgs$continuous_fn %||% eval(formals(tabler_stat)$continuous_fn))
  statArgs$continuous_fn <- NULL
  ff <- list(statArgs$factor_fn %||% eval(formals(tabler_stat)$factor_fn))
  statArgs$factor_fn <- NULL
  
  statArgs <- if (!length(statArgs))
    NULL else statArgs

  l <- do.call('Map', c(list(
    f = tabler_stat, data, varname, byvar, digits, FUN,
    list(format_pval), color_pval, color_missing, dagger,
    color_cell_by, cell_color,
    list(confint %||% ''), list(survmedian %||% ''), list(survtime %||% '')),
    time, include_na_in_prop, iqr, total, list(cf), list(ff), statArgs)
  )
  
  tbl <- lapply(l, function(x)
    x[!(duplicated(x, fromLast = TRUE) &
          duplicated(rownames(x), fromLast = TRUE)), , drop = FALSE]
  )
  names(l) <- names(tbl) <- varname_label

  res <- do.call('rbind', tbl)

  dup <- table(names(tbl))
  if (length(dup <- dup[dup > 1L])) {
    for (dd in names(dup)) {
      ii <- names(tbl) %in% dd
      tbl[[which(ii)[1L]]] <- do.call('rbind', tbl[ii])
      tbl <- tbl[-which(ii)[-1L]]
    }
  }

  rgroup   <- names(tbl)
  n.rgroup <- unname(sapply(rgroup, function(x) nrow(tbl[[x]]) %||% 1L))
  cgroup   <- c('', byvar_label, '')
  n.cgroup <- c(1L, nlevels(.data[, byvar]), 1L)
  
  if (identical(total, FALSE)) {
    cgroup <- cgroup[-1L]
    n.cgroup <- n.cgroup[-1L]
  }

  if (!pval) {
    cgroup   <- head(cgroup, -1L)
    n.cgroup <- head(n.cgroup, -1L)
  }
  
  if (length(obv) > 1L) {
    tmp <- tmp1 <- data.frame(table(odata[, obv]))
    tmp[, sprintf('id_%s', obv)] <- lapply(tmp[, -ncol(tmp)], function(x)
      rleid(as.integer(x)))
    tmp <- tmp[tmp$Freq > 0, ]
    ncg <- lapply(seq_along(obv[-1L]), function(ii) {
      x <- tmp[, sprintf('id_%s', obv)[ii]]
      y <- tmp[, sprintf('id_%s', obv)[ii + 1L]]
      as.vector(tapply(x, y, lunique))
    })
    if (length(ncg) > 2L)
      ncg[-(1:2)] <- lapply(ncg[-(1:2)], unique)
    
    ncg <- do.call('rbindx', rev(ncg))
    rownames(ncg) <- rev(obv[-1L])
    
    cg <- lapply(seq.int(nrow(ncg)), function(ii) {
      r <- if (ii < nrow(ncg)) {
        n <- na.omit(ncg[ii, ])
        as.vector(tapply(ncg[nrow(ncg), ], rep(seq_along(n), n), sum))
      } else ncg[ii, ]
      rn <- rownames(ncg)[ii]
      as.character(tmp[, rn][cumsum(r)])
    })
    cg <- do.call('rbindx', cg)
    
    if (!identical(total, FALSE)) {
      cg <- cbind('', cg)
      ncg <- cbind(1L, ncg)
    }
    
    if (pval) {
      cg <- t(apply(cbind(cg, NA), 1L, function(x) {
        x[which(is.na(x))[1L]] <- ''
        x
      }))
      ncg <- t(apply(cbind(ncg, NA), 1L, function(x) {
        x[which(is.na(x))[1L]] <- 1L
        x
      }))
    }
    
    colnames(res) <- gsub(sprintf('%s.*', sep), '', colnames(res))
    l <- lapply(l, function(x) {
      colnames(x) <- gsub(sprintf('%s.*', sep), '', colnames(x))
      x
    })
  }
  
  structure(
    class = 'htmlStat',
    list(
      output_data = res, rgroup = rgroup, n.rgroup = n.rgroup,
      cgroup = cg %||% cgroup, n.cgroup = ncg %||% n.cgroup, pval = pval,
      data = .data, byvar = byvar, l = l
    )
  )
}

tabler_stat_html <- function(l, align = NULL, rgroup = NULL, cgroup = NULL,
                             tfoot = NULL, tfoot2 = NULL, htmlArgs = NULL,
                             zeros = NULL, group = NULL, correct = FALSE,
                             format_pval = TRUE, clean_daggers = FALSE,
                             total = TRUE, n = NULL) {
  stopifnot(inherits(l, 'htmlStat'))

  tr <- function(x) {
    gsub('\\s{2,}', ' ', x)
  }

  if (noby <- (l$byvar == '_by_var_')) {
    l$cgroup <- l$cgroup[1L]
    l$n.cgroup <- 1L
  }

  cn <- c(get_tabler_stat_n(l$data[, l$byvar], n = n), '<em>p-value</em>')
  if (identical(total, FALSE))
    cn <- cn[-1L]
  
  cn <- gsub('__xxx__.*?(?=<br)', '', cn, perl = TRUE)
  
  colnames(l$output_data) <- if (noby)
    cn[1L] else if (l$pval) cn else head(cn, -1L)

  res <- gsub('%', '', l$output_data, fixed = TRUE)
  p <- c(
    '0 (0)' = '^\\s*0\\s*\\(\\s*0\\s*\\)\\s*$',
    '0 (0)' = '(?<=>)\\s*0\\s*\\(\\s*0\\s*\\)\\s*', ## targets % missing row with tags
    '0' = '^\\s*0\\s*$',
    'NA (NA - NA)' = '^\\s*NA\\s*\\(\\s*NA\\s*-\\s*NA\\s*\\)\\s*$'
  )
  if (is.character(zeros))
    res <- gsub(paste(p, collapse = '|'), zeros, res, perl = TRUE)

  ## text/daggers used in footnotes
  tt <- strsplit(sapply(l$l, attr, 'tfoot'), ', (?=<sup>)', perl = TRUE)
  dg <- lapply(l$l, function(x)
    gsub('<sup>([^<]+)</sup>|.', '\\1', x[1L, ncol(x)]))
  tf <- Map(function(x, y)
    if (nchar(x) == 0L)
      NULL else grep(x, y, value = TRUE), dg, tt)
  lf <- unique(unlist(tf))
  tf <- toString(lf)


  if (!is.null(tfoot2))
    tf <- sprintf('%s<br />%s', tf, paste0(tfoot2, collapse = '<br />'))

  ## tspanner
  if (!is.null(group)) {
    group <- group[group <= length(l$n.rgroup)]
    names(group) <- names(group) %||% as.character(group)
    group <- sort(group)
    nt <- insert(l$n.rgroup, group)
    nt <- cum_reset(nt, NA, function(x) sum(x, na.rm = TRUE))
    ts <- names(group)
    ts <- if (1 %ni% group)
      c('', ts)
    else if (0 %in% nt) {
      nt <- nt[nt > 0]
      ts
    } else ts

    htmlArgs <- modifyList(
      htmlArgs %||% list(),
      list(n.tspanner = nt, tspanner = ts)
    )
  }

  ## extract p-values and use correction method
  method <- if (isTRUE(correct))
    'fdr' else if (is.character(correct)) correct else 'none'
  pvn <- sapply(seq_along(l$l), function(ii)
    attr(l$l[[ii]], 'p.value') %||% NA)
  if (!identical(correct, FALSE)) {
    nc  <- ncol(res)
    wh  <- nzchar(res[, nc])
    pvc <- p.adjust(pvn, method, length(sort(pvn))) ## only non na pvalues
    res <- cbind(res, res[, nc])
    res[, nc + 1L][wh] <- color_pval(pvc, format_pval = format_pval)
    colnames(res)[nc + 1L] <-
      gsub('(?=p)', paste(method, ''), colnames(res)[nc], perl = TRUE)
    l$n.cgroup[length(l$n.cgroup)] <- l$n.cgroup[length(l$n.cgroup)] + 1L
  }

  if (!identical(clean_daggers, FALSE)) {
    old <- gsub('</sup>.*', '</sup>', lf)
    dag <- if (isTRUE(clean_daggers) | clean_daggers %in% 'letters')
      letters else seq_along(old)
    new <- sprintf('<sup>%s</sup>', dag[seq_along(old)])

    m <- gregexpr('<sup>.+</sup>', res)
    r <- lapply(regmatches(res, m), function(x)
      as.character(factor(x, old, new)))

    res[] <- `regmatches<-`(res, m, FALSE, r)

    ## update footnote
    tf <- unique(gsub('.*>', '', unlist(tt)))
    tf <- toString(paste0(new, tf))
  }

  args <- list(
    x = res, align = align %||% strrep('c', ncol(res)),
    rgroup = rgroup %||% l$rgroup, n.rgroup = l$n.rgroup,
    cgroup = cgroup %||% l$cgroup, n.cgroup = l$n.cgroup,
    css.cell = 'padding: 0px 5px 0px; white-space: nowrap;',
    tfoot = tr(tfoot %||% sprintf('<font size=1>%s</font>', tf))
  )
  
  if (identical(cgroup, FALSE))
    args$cgroup <- args$n.cgroup <- NULL
  
  args <- modifyList(args, htmlArgs %||% list())
  ht <- do.call(htmlTable::htmlTable, args)

  structure(ht, class = 'htmlTable', p.value = pvn, call = args)
}

combine_tabler_stat2 <- function(l, correct = FALSE, format_pval = TRUE,
                                 how = c('rbind', 'cbind'), headers = names(l),
                                 htmlArgs = list()) {
  lget <- function(l, what, attr = FALSE) {
    lapply(l, function(x) if (attr)
      attr(x, what) else x[[what]])
  }
  tr <- function(x) {
    gsub('\\s{2,}', ' ', x)
  }
  
  p <- unlist(lget(l, 'p.value', TRUE))
  l <- lget(l, 'call', TRUE)
  
  how <- match.arg(how)
  res <- do.call(how, lget(l, 'x'))

  method <- if (isTRUE(correct))
    'fdr' else if (is.character(correct)) correct else 'none'
  if (!identical(correct, FALSE)) {
    nc  <- ncol(res)
    wh  <- nzchar(res[, nc])
    pvc <- p.adjust(p, method, length(sort(p))) ## only non na pvalues
    res <- cbind(res, res[, nc])
    res[, nc + 1L][wh] <- color_pval(pvc, format_pval = format_pval)
    colnames(res)[nc + 1L] <-
      gsub('(?=p)', paste(method, ''), colnames(res)[nc], perl = TRUE)
    l[[1L]]$n.cgroup[length(l[[1L]]$n.cgroup)] <-
      l[[1L]]$n.cgroup[length(l[[1L]]$n.cgroup)] + 1L
  }

  tf <- gsub('</?font[^>]*?>', '', unlist(lget(l, 'tfoot')))
  tf <- unique(unlist(strsplit(tf, ', ')))

  args <- list(
    x = res, align = l[[1L]]$align,
    rgroup = do.call('c', lget(l, 'rgroup')),
    n.rgroup = do.call('c', lget(l, 'n.rgroup')),
    cgroup = l[[1L]]$cgroup, n.cgroup = l[[1L]]$n.cgroup,
    css.cell = 'padding: 0px 5px 0px; white-space: nowrap;',
    tfoot = sprintf('<font size=1>%s</font>', toString(tf))
  )
  
  if (how == 'cbind') {
    args$align <- paste(lget(l, 'align'), collapse = '')
    args$rgroup <- lget(l, 'rgroup')[[1L]]
    args$n.rgroup <- lget(l, 'n.rgroup')[[1L]]
    args$cgroup <- do.call('c', lget(l, 'cgroup'))
    args$n.cgroup <- do.call('c', lget(l, 'n.cgroup'))
    
    if (!is.null(headers)) {
      ncg <- sapply(lget(l, 'x'), ncol)
      lcg <- headers
      length(ncg) <- length(lcg) <- length(args$cgroup)
      args$n.cgroup <- rbind(ncg, args$n.cgroup)
      args$cgroup <- rbind(lcg, args$cgroup)
    }
  }
  
  if (how == 'rbind' && !is.null(headers)) {
    args$tspanner <- headers
    args$n.tspanner <- sapply(lget(l, 'x'), nrow)
  }
  
  structure(
    do.call(htmlTable::htmlTable, modifyList(args, htmlArgs)),
    class = 'htmlTable', p.value = p, call = args
  )
}

guess_digits <- function(x, default = 0L) {
  if (!inherits(x, 'numeric'))
    return(default)

  co <- capture.output(cat(x))
  co <- strsplit(co, '\\s+')[[1L]]

  nch <- max(nchar(sub('.*?(?:\\.|$)', '', co)))
  dig <- if (nch >= 4L)
    1L else nch

  if (dig)
    dig else default
}

get_tabler_stat_n <- function(x, pct = TRUE, use_labels = TRUE,
                              total = 'Total', n = NULL) {
  fmt <- '%s<br /><font weight=normal; size=1>n = %s (%s)</font>'
  if (!pct)
    fmt <- gsub(' (%s)', '', fmt, fixed = TRUE)
  
  x <- as.factor(x)
  l <- if (use_labels)
    levels(x) else rep_len(total, nlevels(x))
  
  t <- table(x)
  if (length(n) == 1L)
    n <- rep_len(n, length(t) + 1L)
  if (!is.null(n))
    stopifnot(length(n) == length(t) + 1L)
  N <- n %||% sum(t)
  n <- c(if (is.null(n)) sum(t) else n[1L], t)
  p <- roundr(n / N * 100, 0)[-1L]
  o <- Vectorize('sprintf')
  o <- if (pct)
    o(c(total, l), roundr(n, 0), c('%', p), fmt = fmt)
  else o(c(total, l), roundr(n, 0), fmt = fmt)

  drop(o)
}

#' Describe multiple functions
#' 
#' Concatenate one or more functions for output in \code{\link{tabler_stat}}.
#' 
#' @param x a vector to be described by each \code{FUN}
#' @param FUN a list of functions that each return a \emph{named} string; if
#'   \code{FUN} is a named list, all non zero length names will take
#'   precedence; if neither are available, names will be guessed based on
#'   the list
#' @param ... ignored
#' 
#' @examples
#' describeFuns(1:10, FUN = list(mean = mean, median = median))
#' describeFuns(1:10, FUN = list(Gmisc::describeMean, Gmisc::describeMedian))
#' describeFuns(1:10, FUN = list(function(...) Gmisc::describeMedian(..., iqr = FALSE)))
#' 
#' 
#' ## typical usage
#' ff <- list(
#'   'Median (full range)' = function(...) Gmisc::describeMedian(..., iqr = FALSE),
#'   Gmisc::describeMean,
#'   Min = min, Max = max
#' )
#'   
#' tabler_stat2(
#'   mtcars, c('mpg', 'wt'), 'gear', FUN = NA,
#'   statArgs = list(continuous_fn = function(x, ...) describeFuns(x, FUN = ff))
#' )
#' 
#' @export

describeFuns <- function(x, FUN, ...) {
  l <- vector('list', length(FUN))
  for (ii in seq_along(l))
    l[[ii]] <- FUN[[ii]](x)
  
  ## guess names of funs if not given
  nm <- rbind(names(FUN), sapply(l, names))
  if (nrow(nm) > 1L)
    nm <- unlist(apply(nm, 2L, function(x)
      if (nzchar(x[1L])) x[1L] else x[2L]))
  
  setNames(
    paste(l, collapse = '<br />'),
    paste(nm, collapse = '<br />&ensp;')
  )
}

#' Response table
#'
#' Convenience function to calculate proportions and confidence intervals and
#' format for easy display.
#'
#' @param x a factor variable of responses; responses should be ordered as
#'   CR, PR, SD, PD, NE or similar (i.e., best to worst)
#' @param r_or_better if integer(s), indicates the levels of \code{x} that
#'   are to be combined with better responses; for example, if
#'   \code{r_or_better = 3} (default), then any occurrence of level 1, 2, or 3
#'   of \code{x} is treated as a response, and the proportion and confidence
#'   interval are calculated for the aggregate
#'
#'   if \code{FALSE}; levels of \code{x} are estimated independently
#' @param conf,frac,show_conf,pct.sign additional arguments passed to
#'   \code{\link{binconr}}
#' @param digits number of digits past the decimal point to keep
#' @param total logical or numeric; if \code{TRUE}, a column with the total,
#'   i.e., \code{length(x)} is added; if numeric, \code{length(x)} and,
#'   optionally, fraction and percent out of \code{total} is added
#' @param two_stage \code{FALSE} (default, assumes exact binomial CIs are
#'   desired) or a vector of length 3 with the 1) maximum number responses in
#'   the first stage that can be observed \emph{without} continuing; 2) the
#'   sample size in the first stage; and 3) the sample size in the second stage
#'
#'   if more than three integers are given, the remaining should indicate the
#'   column(s) which should be calculated as two-stage CIs
#'
#' @family tabler
#'
#' @seealso
#' \code{\link{bincon}}; \code{\link{binconr}}; \code{\link{response}}
#'
#' @examples
#' set.seed(1)
#' r <- c('CR', 'PR', 'SD', 'PD', 'NE')
#' x <- factor(sample(r, 30, replace = TRUE), r)
#'
#' tabler_resp(x)
#' tabler_resp(x, 3)
#' tabler_resp(x, 'PR')
#' tabler_resp(x, 'PR', total = 50)
#'
#' ## note NAs are removed
#' y <- replace(x, 1:10, value = NA)
#' tabler_resp(x, FALSE)
#' tabler_resp(y, FALSE)
#'
#'
#' ## two-stage designs
#' two_stage <- c(r1 = 2, n1 = 10, n2 = 20)
#' tabler_resp(x, two_stage = c(two_stage))
#' 
#' ## two-stage only for SD or better and PR or better
#' tabler_resp(x, two_stage = c(two_stage, 6:7))
#'
#' ## compare
#' bincon(c(2, 20), c(10, 20), method = 'two-stage') ## SD or better
#' bincon(c(2, 16), c(10, 20), method = 'two-stage') ## PR or better
#' 
#' ## one-stage methods should not be used
#' bincon(c(20, 16), 30, method = 'exact')
#'
#'
#' ## typical usage
#' ht <- htmlTable::htmlTable(
#'   rbind(
#'     '95% CI' = tabler_resp(x),
#'     '90% CI' = tabler_resp(x, conf = 0.9),
#'     'Simple' = tabler_resp(x, frac = FALSE, pct.sign = FALSE,
#'                            show_conf = FALSE, digits = 1),
#'     'Two-stage' = tabler_resp(x, two_stage = c(2, 10, 20, 6:7))
#'   ),
#'   caption = 'Table of responses with exact binomial and
#'     two-stage<sup>&dagger;</sup>confidence intervals.',
#'   css.cell = 'padding: 0 10 0px; white-space: nowrap;',
#'   cgroup = c('Evaluation', 'Outcome (95% CI)'),
#'   n.cgroup = c(nlevels(x), 2L)
#' )
#' structure(ht, class = 'htmlTable')
#'
#' @export

tabler_resp <- function(x, r_or_better = levels(x)[3:2], conf = 0.95,
                        digits = 0L, frac = TRUE, show_conf = TRUE,
                        pct.sign = TRUE, total = FALSE, two_stage = FALSE) {
  x  <- as.factor(x)
  rs <- names(table(x))
  lx <- length(x)

  if (is.character(r_or_better))
    r_or_better <- if (length(wh <- match(r_or_better, rs)))
      wh else {
        warning('Failed to guess \'r_or_better\'')
        ## take first 3 levels
        3:1
      }

  res <- c(
    resp1(x, rs, conf, digits, frac, show_conf, pct.sign, FALSE),
    if (is.numeric(r_or_better))
      rev(
        r_or_better1(x, rev(rs), conf, digits, frac, show_conf, pct.sign, FALSE)
      )[r_or_better]
    else NULL
  )

  if (!identical(two_stage, FALSE)) {
    ## define specific CIs to use for two-stage
    two_idx <- tail(two_stage, -3L)
    ## if none are given then do all
    if (!length(two_idx))
      two_idx <- seq_along(res)
    two_stage <- two_stage[1:3]

    if (two_stage[1L] > two_stage[2L] || two_stage[1L] > two_stage[3L])
      stop(
        'For two-stage designs, \'two_stage\' should be a vector of ',
        'length 3 giving:\n\t1) the max number of successes in the first ',
        'stage that can be observed _without_ continuing; ',
        '\n\t2) the number entered in the first stage; and ',
        '\n\t3) additional entered in the second stage'
      )

    res2 <- c(
      resp1(x, rs, conf, digits, frac, show_conf, pct.sign, two_stage),
      if (is.numeric(r_or_better))
        rev(
          r_or_better1(x, rev(rs), conf, digits, frac,
                       show_conf, pct.sign, two_stage)
        )[r_or_better]
      else NULL
    )

    res[two_idx] <- res2[two_idx]
  } else two_idx <- NULL

  tot <- if (is.numeric(total))
    sprintf('%s/%s (%s%%)', lx, total, roundr(lx / total * 100, digits))
  else lx

  tot <- c(Total = if (!pct.sign)
    gsub('%', '', tot, fixed = TRUE) else tot)

  if (!frac)
    tot <- gsub('/\\S+', '', tot)

  res <- c(if (total)
    tot else NULL, res)

  attr(res, 'two_stage') <- if (length(two_idx)) {
    res[two_idx] <- paste0(res[two_idx], '<sup>&dagger;</sup>')
    setNames(two_idx, names(res)[two_idx])
  } else NULL

  res
}

resp1 <- function(x, r, conf, digits, frac, show_conf, pct.sign, two) {
  # rawr:::resp1(x, levels(x),     0.9, 0L, TRUE, TRUE, TRUE, FALSE)
  # rawr:::resp1(x, c('CR', 'PR'), 0.9, 0L, TRUE, TRUE, TRUE, FALSE)
  tbl <- table(x)[rev(r)]

  res <- if (all(is.na(x)))
    rep('-', length(r))
  else sapply(tbl, function(X)
    if (identical(two, FALSE))
      binconr(X, sum(tbl), conf, digits, TRUE, frac,
              show_conf, pct.sign, 'exact', TRUE)
    else
      binconr(c(two[1L], X), two[2:3], conf, digits, TRUE, frac,
              show_conf, pct.sign, 'two-stage', TRUE)
  )

  setNames(res, rev(r))
}

r_or_better1 <- function(x, r, conf, digits, frac, show_conf, pct.sign, two) {
  # rawr:::r_or_better1(x, unique(x), 0.9, 0L, TRUE, TRUE, TRUE, FALSE)
  x[x %ni% r] <- NA
  x <- na.omit(x)

  res <- if (all(is.na(x)))
    rep('-', length(r))
  else
    sapply(seq_along(r), function(X)
      if (identical(two, FALSE))
        binconr(sum(x %in% r[X:length(r)]), length(x), conf,
                digits, TRUE, frac, show_conf, pct.sign, 'exact', TRUE)
      else
        binconr(c(two[1L], sum(x %in% r[X:length(r)])), two[2:3], conf,
                digits, TRUE, frac, show_conf, pct.sign, 'two-stage', TRUE)
    )

  setNames(res, paste(r, 'or better'))
}

#' Match CTCAE codes
#'
#' Convenience function to convert CTCAE (version 3 or 4) toxicity codes or
#' descriptions with their appropriate matches. Especially useful in data from
#' paper trials where only the toxicity codes are reported excluding (the more
#' meaningful) descriptions.
#'
#' @param ... character string(s) of toxicity codes (usually of the form
#'   \code{AB123} but can handle \code{AB-123} or \code{AB 123}) or keyword(s)
#'   to be matched in the toxicity description
#' @param version version number; default is CTCAE v4
#'
#' @return
#' A data frame of matches with toxicity codes, descriptions, and categories
#' corresponding to the CTCAE version used.
#'
#' The version is stored as an attribute, \code{attr(., "version")}.
#'
#' @examples
#' codes <- sample(rawr::ctcae_v4$tox_code, 10)
#' match_ctc(codes)
#'
#' match_ctc('injury', version = 3L)
#' match_ctc('aortic', 'arterial')
#'
#' @export

match_ctc <- function(..., version = 4L) {
  x <- unlist(list(...))

  ctc <- if (version %ni% 3:4)
    stop('\'version\' should be 3 or 4')
  else if (version == 3L)
    rawr::ctcae_v3 else rawr::ctcae_v4

  ## guess if input is code or description
  idx <- if (any(grepl('([A-Za-z -])([0-9])', x)))
    match(gsub('\\s*|-', '', x, perl = TRUE), ctc[, 'tox_code'])
  else grep(paste(x, collapse = '|'), ctc[, 'tox_desc'], ignore.case = TRUE)

  if (anyNA(idx[nzchar(as.character(x)) & !is.na(x)]))
    warning(
      'CTCAE version may be incorrect - ',
      'try version = ', ifelse(version == 4, 3, 4),
      call. = FALSE
    )
  
  ctc <- ctc[idx, ]
  rownames(ctc) <- NULL
  
  structure(ctc, version = sprintf('CTCAE v%s', version))
}

#' Find highest grade toxicities
#'
#' Returns a subset of input data with worst toxicity grade per toxicity
#' code per patient, ie, sorts and removes duplicates.
#'
#' @param data toxicity data frame
#' @param id column name with identifier
#' @param desc column name with toxicity descriptions (or codes)
#' @param grade column name with toxiticity grades; should be a factor
#'   with the desired order to be properly sorted, i.e., least to most severe
#' @param code,version if \code{code} is given, \code{\link{match_ctc}} will
#'   match this column from \code{data} with the CTCAE \code{version} given and
#'   return toxicity descriptions rather than codes; if showing toxicity codes
#'   is desired, use \code{desc} instead
#'
#'   note that \code{version} should be either \code{3} or \code{4} (default)
#'   corresponding to \code{\link{ctcae_v3}} or \code{\link{ctcae_v4}},
#'   respectively
#'
#' @return
#' A filtered data frame with attributes:
#'
#' \item{\code{attr(., "data")}}{the input data frame sorted by \code{id},
#'   \code{desc}, and \code{grade}}
#' \item{\code{attr(., "duplicates"}}{the indices of rows removed from
#'   \code{attr(., "data")} which correspond to duplicate \code{desc} per
#'   \code{id} with equal or lesser \code{grade}s}
#'
#' @seealso
#' \code{\link{match_ctc}}, \code{\link{tabler_by}}; \code{\link{tabler_by2}}
#'
#' @examples
#' set.seed(1)
#' f <- function(x, ...) sample(x, 100, replace = TRUE, ...)
#'
#' tox <- data.frame(id = rep(1:10, 10), phase = 1:2,
#'                   code = f(rawr::ctcae_v4$tox_code[1:10]),
#'                   grade = factor(f(1:3, prob = c(.3, .4, .3))))
#'
#' ## get worst tox by CTCAE code
#' ## this will convert the code to description strings
#' tox1 <- tox_worst(tox, id = 'id', grade = 'grade', code = 'code')
#'
#' ## or by formatted descriptions and grade
#' tox$desc <- factor(match_ctc(tox$code)$tox_desc)
#' tox2 <- tox_worst(tox, id = 'id', grade = 'grade', desc = 'desc')
#'
#' ## both methods are equivalent
#' stopifnot(identical(tox1, tox2))
#'
#'
#' ## these rows have been removed from attr(tox1, 'data')
#' attr(tox1, 'duplicates')
#'
#' stopifnot(
#'   all.equal(tox1, attr(tox1, 'data')[-attr(tox1, 'duplicates'), ],
#'             check.attributes = FALSE)
#' )
#'
#'
#' ## use tabler_by/tabler_by2 to summarize
#' tabler_by(tox1, 'desc', 'grade', n = 10, pct.sign = FALSE)
#' tabler_by2(tox1, 'desc', 'grade', stratvar = 'phase')
#'
#' @export

tox_worst <- function(data, id = 'id', desc = 'desc', grade = 'grade',
                      code = NULL, version = 4L) {
  if (!is.factor(data[, grade]))
    stop('\'grade\' should be a factor with proper order')

  if (!is.null(code)) {
    ctc <- match_ctc(data[, code], version = version)
    data$desc <- factor(ctc$tox_desc)
    desc <- 'desc'
  }

  data <- data[order(data[, id], data[, desc], -xtfrm(data[, grade])), ]
  idx  <- which(duplicated(data[, c(id, desc)]))

  structure(
    if (length(idx))
      data[-idx, ] else data,
    data = data, duplicates = idx
  )
}

#' Count formatter
#'
#' Formats and prints a \emph{named} vector of counts with percents.
#'
#' @param x named vector of counts (a summary or table) or a long vector
#'   of character strings or factors
#' @param n total number of observations; if not given, the length of
#'   \code{x} is used
#' @param lowcase logical; names will be lowercase if \code{TRUE}, upper
#'   case if \code{FALSE}, and unchanged for any other value
#' @param frac logical; if \code{TRUE}, counts are shown as fractions of
#'   \code{n}
#' @param digits number of digits past the decimal point to keep
#' @param which optional integer or character vector to select or re-order
#'   the output; note that this does not change the counts or percentages
#' @param conf,show_conf optional arguments controlling binomial confidence
#'   intervals, passed to \code{\link{binconr}}
#'
#' @examples
#' x <- setNames(3:1, c('Gold', 'Silver', 'Bronze'))
#' countr(x)
#' countr(x, conf = 0.95)
#' countr(x, n = 10, frac = TRUE)
#' countr(x, n = 10, frac = TRUE, which = 2)
#'
#' ## using a character/factor vector, not counts
#' countr(names(x))
#' countr(names(x), which = 1)
#' countr(names(x), which = c(3, 1), frac = TRUE)
#' countr(names(x), which = 'Silver', lowcase = TRUE)
#'
#' @export

countr <- function(x, n, lowcase = NA, frac = FALSE, digits = 0L,
                   which = seq_along(x), conf = NULL, show_conf = TRUE) {
  if (inherits(x, 'table') || (!is.null(names(x)) & is.numeric(x))) {
    ## if x is a table or a _named_ vector (of counts)
    n <- if (missing(n))
      sum(x) else n
    x <- as.table(x)
  } else {
    n <- if (missing(n))
      length(x) else n
    x <- table(x)
  }

  if (is.na(lowcase) || !is.logical(lowcase))
    lowcase <- NULL

  n <- setNames(rep_len(n, length(x)), names(x))[which]
  x <- x[which]
  
  if (!is.null(conf)) {
    conf <- Map(binconr, x, n, conf = conf, show_conf = show_conf, percent = TRUE)
    conf <- gsub('.*\\(|\\)', '', conf)
  }
  
  res <- sprintf(
    '%s (n = %s%s; %s%%%s)',
    if (isTRUE(lowcase))
      tolower(names(x))
    else if (identical(lowcase, FALSE))
      toupper(names(x)) else names(x),
    roundr(x, 0L),
    if (frac)
      paste0('/', n) else '',
    roundr(as.numeric(x) / n * 100, digits),
    if (is.null(conf))
      '' else paste(';', conf)
  )
  
  iprint(res)
}

#' Date parse
#'
#' Parses day, month, and year columns to the standard date format.
#'
#' For two-digit years, the \code{origin} year should be specified; otherwise,
#' the default of 1900 will be used. For \code{NA} year, month, or day,
#' \code{origin} is used for defaults, i.e., \code{origin = c(15, 6, 2000)}
#' will convert missing days to day 15, missing months to June, and missing
#' years to 2000.
#'
#' @param d,m,y day, month, year as single integers or vectors
#' @param origin a vector of length three giving the origins for \code{d},
#'   \code{m}, and \code{y}, respectively; see details
#'
#' @return
#' A vector of \code{\link{Date}}-formatted strings.
#'
#' @examples
#' dmy(25, 7, 13)
#' dmy(25, 7, 2013)
#' dmy(NA, NA, 2000:2009)
#'
#' set.seed(1)
#' dd <- data.frame(
#'   id = 1:10,
#'   day = sample(1:31, 10),
#'   month = sample(1:12, 10),
#'   year = sample(1000:2500, 10)
#' )
#'
#' cbind(dd, dt = with(dd, dmy(day, month, year)))
#'
#'
#' ## NAs will be filled with corresponding values of origin
#' dd[, -1] <- lapply(dd[, -1], function(x) {x[sample(nrow(dd), 5)] <- NA; x})
#' cbind(dd, dt = with(dd, dmy(day, month, year, origin = c(15, 6, 2000))))
#'
#' @export

dmy <- function(d, m, y, origin = c(1, 1, 1900)) {
  f <- function(a, b) {
    suppressWarnings(a <- as.numeric(a))
    ifelse(is.na(a), b, a)
  }

  d[is.na(d)] <- origin[1L]
  m[is.na(m)] <- origin[2L]
  y[is.na(y)] <- origin[3L]

  y <- ifelse(nchar(y) <= 2, f(y, 0) + origin[3L], f(y, 0))

  as.Date(sprintf('%04s-%02s-%02s', y, f(m, origin[2L]), f(d, origin[1L])))
}

#' Combine html tables
#'
#' Wrapper to easily combine a list of data frames or matrices into html
#' tables using the \pkg{htmlTable} package. \code{combine_table2} can join
#' tables vertically or horizontally (common column and row names are not
#' required).
#'
#' @param x a list of matrices or data frames
#' @param tspanner,n.tspanner table spanner labels and number of rows,
#'   respectively, passed to \code{\link[htmlTable]{htmlTable}}; if missing,
#'   \code{names(x)} and \code{sapply(x, nrow)} are used
#' @param cgroup,n.cgroup table column labels and number of columns for each,
#'   respectively, passed to \code{\link[htmlTable]{htmlTable}}; if missing,
#'   \code{names(x)} and \code{sapply(x, ncol)} are used
#' @param how method to join objects, by row (\code{"rbind"}) or column
#'   (\code{"cbind"}) binding
#' @param ... additional arguments passed to \code{\link[htmlTable]{htmlTable}}
#'
#' @examples
#' sp <- lapply(split(mtcars, rep(1:3, c(1, 11, 20))), as.matrix)
#'
#' ## basic table
#' combine_table(sp)
#' combine_table(sp, letters[1:3], c(2, 5, 25))
#'
#' ## adding more options
#' combine_table(
#'   caption = 'Table 1: <code>mtcars</code> data set',
#'   sp, tspanner = sapply(sp, function(x) num2char(nrow(x))),
#'   css.cell = 'padding: 0 10 5px;',
#'   css.tspanner = 'text-align: center; color: red; font-style: italic;'
#' )
#'
#' combine_table2(sp, how = 'c', cgroup = LETTERS[1:3])
#'
#' @export

combine_table <- function(x, tspanner, n.tspanner, ...) {
  x <- if (!islist(x))
    list(x) else x

  n.tspanner <- if (missing(n.tspanner))
    sapply(x, function(xx) nrow(xx) %||% 1L) else n.tspanner
  tspanner <- if (missing(tspanner))
    names(x) %||% rep(' ', each = length(n.tspanner)) else tspanner

  ht <- htmlTable::htmlTable(
    do.call('rbind', x), tspanner = tspanner, n.tspanner = n.tspanner, ...
  )

  structure(ht, class = 'htmlTable')
}

#' @rdname combine_table
#' @export
combine_table2 <- function(x, tspanner, n.tspanner, cgroup, n.cgroup,
                           how = c('rbind', 'cbind'), ...) {
  x <- if (!islist(x))
    list(x) else x
  how <- switch(match.arg(how), rbind = 'rbindx', cbind = 'cbindx')

  if (how %in% c('rbind', 'rbindx')) {
    n.tspanner <- if (missing(n.tspanner))
      sapply(x, function(xx) nrow(xx) %||% 1L) else n.tspanner
    tspanner <- if (missing(tspanner))
      names(x) %||% rep(' ', each = length(n.tspanner)) else tspanner
    if (missing(cgroup))
      cgroup <- NULL
    if (missing(n.cgroup))
      n.cgroup <- NULL
  } else {
    n.cgroup <- if (missing(n.cgroup))
      sapply(x, function(xx) ncol(xx) %||% 1L) else n.cgroup
    cgroup <- if (missing(cgroup))
      names(x) %||% rep(' ', each = length(n.cgroup)) else cgroup
    if (missing(tspanner))
      tspanner <- NULL
    if (missing(n.tspanner))
      n.tspanner <- NULL
  }

  ht <- htmlTable::htmlTable(
    do.call(how, x), ...,
    tspanner = tspanner, n.tspanner = n.tspanner,
    cgroup = cgroup, n.cgroup = n.cgroup
  )

  structure(ht, class = 'htmlTable')
}

#' Inject div
#'
#' Inject an html division tag with style attribute.
#'
#' @param x a matrix or data frame
#' @param where an \code{nx2} matrix of row and column indices or vector (of
#'   the form c(row, col, row, col, ...)) specifying which cells to select; if
#'   \code{where} is missing, \code{style} is recycled for all cells
#' @param style vector of character string(s) applied to each cell, recycled
#'   if necessary
#'
#' @seealso
#' \code{\link[htmlTable]{htmlTable}}
#'
#' @examples
#' ht <- htmlTable::htmlTable(
#'   inject_div(head(cars), c(2, 2), style = 'border: dashed 1px;')
#' )
#' structure(ht, class = 'htmlTable')
#'
#' ## if where is missing, style is recycled over all cells
#' ht <- htmlTable::htmlTable(
#'   inject_div(head(mtcars),
#'              style = c('color: red;', 'color: blue', 'border: dashed 1px;')
#'   )
#' )
#' structure(ht, class = 'htmlTable')
#' 
#' ## where as a matrix
#' ht <- htmlTable::htmlTable(
#'   inject_div(head(cars),
#'              rbind(c(2, 2), c(2, 1), c(5, 2)),
#'              'background-color: yellow;')
#' )
#' structure(ht, class = 'htmlTable')
#' 
#' ## where as a vector
#' ht <- htmlTable::htmlTable(
#'   inject_div(head(cars),
#'              c(2,2,2,1,5,2),
#'              c('background-color: red; color: white;',
#'                'border: solid 1px;',
#'                'font-weight: 900; color: blue;')
#'   )
#' )
#' structure(ht, class = 'htmlTable')
#'
#' @export

inject_div <- function(x, where, style) {
  if (missing(style))
    return(x)
  
  inject <- function(x, where, what) {
    where <- if (missing(where) & !missing(what) || !length(where))
      which(row(x) > 0L, arr.ind = TRUE)
    else matrix(where, ncol = 2L, byrow = !is.matrix(where))
    
    what <- rep_len(what, nrow(where))
    mat  <- matrix('', nrow(x), ncol(x))
    mat[where] <- what
    
    mat
  }
  
  style <- inject(x, where, style)
  where <- style != ''

  x[where] <- sprintf('<div style=\'%s\'>%s</div>',
                      gsub(';*$', ';', style[where]), x[where])
  x
}

#' Letter case
#'
#' Convert a text string to several common cases.
#'
#' This function supports the following letter cases:
#'
#' \tabular{lll}{
#' \tab \code{first} \tab \code{Only the first letter is uppercase} \cr
#' \tab \code{upcase} \tab \code{Each Word Is Uppercase} \cr
#' \tab \code{downcase} \tab \code{tHE oPPOSITE oF uPCASE} \cr
#' \tab \code{camelcase} \tab \code{TheStringWillBeInCamelCase} \cr
#' \tab \code{upper} \tab \code{EQUIVALENT TO }\code{\link{toupper}} \cr
#' \tab \code{lower} \tab \code{equivalent to }\code{\link{tolower}} \cr
#' \tab \code{lowup} \tab \code{aLtErNaTiNg cAsEs} \cr
#' \tab \code{uplow} \tab \code{ThE OpPoSiTe oF LoWuP} \cr
#' }
#'
#' @param x a text string
#' @param case a case to use; can be (unambiguously) abbreviated; see details
#' @param translate logical; if \code{TRUE}, strings will be translated to
#'   upper- or lowercase \emph{before} \code{case} is applied to ensure that
#'   the case of all characters of \code{x} is uniform
#'
#' @examples
#' cases <- eval(formals(case)$case)
#' x <- 'the quick brown fox'
#' sapply(cases, case, x = x)
#'
#' ## all cases are vectorized
#' sapply(cases, case, x = strsplit(x, ' ')[[1]])
#'
#' case('upCASE', 'upcase', translate = FALSE) ## default
#' case('upCASE', 'upcase', translate = TRUE)
#'
#' @export

case <- function(x, case = c('first', 'upcase', 'downcase', 'camelcase',
                             'upper', 'lower', 'lowup', 'uplow'),
                 translate = FALSE) {
  alternating <- function(x, seq) {
    x <- strsplit(x, '')
    x <- lapply(x, function(y) {
      substring(y, seq, seq) <- toupper(substring(y, seq, seq))
      paste(y, collapse = '')
    })
    unlist(x)
  }

  case <- match.arg(case)

  if (translate) {
    x <- if (case %in% c('upper', 'uplow'))
      toupper(x) else tolower(x)
  }

  case <- switch(
    case,
    first     = '(^.)',
    upcase    = '(\\b.)',
    downcase  = '(?<=[a-z])(.)',
    camelcase = 'camelcase',
    upper     = {x <- toupper(x); TRUE},
    lower     = {x <- tolower(x); TRUE},
    lowup     = {x <- alternating(x, 0:1); TRUE},
    uplow     = {x <- alternating(x, 1:0); TRUE}
  )

  if (isTRUE(case))
    x
  else if (case == 'camelcase')
    gsub(' ', '', Recall(x, 'upcase'))
  else gsub(case, '\\U\\1', x, perl = TRUE)
}

#' Write an \code{htmlTable} to file
#'
#' Write an \code{\link[htmlTable]{htmlTable}} object to a file with optional
#' html attributes.
#'
#' @param x a string, e.g., the return of \code{\link[htmlTable]{htmlTable}}
#' @param file a character string naming the file to print to; \code{""},
#'   the default, prints to the console; other values (\code{NULL}, logicals,
#'   etc.) return \code{x}
#' @param attributes logical; if \code{TRUE}, html attributes are added and
#'   default border color names are replaced with hexadecimal values
#'
#' @examples
#' x <- htmlTable::htmlTable(head(cars))
#'
#' write_htmlTable(x)
#' write_htmlTable(x, attributes = FALSE)
#'
#' @export

write_htmlTable <- function(x, file = '', attributes = TRUE) {
  if (attributes) {
    x <- gsub('(?i)gr[ea]y\\s*(?=;)', '#bebebe', x, perl = TRUE)

    x <- paste(
      '<!DOCTYPE html>\n<html>\n<body>',
      x,
      '</body>\n</html>',
      sep = '\n'
    )

    x <- structure(x, class = 'htmlTable', html = TRUE)
  }

  if (!is.character(file))
    x else cat(x, file = file)
}

#' Align html
#'
#' Align text in columns of an \code{\link[htmlTable]{htmlTable}} at a
#' specific location (similar to \code{align} or \code{eqnarray} environments
#' in latex).
#'
#' @param x an object of class \code{\link[htmlTable]{htmlTable}}
#' @param sep a character string used as the center of alignment
#' @param where a character string or regular expression (see examples)
#'   defining where strings should be aligned; the easiest method is to use
#'   \code{"&&"} at the desired alignment point
#' @param min_width minimum width of the span tag; too narrow will not
#'   align strings but too wide adds whitespace
#'
#' @examples
#' tmp <- within(cars, {
#'   align2 <- sprintf('%s&&(%s)', speed, dist)
#'   align1 <- sprintf('%s (%s)', speed, dist)
#'   raw    <- sprintf('%s - (%s)', speed, dist)
#' })
#'
#' ht <- htmlTable::htmlTable(
#'   head(tmp), n.cgroup = 2:3, cgroup = c('raw', 'align'),
#'   caption = 'caption', rnames = FALSE
#' )
#'
#' ## default
#' structure(ht, class = 'htmlTable')
#'
#' ## align at '&&'
#' structure(html_align(ht), class = 'htmlTable')
#' structure(html_align(ht, ' --- '), class = 'htmlTable')
#'
#' ## align at '&&' or ' '
#' ## the regex should capture the left text in group 1, use non-capture
#' ## for separating text, andn capture the right text in group 2
#' structure(
#'   html_align(ht, '&nbsp;', '(\\d+)(?: |&&)([()0-9]+)'),
#'   class = 'htmlTable'
#' )
#'
#' @export

html_align <- function(x, sep = '&nbsp;', where = '&&', min_width = '35px') {
  stopifnot(inherits(x, 'htmlTable'))
  ok <- identical(where, '&&')

  css <- sprintf(
    '.charalign {
      text-align: center;
      /* font-size: 5pt; */
    }
    .charalign span {
      /* font-size: 11pt; */
      min-width: %s;
      display: inline-block;
      text-align: left;
    }
    .charalign span:first-child {
      text-align: right;
    }', min_width)

  co <- capture.output(print(x, useViewer = FALSE))
  at <- grepl(where, co, perl = TRUE)

  pat <- if (ok)
    sprintf('(?<=>)(.*?)%s(.*?)(?=<)', where) else where
  repl <- sprintf(
    '<div class="charalign"><span>\\1</span>%s<span>\\2</span></div>',
    sep
  )
  co[at] <- gsub(pat, repl, co[at], perl = TRUE)


  res <- paste0(co, collapse = '\n')
  res <- paste0(res, '\n<style>', css, '\n</style>', collapse = '\n')
  attributes(res) <- attributes(x)

  res
}

#' Abbreviations
#' 
#' Extract abbreviations from strings.
#' 
#' @param x a vector of strings to abbreviate
#' @param pattern,n the pattern and number of occurrences to use
#' @param include an optional pattern to match additional strings surrounded
#'   by word boundaries; any non character will omit
#' 
#' @examples
#' x <- c('United States of America', 'the Red, the White, and the Blue')
#' abbr(x)
#' abbr(x, include = NA)
#' abbr(x, include = 'and')
#' abbr(x, '[A-Z][A-z]', include = NA)
#' 
#' @export

abbr <- function(x, pattern = '[A-Z]', n = 1L, include = '[a-z]+') {
  p <- if (is.character(include))
    sprintf('(\\b%s{1,%s}|\\b%s\\b)|.', pattern, n, include)
  else sprintf('\\b(%s{1,%s})|.', pattern, n)
  
  gsub(p, '\\1', x, perl = TRUE)
}

#' html2char
#' 
#' Convert html entities to character or vice versa.
#' 
#' @param x a character or factor vector
#' @param to_html logical; if \code{TRUE}, convert to html entities
#' 
#' @aliases html2char
#' 
#' @examples
#' x <- c('<', '>', '≤', '+-', '+/-')
#' c2h(x)
#' c2h(factor(x))
#' h2c(c2h(x)) == x
#' 
#' h2c(c('&Beta;', '&beta;'))
#' 
#' @export

h2c <- function(x, to_html = FALSE) {
  if (!length(x) || all(is.na(x)) || !(is.character(x) | is.factor(x)))
    return(x)
  greek <- c(
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
    'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi', 'Omicron', 'Pi', 'Rho',
    'Sigma', 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega'
  )
  key <- data.frame(
    html = c(
      'amp', 'pm', 'pm', 'pm', 'lt', 'gt', 'le', 'ge',
      'emsp', 'ensp', 'nbsp', 'deg', 'frac14', 'frac12', 'frac34', 'infin',
      c(rbind(greek, tolower(greek))), 'sigmaf'
    ),
    char = c(
      '&', '+/-', '+/-', '+-', '<', '>', '≤', '≥',
      '  ', ' ', ' ', '°', '¼', '½', '¾', '∞',
      'Α', 'α', 'Β', 'β', 'Γ', 'γ', 'Δ', 'δ', 'Ε', 'ε', 'Ζ', 'ζ',
      'Η', 'η', 'Θ', 'θ', 'Ι', 'ι', 'Κ', 'κ', 'Λ', 'λ', 'Μ', 'μ',
      'Ν', 'ν', 'Ξ', 'ξ', 'Ο', 'ο', 'Π', 'π', 'Ρ', 'ρ', 'Σ', 'σ',
      'Τ', 'τ', 'Υ', 'υ', 'Φ', 'φ', 'Χ', 'χ', 'Ψ', 'ψ', 'Ω', 'ω', 'ς'
    )
  )
  key$html <- sprintf('&%s;', key$html)
  
  if (to_html) {
    to <- key$html
    fr <- key$char
  } else {
    to <- key$char
    fr <- key$html
  }
  
  idx <- seq.int(nrow(key))
  if (is.factor(x)) {
    for (ii in idx)
      levels(x) <- gsub(fr[ii], to[ii], levels(x), fixed = TRUE)
  } else {
    for (ii in idx)
      x <- gsub(fr[ii], to[ii], x, fixed = TRUE)
  }
  
  x
}

#' @rdname h2c
#' @export
c2h <- function(x) {
  h2c(x, to_html = TRUE)
}