R/tableRegression.R

Defines functions confint.geeglm tableRegression

Documented in tableRegression

#' Format coefficient tables of regression models
#' 
#' Formats output from the regression model functions: \code{\link[stats]{lm}},
#' \code{\link[stats]{glm}}, \code{\link[MASS]{glm.nb}},
#' \code{\link[survival]{coxph}}, and \code{\link{Weibull}}.
#' 
#' In \code{stats}: \itemize{ \item If \code{t.value} is chosen, the
#' \code{z.value} might be taken, depending on the model.  \item For lm-models:
#' \code{ci.95} calculates a confidence interval for the estimate.  \item For
#' glm- and coxph-models: \code{ci.95} calculates a confidence interval for the
#' exp(estimate). }
#' 
#' @param model Object of class \code{\link[stats]{lm}},
#' \code{\link[stats]{glm}}, \code{negbin} (obtained by
#' \code{\link[MASS]{glm.nb}}), \code{coxph} (obtained by
#' \code{\link[survival]{coxph}}), and list obtained by \code{\link{Weibull}}.
#' @param stats character vector with stats chosen from "estimate",
#' "exp.estimate", "standarderror", "t.value", "ci.95", and "p.value".
#' @param col.names Character vector of same length and order as in \code{stats}.
#' A percentage sign must be escaped by two backslashes.
#' @param row.names Character vector of row names.
#' @param intercept Logical vector of length one indicating whether to provide
#' an intercept or not. If intercept is set TRUE, the first line of the summary
#' output is removed. If the model is a binomial regression, intercept is set
#' FALSE. Intercepts are not available for Weibull or Cox models, because they
#' do not provide any intercept value.
#' @param text Either "english" (default) or "german" indicating the used
#' language names.
#' @param text.ci Either "english", "german" or "none". The language used to
#' denote confidence interval, see \code{\link{formatCI}}.
#' @param eps.pvalue If \code{strict = FALSE}, p-values smaller than 
#' \code{eps.pvalue} will be formatted as "< eps.pvalue". Otherwise this
#' argument is ignored.
#' @param digits Vector of length \code{stats}, digits used for each column.
#' @param strict Either \code{TRUE} or \code{FALSE} (default). If \code{FALSE}, 
#' p-values are formatted with \code{\link[biostatUZH]{formatPval}}. Otherwise
#' \code{\link[biostatUZH]{formatPvalStrict}} is used to format p-values. 
#' This argument is thus only relevant when the argument \code{stats} is 
#' either \code{NULL} or contains \code{"p.value"}.
#' @param big.mark Character vector as in \code{\link[base]{format}}.
#' @param xtable If TRUE, a Latex table is returned, otherwise a data.frame is
#' returned.
#' @param align See \code{\link[xtable]{xtable}}.
#' @param caption See \code{\link[xtable]{xtable}}.
#' @param label See \code{\link[xtable]{xtable}}.
#' @param vars Specify the variables for which regression summaries should be
#' printed.  The argument \code{vars} takes a string vector with the names of
#' the coefficients in the model.
#' @param ... Arguments passed to \code{\link{print.xtable}}.
#' @return Depending on the value of the \code{xtable} argument, the function
#' either prints and returns LaTeX code representing the produced table of
#' coefficients, or it returns the corresponding data frame.
#' @author Sina Rueeger with contributions by Sebastian Meyer.
#' @seealso \code{\link{tableOR}}, \code{\link[xtable]{xtable}},
#' \code{\link[stats]{lm}}, \code{\link[stats]{glm}},
#' \code{\link[MASS]{glm.nb}} \code{\link[survival]{coxph}},
#' \code{\link{Weibull}}.
#' @examples
#' 
#' ## Linear model
#' ## ---------------
#' mod.lm <- lm(Sepal.Length ~ Sepal.Width, data = iris)
#' mod.lm1 <- lm(Sepal.Length ~ .^2, data = iris) 
#' 
#' tableRegression(model = mod.lm)
#' 
#' ## strict argument is used to force p-values to have a certain number
#' ## of digits (here 4).
#' tableRegression(model = mod.lm, digits = c(2, 2, 4), strict = TRUE)
#' tableRegression(model = mod.lm, digits = c(2, 2, 4), strict = FALSE)
#' 
#' ## choosing columns, columns and row naming in german
#' tableRegression(model = mod.lm1, stats = c("estimate", "t.value", "p.value"),
#'                 text = "german")
#' 
#' ## adapt row names, plus special format for ci
#' tableRegression(model = mod.lm, row.names = c("Intercept", "Width Sepal"),
#'                 text.ci = "none")
#' 
#' ## Poisson model
#' ## (example from ?glm)
#' ## --------------
#' counts <- c(18,17,15,20,10,20,25,13,12)
#' outcome <- gl(3,1,9)
#' treatment <- gl(3,3)
#' d.AD <- data.frame(treatment, outcome, counts)
#' mod.glm.pois <- glm(counts ~ outcome + treatment, family=poisson())
#' tableRegression(model = mod.glm.pois)
#' 
#' 
#' ## Negative binomial model
#' ## --------------
#' mod.glm.nb <- glm.nb(Days ~ Sex + Age, data = quine)
#' tableRegression(model = mod.glm.nb,
#'     caption = paste("NegBin model. Estimated dispersion:",
#'         sprintf("%4.2f ($se=%4.2f$).", mod.glm.nb$theta, mod.glm.nb$SE.theta)),
#'     label = "tab:glm.nb")
#' 
#' 
#' 
#' ## Logistic model
#' ## -------------
#' dat <- survival::rats
#' dat$rx <- factor(dat$rx, labels = c(" (A)", " (B)"))
#' mod.glm.bin <- glm(status ~ litter + rx, family = binomial, data = dat)
#' 
#' tableRegression(model = mod.glm.bin,
#'                 stats = c("estimate", "exp.estimate", "ci.95", "t.value", "p.value"),
#'                 text = "english", digits = rep(3, 5),
#'                 caption = "Here goes the caption.", label = "mod:logit")
#' 
#' ## including intercept
#' tableRegression(model = mod.glm.bin,
#'                 stats = c("estimate", "exp.estimate", "ci.95", "t.value", "p.value"),
#'                 text = "english", digits = rep(3, 5),
#'                 caption = "Here goes the caption.", label = "mod:logit",
#'                 intercept = TRUE)
#' 
#' 
#' ## Cox model
#' ## (example from ?survival::coxph)
#' ## -------------
#' dat <- list(time = c(4, 3, 1, 1, 2, 2, 3), 
#'             status = c(1, 1, 1, 0, 1, 1, 0), 
#'             x = c(0, 2, 1, 1, 1, 0, 0), 
#'             sex = c(0, 0, 0, 0, 1, 1, 1)) 
#'
#' mod.cox <- coxph(Surv(time, status) ~ x, data = dat)
#' mod.cox1 <- coxph(Surv(time, status) ~ x + factor(sex), data = dat)
#' mod.cox2 <- coxph(Surv(time, status) ~ x + strata(sex), data = dat)
#' 
#' tableRegression(model = mod.cox)
#' tableRegression(model = mod.cox1)
#' tableRegression(model = mod.cox2)
#' 
#' 
#' ## Weibull
#' ## (example from WeibullReg)
#' ## -------------
#' data("larynx")
#' mod.wb <- weibullReg(Surv(time, death) ~ factor(stage) + age,
#'                      data = larynx)
#' tableRegression(model = mod.wb)
#'
#' @import MASS
#' @importFrom xtable xtable print.xtable
#' @export
tableRegression <- function(model,
                            stats = NULL,
                            col.names = NULL,
                            row.names = NULL,
                            intercept = NULL,
                            text = c("english", "german"), 
                            text.ci = text,
                            eps.pvalue = 0.0001,
                            digits = NULL,
                            strict = FALSE,
                            big.mark = "'",
                            xtable = TRUE,
                            align = NULL,
                            caption = NULL,
                            label = NULL,
                            vars = NULL,
                            ...) {
    
    stopifnot(inherits(model, c("lm", "glm", "negbin", "coxph", "list"))) 
    # Weibull returns a list
    stopifnot(is.null(stats) ||
              (is.character(stats) &&  all(stats %in%
               c("estimate","exp.estimate", "standarderror", "t.value", "ci.95", "p.value" ))))
    stopifnot(is.null(col.names) || is.character(col.names),
              is.null(row.names) || is.character(row.names),
              is.null(intercept) ||
              (is.logical(intercept) && length(intercept) == 1 && is.finite(intercept)),
              !is.null(text))
    text <- match.arg(arg = text)
    stopifnot(is.character(text.ci),
              is.numeric(eps.pvalue), length(eps.pvalue) == 1, is.finite(eps.pvalue),
              0 < eps.pvalue,
              is.null(digits) ||
              (is.numeric(digits) && all(is.finite(digits)) && all(is.wholenumber(digits))),
              is.logical(strict), length(strict) == 1L, !is.na(strict))
    if(!is.null(stats) && !is.null(digits)){
      if(length(stats) != length(digits)) stop("'digits' must have same length as 'stats'.")
    }
    if(is.null(stats) && !is.null(digits)){
      if(!(length(digits) %in% c(1L, 3L))) 
        stop("If 'stats' is NULL, 'digits' must be length 1 or length 3.")
      if(length(digits) == 1L) digits <- rep(digits, 3L)
    }
    ## big.mark is argument to format and not tested
    stopifnot(is.logical(xtable), length(xtable) == 1, is.finite(xtable))
    ## align, caption, label are arguments to xtable and not tested here
    stopifnot(is.null(vars) || is.character(vars))
    
    raw.col.names.german <- c("Koeffizient", "Exp(Koeffizient)", "Standardfehler", "$t$-Wert", "95\\%-Konfidenzintervall", "$p$-Wert")
    raw.col.names.english <- c("Coefficient", "Exp(Coefficient)", "Standarderror", "$t$-value", "95\\%-confidence interval", "$p$-value")
    raw.stats <- c("estimate", "exp.estimate", "standarderror", "t.value", "ci.95", "p.value")
    
    clm <- class(model)[1]
    if (clm %in% c("glm", "geeglm")) {
        cl <- model$family$family
    } else {
        cl <- clm
    }
    ## lm >> linear model
    ## binomial >> generalized linear model
    ## poisson >> generalized linear model
    ## list >> weibull
    ## coxph >> survival
    ## negbin >> negative binomial model (fit by MASS::glm.nb)
    
    ## LM
    ## -------------
    if (clm == "lm")
    {
        ## intercept
        if(is.null(intercept)) intercept <- TRUE
        
        ## stats
        k <- c(1, 5, 6)
        if(is.null(stats)) stats <- raw.stats[k]
        
        ## col.names >> dependent on stats & text
        ind <- sapply(stats, function(x) which(x == raw.stats)) #which(raw.stats %in% stats)
        if(is.null(col.names))
        {
            if(text == "german") col.names <- raw.col.names.german[ind]
            if(text == "english") col.names <- raw.col.names.english[ind]
        }
        
        ## row.names >> dependent on intercept & text
        if(is.null(row.names))
        {
            row.names <- names(model$coef)[-1]
            if(intercept)
            {
                if(text == "german") intercept.nam <- "Achsenabschnitt"
                if(text == "english") intercept.nam <- "Intercept"
                row.names <- c(intercept.nam, row.names)
            }
            
        }
        
        ## digits
        if(is.null(digits)) digits <- rep(2, length(stats))
        
    } else {
        ## rest
        ## -------------
        
        ## intercept
        if(is.null(intercept))
        {
            ## intercept is omitted when having weibull
            ## or logit or Poisson regression or coxph
            intercept <- ! ((clm %in% c("list", "coxph")) | (cl %in% c("binomial", "quasibinomial", "poisson", "quasipoisson")))
        }
        
        ## stats
        k <- c(2, 5, 6)
        if(is.null(stats)) stats <- raw.stats[k]
        
        ## col.names >> dependent on stats & text
        ind <- sapply(stats, function(x) which(x == raw.stats))#which(raw.stats %in% stats)
        if(is.null(col.names))
        {
            exp.nam <- switch(
                cl,
                "poisson" =, "quasipoisson" =, "negbin" = "Rate Ratio",
                "binomial" =, "quasibinomial" = "Odds Ratio",
                "coxph" = "Hazard Ratio",
                "Exp(Coefficient)"
            )
            
            ind.exp <- raw.stats == "exp.estimate"
            raw.col.names.german[ind.exp] <- exp.nam
            raw.col.names.english[ind.exp] <- exp.nam
            
            if(text == "german") col.names <- raw.col.names.german[ind]
            if(text == "english") col.names <- raw.col.names.english[ind]
        }
        
        ## row.names >> dependent on intercept & text
        if(is.null(row.names))
        {
            if(clm == "list")
            {
                row.names <- rownames(model$coef)[-c(1,2)]
            }else{
                if(clm == "coxph")
                {
                    row.names <- names(model$coefficients)
                }else{
                    row.names <- names(model$coef)[-1]
                }
            }
            
            if(intercept)
            {
                if(text == "german") intercept.nam <- "Achsenabschnitt"
                if(text == "english") intercept.nam <- "Intercept"
                row.names <- c(intercept.nam, row.names)
            }
        }
        
        ## digits
        if(is.null(digits)) digits <- rep(2, length(stats))
        
    }
    
    
    ## warning intercept
    if(intercept & clm %in% c("list", "coxph"))
    {
        warning("Weibull and Cox models do not include an intercept. Set intercept = FALSE")
    }
    
    ## digitis ci
    if("ci.95" %in% stats)
    {
        digits.ci <- digits[stats %in% "ci.95"]
    } else {
        digits.ci <- 2
    }
    
    ## text.ci
    ## if(is.null(text.ci)) text.ci <- text
    
    #col.names <- sub("%", "\\\\%", col.names)
    
    # linear model -----------------------------------------------------------
    ## linear model
    
    if (clm == "lm")
    {
        estimate <- summary(model)$coef[,1]
        exp.estimate <- exp(estimate)
        standarderror <- summary(model)$coef[,2]
        t.value <- summary(model)$coef[,3]
        p.value <- summary(model)$coef[,4]
        ci.95 <- formatCI(confint(model), digits = digits.ci, text = text.ci)
    }
    
    
    
    
    # negbin ------------------------------------------------------------------
    ## negbin
    
    
    if (clm %in% c("negbin")) {
        estimate <- summary(model)$coef[, 1]
        exp.estimate <- exp(estimate)
        standarderror <- summary(model)$coef[, 2]
        t.value <- summary(model)$coef[, 3]
        p.value <- summary(model)$coef[, 4]
        
        # change colnames
        col.names[stats == "exp.estimate"] <- "Rate Ratio"
        
        ## confint for exp.estimate (actually depends on MASS:::confint.glm)
        ci.95 <- formatCI(exp(suppressMessages(confint(model))), 
                          digits = digits.ci, text = text.ci)
    }
    
    
    # glm / gee ---------------------------------------------------------------
    if (clm %in% c("glm", "geeglm")) { # both using the same family
        
        back.trafo <- switch(paste0(c(model$family$family, model$family$link), collapse = "."),
                             "binomial.logit" = list("fct" = exp, "name" = "Odds Ratio"),
                             "quasibinomial.logit" = list("fct" = exp, "name" = "Odds Ratio"),
                             
                             "gaussian.identity" = list("fct" = identity, "name" = "Estimate"),
                             "quasi.identity" = list("fct" = identity, "name" = "Estimate"),
                             
                             "poisson.log" = list("fct" = exp, "name" = "Rate Ratio"),
                             "quasipoisson.log" = list("fct" = exp, "name" = "Rate Ratio"))
        
        # change colnames
        col.names[stats == "exp.estimate"] <- c(back.trafo$name)
        
        # calculating
        estimate <- summary(model)$coef[, 1]
        exp.estimate <- back.trafo$fct(estimate) # bad name for variable
        standarderror <- summary(model)$coef[, 2]
        t.value <- summary(model)$coef[, 3]
        p.value <- summary(model)$coef[, 4]
        
        
        # CI calculation
        ci.95 <- if (clm == "glm") {
                     formatCI(back.trafo$fct(suppressMessages(confint(model))), digits = digits.ci, 
                              text = text.ci)
                 } else {
                     formatCI(back.trafo$fct(confint.geeglm(model)),
                              digits = digits.ci, text = text.ci)
                 }
    }
        
    ## coxmod ------------------------------------------------------------------
    if (clm == "coxph") {
        estimate <- summary(model)$coefficients[,1]
        exp.estimate <- exp(estimate)
        standarderror <- summary(model)$coefficients[,3]
        t.value <- summary(model)$coefficients[,4]
        p.value <- summary(model)$coefficients[,5]
        ci.95 <- formatCI(cbind(summary(model)$conf.int[,3], summary(model)$conf.int[,4]),
                          digits = digits.ci, text = text.ci) 
    }
    
    
    # weibull -----------------------------------------------------------------
    if (clm == "list") {
        estimate <- model$coef[-c(1:2),1]
        exp.estimate <- exp(estimate)
        standarderror <- model$coef[-c(1:2), 2]
        t.value <- NA
        p.value <- 2 * pnorm(- abs(estimate / standarderror))
        ci1 <- estimate - qnorm(0.975) * standarderror
        ci2 <- estimate + qnorm(0.975) * standarderror
        ci.95 <- formatCI(exp(cbind(ci1, ci2)), digits = digits.ci, text = text.ci) 
        col.names[stats == "exp.estimate"] <- c("Hazard Ratio")
    }
    
    
    # bring everything together -----------------------------------------------
    output <- data.frame(estimate, exp.estimate, standarderror, t.value, ci.95, p.value,
                         stringsAsFactors = FALSE)
    
    if(!intercept && !(clm %in% c("list", "coxph"))) {
        ## in weibull and coxph there is anyway no intercept plotted #
        output <- output[-1,]
    }
    
    ## extract return outputs ------------------------------------------
    if (nrow(output) > 1) {
        output.return <- output[, ind]
        colnames(output.return) <- col.names
        rownames(output.return) <- row.names
    } else {
        output.return <- data.frame(output[,ind])
        names(output.return) <- col.names
        rownames(output.return) <- row.names
    }
    
    
    # formatieren des outputs -------------------------------------------------
    for (i in 1:ncol(output.return)) {
      if(stats[i] == "p.value") {
        output.return[,i] <- if(strict){
          formatPvalStrict(output.return[, i], digits = digits[i])
        } else {
          formatPval(as.numeric(as.character(output.return[,i])),
                     break.eps = eps.pvalue)
        }
      } else {
        if(stats[i] != "ci.95") {
          output.return[,i] <-  sapply(output.return[,i], function(x)
            format(as.numeric(as.character(x)), big.mark = big.mark,
                   digits = digits[i], nsmall = digits[i], scientific = FALSE))
          if(stats[i] == "exp.estimate" && nrow(output.return) > 1) {
            output.return[-1,i] <- sapply(output.return[-1,i], function(x)
              format(as.numeric(as.character(x)), digits = digits[i],
                     big.mark = big.mark, nsmall = digits[i], scientific = FALSE))
          }
        }
      }
    }
    
     
    # table -------------------------------------------------------------------
    if (xtable) {
        if (is.null(align))
            align <- paste(rep("r", length(stats) + 1), collapse = "")
        xtab <- xtable::xtable(output.return, caption = caption, label = label, align = align)
        ## options for print.xtable (can be overwritten by ... arguments)
        oopt <- options(
            xtable.include.rownames = TRUE,
            xtable.floating = TRUE,
            xtable.type = "latex",
            xtable.size = "footnotesize",
            xtable.table.placement = "!h",
            xtable.sanitize.colnames.function = identity  # do not escape "$"
        )
        on.exit(options(oopt))
        xtable::print.xtable(xtab, ...)
        return(invisible(xtab))
    } else {
        return(output.return)
    }
}

# # From package broom version 0.5.3 (not exported from broom Namespace)
# in order to avoid external dependencies
# 
# broom:::confint.geeglm

confint.geeglm <- function(object, parm, level = 0.95, ...) {
  cc <- stats::coef(summary(object))
  mult <- stats::qnorm((1+level)/2)
  citab <- with(as.data.frame(cc),
                cbind(lwr=Estimate-mult*Std.err,
                      upr=Estimate+mult*Std.err))
  rownames(citab) <- rownames(cc)
  citab[parm,]
}
felix-hof/biostatUZH documentation built on Sept. 27, 2024, 1:48 p.m.