Nothing
#' Fit Regression Model with Publication-Ready Output
#'
#' Provides a unified interface for fitting various types of regression models
#' with automatic formatting of results for publication. Supports generalized
#' linear models, linear models, survival models, and mixed-effects models with
#' consistent syntax and output formatting. Handles both univariable and
#' multivariable models automatically.
#'
#' @param data Data frame or data.table containing the analysis dataset.
#' Required for formula-based workflow; optional for model-based workflow
#' (extracted from model if not provided).
#'
#' @param outcome Character string specifying the outcome variable name. For
#' survival analysis, use \code{Surv()} syntax from the \pkg{survival} package
#' (\emph{e.g.,} \code{"Surv(time, status)"} or \code{"Surv(os_months, os_status)"}).
#' Required for formula-based workflow; ignored if \code{model} is provided.
#'
#' @param predictors Character vector of predictor variable names to include in
#' the model. All predictors are included simultaneously (multivariable model).
#' For univariable models, provide a single predictor. Can include continuous,
#' categorical (factor), or binary variables. Required for formula-based
#' workflow; ignored if \code{model} is provided.
#'
#' @param model Optional pre-fitted model object to format. When provided,
#' \code{outcome} and \code{predictors} are ignored and the model is formatted
#' directly. Supported model classes include:
#' \itemize{
#' \item \code{glm} - Generalized linear models
#' \item \code{negbin} - Negative binomial models from \code{MASS::glm.nb()}
#' \item \code{lm} - Linear models
#' \item \code{coxph} - Cox proportional hazards models
#' \item \code{lmerMod}, \code{glmerMod} - Mixed-effects models from \pkg{lme4}
#' \item \code{coxme} - Mixed-effects Cox models
#' }
#'
#' @param model_type Character string specifying the type of regression model.
#' Ignored if \code{model} is provided. Options include:
#' \itemize{
#' \item \code{"glm"} - Generalized linear model (default). Supports multiple
#' distributions via the \code{family} parameter including logistic, Poisson,
#' Gamma, Gaussian, and quasi-likelihood models.
#' \item \code{"lm"} - Linear regression for continuous outcomes with normally
#' distributed errors. Equivalent to \code{glm} with \code{family = "gaussian"}.
#' \item \code{"coxph"} - Cox proportional hazards model for time-to-event
#' survival analysis. Requires \code{Surv()} outcome syntax.
#' \item \code{"clogit"} - Conditional logistic regression for matched
#' case-control studies or stratified analyses.
#' \item \code{"negbin"} - Negative binomial regression for overdispersed count
#' data (requires \pkg{MASS} package). Estimates an additional dispersion parameter
#' compared to Poisson regression.
#' \item \code{"lmer"} - Linear mixed-effects model for hierarchical or clustered
#' data with continuous outcomes (requires \pkg{lme4} package).
#' \item \code{"glmer"} - Generalized linear mixed-effects model for hierarchical
#' or clustered data with non-normal outcomes (requires \pkg{lme4} package).
#' \item \code{"coxme"} - Cox mixed-effects model for clustered survival data
#' (requires coxme package).
#' }
#'
#' @param family For GLM and GLMER models, specifies the error distribution and link
#' function. Can be a character string, a family function, or a family object.
#' Ignored for non-GLM/GLMER models.
#'
#' \strong{Binary/Binomial outcomes:}
#' \itemize{
#' \item \code{"binomial"} or \code{binomial()} - Logistic regression for binary
#' outcomes (0/1, TRUE/FALSE). Returns odds ratios (OR). Default.
#' \item \code{"quasibinomial"} or \code{quasibinomial()} - Logistic regression
#' with overdispersion. Use when residual deviance >> residual df.
#' \item \code{binomial(link = "probit")} - Probit regression (normal CDF link).
#' \item \code{binomial(link = "cloglog")} - Complementary log-log link for
#' asymmetric binary outcomes.
#' }
#'
#' \strong{Count outcomes:}
#' \itemize{
#' \item \code{"poisson"} or \code{poisson()} - Poisson regression for count
#' data. Returns rate ratios (RR). Assumes mean = variance.
#' \item \code{"quasipoisson"} or \code{quasipoisson()} - Poisson regression
#' with overdispersion. Use when variance > mean.
#' }
#'
#' \strong{Continuous outcomes:}
#' \itemize{
#' \item \code{"gaussian"} or \code{gaussian()} - Normal/Gaussian distribution
#' for continuous outcomes. Equivalent to linear regression.
#' \item \code{gaussian(link = "log")} - Log-linear model for positive continuous
#' outcomes. Returns multiplicative effects.
#' \item \code{gaussian(link = "inverse")} - Inverse link for specific applications.
#' }
#'
#' \strong{Positive continuous outcomes:}
#' \itemize{
#' \item \code{"Gamma"} or \code{Gamma()} - Gamma distribution for positive,
#' right-skewed continuous data (\emph{e.g.,} costs, lengths of stay). When
#' passed as a string, resolves to log link for interpretable multiplicative
#' effects. The canonical inverse link can be specified explicitly with
#' \code{Gamma(link = "inverse")}.
#' \item \code{Gamma(link = "log")} - Gamma with log link (equivalent to
#' \code{"Gamma"} string shorthand).
#' \item \code{Gamma(link = "inverse")} - Gamma with inverse (canonical) link.
#' \item \code{Gamma(link = "identity")} - Gamma with identity link for additive
#' effects on positive outcomes.
#' \item \code{"inverse.gaussian"} or \code{inverse.gaussian()} - Inverse Gaussian
#' for positive, highly right-skewed data.
#' }
#'
#' For negative binomial regression (overdispersed counts), use
#' \code{model_type = "negbin"} instead of the \code{family} parameter.
#'
#' See \code{\link[stats]{family}} for additional details and options.
#'
#' @param random Character string specifying the random-effects formula for
#' mixed-effects models (\code{glmer}, \code{lmer}, \code{coxme}). Use standard
#' \pkg{lme4}/\pkg{coxme} syntax, \emph{e.g.,} \code{"(1|site)"} for random
#' intercepts by site, \code{"(1|site/patient)"} for nested random effects, or
#' \code{"(1|site) + (1|doctor)"} for crossed random effects. Required when
#' \code{model_type} is a mixed-effects model type. Alternatively, random
#' effects can be included directly in the \code{predictors} vector using the
#' same syntax. Default is \code{NULL}.
#'
#' @param interactions Character vector of interaction terms using colon
#' notation (\emph{e.g.,} \code{c("age:sex", "treatment:stage")}). Interaction terms
#' are added to the model in addition to main effects. Default is \code{NULL}
#' (no interactions).
#'
#' @param strata For Cox or conditional logistic models, character string naming
#' the stratification variable. Creates separate baseline hazards for each
#' stratum level without estimating stratum effects. Default is \code{NULL}.
#'
#' @param cluster For Cox models, character string naming the variable for
#' robust clustered standard errors. Accounts for within-cluster correlation
#' (\emph{e.g.,} patients within hospitals). Default is \code{NULL}.
#'
#' @param weights Character string naming the weights variable in \code{data}.
#' The specified column should contain numeric values for observation weights.
#' Used for weighted regression, survey data, or inverse probability weighting.
#' Default is \code{NULL}.
#'
#' @param conf_level Numeric confidence level for confidence intervals. Must be
#' between 0 and 1. Default is 0.95 (95\% confidence intervals).
#'
#' @param reference_rows Logical. If \code{TRUE}, adds rows for reference
#' categories of factor variables with baseline values (OR/HR/RR = 1,
#' coefficient = 0). Default is \code{TRUE}.
#'
#' @param show_n Logical. If \code{TRUE}, includes the sample size column in
#' the output. Default is \code{TRUE}.
#'
#' @param show_events Logical. If \code{TRUE}, includes the events column in
#' the output (for survival and logistic regression). Default is \code{TRUE}.
#'
#' @param digits Integer specifying the number of decimal places for effect
#' estimates (OR, HR, RR, coefficients). Default is 2.
#'
#' @param p_digits Integer specifying the number of decimal places for
#' \emph{p}-values. Values smaller than \code{10^(-p_digits)} are displayed
#' as \code{"< 0.001"} (for \code{p_digits = 3}), \code{"< 0.0001"} (for
#' \code{p_digits = 4}), etc. Default is 3.
#'
#' @param labels Named character vector or list providing custom display
#' labels for variables. Names should match variable names, values are display
#' labels. Default is \code{NULL}.
#'
#' @param keep_qc_stats Logical. If \code{TRUE}, includes model quality statistics
#' (AIC, BIC, R-squared, concordance, \emph{etc.}) in the raw data attribute for
#' model diagnostics and comparison. Default is \code{TRUE}.
#'
#' @param exponentiate Logical. Whether to exponentiate coefficients. Default
#' is \code{NULL}, which automatically exponentiates for logistic, Poisson,
#' and Cox models, and displays raw coefficients for linear models. Set to
#' \code{TRUE} to force exponentiation or \code{FALSE} to force coefficients.
#'
#' @param conf_method Character string controlling the confidence interval method.
#' If \code{NULL} (default), uses \code{getOption("summata.conf_method", "profile")}.
#' \itemize{
#' \item \code{"profile"} - Profile likelihood intervals for GLM and negative
#' binomial models (via \code{MASS::confint.glm()}), exact \emph{t}-distribution
#' intervals for linear models. Falls back to Wald on profiling failure.
#' Quasi-likelihood families always use Wald (no true likelihood).
#' \item \code{"wald"} - Wald intervals (coefficient \eqn{\pm} \emph{z}
#' \eqn{\times} SE) for all model types. Faster but less accurate near
#' boundary conditions or with small subgroups.
#' }
#' Cox and mixed-effects models use Wald intervals regardless of this setting.
#'
#' @param number_format Character string or two-element character vector
#' controlling thousand and decimal separators in formatted output. Named
#' presets:
#' \itemize{
#' \item \code{"us"} - Comma thousands, period decimal: \code{1,234.56} [default]
#' \item \code{"eu"} - Period thousands, comma decimal: \code{1.234,56}
#' \item \code{"space"} - Thin-space thousands, period decimal: \code{1 234.56}
#' (SI/ISO 31-0)
#' \item \code{"none"} - No thousands separator: \code{1234.56}
#' }
#' Or provide a custom two-element vector \code{c(big.mark, decimal.mark)},
#' \emph{e.g.}, \code{c("'", ".")} for Swiss-style: \verb{1'234.56}.
#'
#' When \code{NULL} (default), uses
#' \code{getOption("summata.number_format", "us")}. Set the global option
#' once per session to avoid passing this argument repeatedly:
#' \preformatted{
#' options(summata.number_format = "eu")
#' }
#'
#' @param verbose Logical. If \code{TRUE}, displays model fitting warnings
#' (\emph{e.g.,} singular fit, convergence issues). If \code{FALSE} (default),
#' routine fitting messages are suppressed while unexpected warnings are
#' preserved. When \code{NULL}, uses
#' \code{getOption("summata.verbose", FALSE)}.
#'
#' @param ... Additional arguments passed to the underlying model fitting
#' function (\code{\link[stats]{glm}}, \code{\link[stats]{lm}},
#' \code{\link[survival]{coxph}}, \code{\link[lme4]{glmer}}, \emph{etc.}). Common
#' options include \code{subset}, \code{na.action}, and model-specific control
#' parameters.
#'
#' @return A data.table with S3 class \code{"fit_result"} containing formatted
#' regression results. The table structure includes:
#' \describe{
#' \item{Variable}{Character. Predictor name or custom label}
#' \item{Group}{Character. For factor variables: category level. For
#' interactions: interaction term. For continuous: typically empty}
#' \item{n}{Integer. Total sample size (if \code{show_n = TRUE})}
#' \item{n_group}{Integer. Sample size for this factor level}
#' \item{events}{Integer. Total number of events (if \code{show_events = TRUE})}
#' \item{events_group}{Integer. Events for this factor level}
#' \item{OR/HR/RR/Coefficient or aOR/aHR/aRR/Adj. Coefficient (95\% CI)}{Character.
#' Formatted effect estimate with confidence interval. Column name depends on
#' model type and scope. Univariable models use: OR, HR, RR, Coefficient.
#' Multivariable models use adjusted notation: aOR, aHR, aRR, Adj. Coefficient}
#' \item{\emph{p}-value}{Character. Formatted \emph{p}-value from Wald test}
#' }
#'
#' The returned object includes the following attributes accessible via \code{attr()}:
#' \describe{
#' \item{model}{The fitted model object (glm, lm, coxph, \emph{etc.}). Access for
#' diagnostics, predictions, or further analysis}
#' \item{raw_data}{data.table. Unformatted numeric results with columns for
#' coefficients, standard errors, confidence bounds, quality statistics, \emph{etc.}}
#' \item{outcome}{Character. The outcome variable name}
#' \item{predictors}{Character vector. The predictor variable names}
#' \item{formula_str}{Character. The complete model formula as a string}
#' \item{model_scope}{Character. "Univariable" (one predictor) or
#' "Multivariable" (multiple predictors)}
#' \item{model_type}{Character. The regression model type used}
#' \item{interactions}{Character vector (if interactions specified).
#' The interaction terms included}
#' \item{strata}{Character (if stratification used). The stratification variable}
#' \item{cluster}{Character (if clustering used). The cluster variable}
#' \item{weights}{Character (if weighting used). The weights variable}
#' \item{significant}{Character vector. Names of predictors with \emph{p}-value
#' below 0.05, suitable for downstream variable selection workflows}
#' }
#'
#' @details
#' \strong{Model Scope Detection:}
#'
#' The function automatically detects whether the model is:
#' \itemize{
#' \item \strong{Univariable}: Single predictor (\emph{e.g.,} \code{predictors = "age"}).
#' Effect estimates are labeled as unadjusted ("OR", "HR", \emph{etc.}), representing
#' crude (unadjusted) association
#' \item \strong{Multivariable}: Multiple predictors (\emph{e.g.,}
#' \code{predictors = c("age", "sex", "treatment")})
#' Effect estimates are labeled as adjusted ("aOR", "aHR", \emph{etc.}), representing
#' associations adjusted for confounding
#' }
#'
#' \strong{Interaction Terms:}
#'
#' Interactions are specified using colon notation and added to the model:
#' \itemize{
#' \item \code{interactions = c("age:treatment")} creates interaction
#' between age and treatment
#' \item Main effects for both variables are automatically included
#' \item Multiple interactions can be specified:
#' \code{c("age:sex", "treatment:stage")}
#' \item For interactions between categorical variables, separate terms are
#' created for each combination of levels
#' }
#'
#' \strong{Stratification (Cox/Conditional Logistic):}
#'
#' The \code{strata} parameter creates separate baseline hazards:
#' \itemize{
#' \item Allows baseline hazard to vary across strata without estimating
#' stratum effects
#' \item Useful when proportional hazards assumption violated across strata
#' \item Example: \code{strata = "center"} for multicenter studies
#' \item Stratification variable is not included as a predictor
#' }
#'
#' \strong{Clustering (Cox Models):}
#'
#' The \code{cluster} parameter computes robust standard errors:
#' \itemize{
#' \item Accounts for within-cluster correlation (\emph{e.g.,} multiple observations
#' per patient)
#' \item Uses sandwich variance estimator
#' \item Does not change point estimates, only standard errors and \emph{p}-values
#' }
#'
#' \strong{Weighting:}
#'
#' The \code{weights} parameter enables weighted regression:
#' \itemize{
#' \item For survey data with sampling weights
#' \item Inverse probability weighting for causal inference
#' \item Frequency weights for aggregated data
#' \item Weights should be in a column of \code{data}
#' }
#'
#' \strong{Mixed-Effects Models (lmer/glmer/coxme):}
#'
#' Mixed effects models handle hierarchical or clustered data:
#' \itemize{
#' \item Use \code{model_type = "lmer"} for continuous/normal outcomes
#' \item Use \code{model_type = "glmer"} with appropriate \code{family} for GLM outcomes
#' \item Use \code{model_type = "coxme"} for survival outcomes with clustering
#' \item Random effects are specified in predictors using lme4 syntax:
#' \itemize{
#' \item \code{"(1|site)"} - Random intercepts by site
#' \item \code{"(treatment|site)"} - Random slopes for treatment by site
#' \item \code{"(1 + treatment|site)"} - Both random intercepts and slopes
#' }
#' \item Include random effects as part of the predictors vector
#' \item Example: \code{predictors = c("age", "treatment", "(1|site)")}
#' }
#'
#' \strong{Effect Measures by Model Type:}
#' \itemize{
#' \item \strong{Logistic} (\code{family = "binomial"/"quasibinomial"}): Odds ratios (OR/aOR)
#' \item \strong{Cox} (\code{model_type = "coxph"}): Hazard ratios (HR/aHR)
#' \item \strong{Poisson/Count} (\code{family = "poisson"/"quasipoisson"}): Rate ratios (RR/aRR)
#' \item \strong{Negative binomial} (\code{model_type = "negbin"}): Rate ratios (RR/aRR)
#' \item \strong{Gamma/Log-link}: Ratios (multiplicative effects)
#' \item \strong{Linear/Gaussian}: Raw coefficient estimates (additive effects)
#' }
#'
#' \strong{Confidence Intervals:}
#'
#' Confidence interval computation is tailored to each model class using the
#' best available method:
#' \itemize{
#' \item \strong{GLM and negative binomial}: Profile likelihood intervals via
#' \code{MASS::confint.glm()}, which invert the profile deviance and account
#' for asymmetry in the likelihood surface. More accurate than the Wald
#' approximation when subgroup sizes are small or estimates are near boundary
#' values. Quasi-likelihood families (\code{quasibinomial}, \code{quasipoisson})
#' fall back to Wald intervals because they lack a true likelihood function.
#' \item \strong{Linear models}: Exact \emph{t}-distribution intervals via
#' \code{confint.lm()}, based on the known sampling distribution under
#' normality.
#' \item \strong{Cox proportional hazards}: Wald intervals (\emph{i.e.,}
#' coefficient \eqn{\pm} \emph{z} \eqn{\times} SE), the standard approach in
#' the survival analysis literature.
#' \item \strong{Mixed-effects models} (lmer, glmer, coxme): Wald intervals.
#' Profile likelihood is available for \pkg{lme4} models via
#' \code{confint(model, method = "profile")} but can be prohibitively slow
#' for complex random-effects structures and is not used by default.
#' }
#'
#' If profile likelihood computation fails for any reason (\emph{e.g.,}
#' non-convergence during profiling), the function falls back silently to Wald
#' intervals.
#'
#' @seealso
#' \code{\link{uniscreen}} for univariable screening of multiple predictors,
#' \code{\link{fullfit}} for complete univariable-to-multivariable workflow,
#' \code{\link{compfit}} for comparing multiple models,
#' \code{\link{m2dt}} for model-to-table conversion
#'
#' @examples
#' # Load example data
#' data(clintrial)
#' data(clintrial_labels)
#' library(survival)
#'
#' # Example 1: Univariable logistic regression
#' uni_model <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = "age"
#' )
#' print(uni_model)
#' # Labeled as "Univariable OR"
#'
#' \donttest{
#'
#' # Example 2: Multivariable logistic regression
#' multi_model <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "sex", "bmi", "treatment"),
#' labels = clintrial_labels
#' )
#' print(multi_model)
#'
#' # Example 3: Cox proportional hazards model
#' cox_model <- fit(
#' data = clintrial,
#' outcome = "Surv(os_months, os_status)",
#' predictors = c("age", "sex", "treatment", "stage"),
#' model_type = "coxph",
#' labels = clintrial_labels
#' )
#' print(cox_model)
#'
#' # Example 4: Model with interaction terms
#' interact_model <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "treatment", "sex"),
#' interactions = c("age:treatment"),
#' labels = clintrial_labels
#' )
#' print(interact_model)
#'
#' # Example 5: Cox model with stratification
#' strat_model <- fit(
#' data = clintrial,
#' outcome = "Surv(os_months, os_status)",
#' predictors = c("age", "sex", "treatment"),
#' model_type = "coxph",
#' strata = "site", # Separate baseline hazards by site
#' labels = clintrial_labels
#' )
#' print(strat_model)
#'
#' # Example 6: Cox model with clustering
#' cluster_model <- fit(
#' data = clintrial,
#' outcome = "Surv(os_months, os_status)",
#' predictors = c("age", "treatment"),
#' model_type = "coxph",
#' cluster = "site", # Robust SEs accounting for site clustering
#' labels = clintrial_labels
#' )
#' print(cluster_model)
#'
#' # Example 7: Linear regression
#' linear_model <- fit(
#' data = clintrial,
#' outcome = "bmi",
#' predictors = c("age", "sex", "smoking"),
#' model_type = "lm",
#' labels = clintrial_labels
#' )
#' print(linear_model)
#'
#' # Example 8: Poisson regression for equidispersed count data
#' # fu_count has variance ~= mean, appropriate for standard Poisson
#' poisson_model <- fit(
#' data = clintrial,
#' outcome = "fu_count",
#' predictors = c("age", "stage", "treatment", "surgery"),
#' model_type = "glm",
#' family = "poisson",
#' labels = clintrial_labels
#' )
#' print(poisson_model)
#' # Returns rate ratios (RR/aRR)
#'
#' # Example 9: Negative binomial regression for overdispersed counts
#' # ae_count has variance > mean (overdispersed), use negbin or quasipoisson
#' if (requireNamespace("MASS", quietly = TRUE)) {
#' nb_result <- fit(
#' data = clintrial,
#' outcome = "ae_count",
#' predictors = c("age", "treatment", "diabetes", "surgery"),
#' model_type = "negbin",
#' labels = clintrial_labels
#' )
#' print(nb_result)
#' }
#'
#' # Example 10: Gamma regression for positive continuous outcomes
#' gamma_model <- fit(
#' data = clintrial,
#' outcome = "los_days",
#' predictors = c("age", "treatment", "surgery"),
#' model_type = "glm",
#' family = Gamma(link = "log"),
#' labels = clintrial_labels
#' )
#' print(gamma_model)
#'
#' # Example 11: Access the underlying fitted model
#' result <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "sex", "bmi")
#' )
#'
#' # Get the model object
#' model_obj <- attr(result, "model")
#' summary(model_obj)
#'
#' # Model diagnostics
#' plot(model_obj)
#'
#' # Predictions
#' preds <- predict(model_obj, type = "response")
#'
#' # Example 12: Access raw numeric data
#' raw_data <- attr(result, "raw_data")
#' print(raw_data)
#' # Contains unformatted coefficients, SEs, CIs, AIC, BIC, etc.
#'
#' # Example 13: Multiple interactions
#' complex_model <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "sex", "treatment", "bmi"),
#' interactions = c("age:treatment", "sex:bmi"),
#' labels = clintrial_labels
#' )
#' print(complex_model)
#'
#' # Example 14: Customize output columns
#' minimal <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "sex", "treatment"),
#' show_n = FALSE,
#' show_events = FALSE,
#' reference_rows = FALSE
#' )
#' print(minimal)
#'
#' # Example 15: Different confidence levels
#' ci90 <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "treatment"),
#' conf_level = 0.90 # 90% confidence intervals
#' )
#' print(ci90)
#'
#' # Example 16: Force coefficient display instead of OR
#' coef_model <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "bmi"),
#' exponentiate = FALSE # Show log odds instead of OR
#' )
#' print(coef_model)
#'
#' # Example 17: Confidence interval method
#' # Default: profile likelihood CIs for GLM (more accurate)
#' profile_result <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "treatment"),
#' p_digits = 4,
#' conf_method = "profile"
#' )
#' print(profile_result)
#'
#' # Wald CIs (faster, suitable for simulation or exploratory work)
#' wald_result <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "treatment"),
#' p_digits = 4,
#' conf_method = "wald"
#' )
#' print(wald_result)
#'
#' # Example 18: Check model quality statistics
#' result <- fit(
#' data = clintrial,
#' outcome = "os_status",
#' predictors = c("age", "sex", "treatment", "stage"),
#' keep_qc_stats = TRUE
#' )
#'
#' raw <- attr(result, "raw_data")
#' cat("AIC:", raw$AIC[1], "\n")
#' cat("BIC:", raw$BIC[1], "\n")
#' cat("C-statistic:", raw$c_statistic[1], "\n")
#'
#' # Example 19: Interaction effects - treatment effect modified by stage
#' interaction_model <- fit(
#' data = clintrial,
#' outcome = "Surv(os_months, os_status)",
#' predictors = c("age", "treatment", "stage"),
#' interactions = c("treatment:stage"),
#' model_type = "coxph",
#' labels = clintrial_labels
#' )
#' print(interaction_model)
#' # Shows main effects plus all treatment×stage interaction terms
#'
#' # Example 20: Multiple interactions in logistic regression
#' multi_interaction <- fit(
#' data = clintrial,
#' outcome = "readmission_30d",
#' predictors = c("age", "sex", "surgery", "diabetes"),
#' interactions = c("surgery:diabetes", "age:sex"),
#' labels = clintrial_labels
#' )
#' print(multi_interaction)
#'
#' # Example 21: Quasipoisson for overdispersed count data
#' # Alternative to negative binomial when MASS not available
#' quasi_model <- fit(
#' data = clintrial,
#' outcome = "ae_count",
#' predictors = c("age", "treatment", "diabetes", "surgery"),
#' model_type = "glm",
#' family = "quasipoisson",
#' labels = clintrial_labels
#' )
#' print(quasi_model)
#' # Adjusts standard errors for overdispersion
#'
#' # Example 22: Quasibinomial for overdispersed binary data
#' quasi_logistic <- fit(
#' data = clintrial,
#' outcome = "any_complication",
#' predictors = c("age", "bmi", "diabetes", "surgery"),
#' model_type = "glm",
#' family = "quasibinomial",
#' labels = clintrial_labels
#' )
#' print(quasi_logistic)
#'
#' # Example 23: Gamma regression with identity link for additive effects
#' gamma_identity <- fit(
#' data = clintrial,
#' outcome = "los_days",
#' predictors = c("age", "treatment", "surgery", "any_complication"),
#' model_type = "glm",
#' family = Gamma(link = "identity"),
#' labels = clintrial_labels
#' )
#' print(gamma_identity)
#' # Shows additive effects (coefficients) instead of multiplicative (ratios)
#'
#' # Example 24: Inverse Gaussian regression for highly skewed data
#' inverse_gaussian <- fit(
#' data = clintrial,
#' outcome = "recovery_days",
#' predictors = c("age", "surgery", "pain_score"),
#' model_type = "glm",
#' family = inverse.gaussian(link = "log"),
#' labels = clintrial_labels
#' )
#' print(inverse_gaussian)
#'
#' # Example 25: Linear mixed effects with random intercepts
#' # Accounts for clustering of patients within sites
#' if (requireNamespace("lme4", quietly = TRUE)) {
#' lmer_model <- fit(
#' data = clintrial,
#' outcome = "los_days",
#' predictors = c("age", "treatment", "stage", "(1|site)"),
#' model_type = "lmer",
#' labels = clintrial_labels
#' )
#' print(lmer_model)
#' }
#'
#' # Example 26: Generalized linear mixed effects (logistic with random effects)
#' if (requireNamespace("lme4", quietly = TRUE)) {
#' glmer_model <- fit(
#' data = clintrial,
#' outcome = "readmission_30d",
#' predictors = c("age", "surgery", "los_days", "(1|site)"),
#' model_type = "glmer",
#' family = "binomial",
#' labels = clintrial_labels
#' )
#' print(glmer_model)
#' }
#'
#' # Example 27: Cox mixed effects for clustered survival data
#' if (requireNamespace("coxme", quietly = TRUE)) {
#' coxme_model <- fit(
#' data = clintrial,
#' outcome = "Surv(os_months, os_status)",
#' predictors = c("age", "treatment", "stage", "(1|site)"),
#' model_type = "coxme",
#' labels = clintrial_labels
#' )
#' print(coxme_model)
#' }
#'
#' # Example 28: Random slopes - treatment effect varies by site
#' if (requireNamespace("lme4", quietly = TRUE)) {
#' random_slopes <- fit(
#' data = clintrial,
#' outcome = "los_days",
#' predictors = c("age", "treatment", "stage", "(treatment|site)"),
#' model_type = "lmer",
#' labels = clintrial_labels
#' )
#' print(random_slopes)
#' }
#'
#' # Example 29: Format a pre-fitted model (model-based workflow)
#' # Useful for models fitted outside of fit()
#' pre_fitted <- glm(os_status ~ age + sex + treatment,
#' family = binomial, data = clintrial)
#' result <- fit(model = pre_fitted,
#' data = clintrial,
#' labels = clintrial_labels)
#' print(result)
#'
#' }
#'
#' @family regression functions
#' @export
fit <- function(data = NULL,
outcome = NULL,
predictors = NULL,
model = NULL,
model_type = "glm",
family = "binomial",
random = NULL,
interactions = NULL,
strata = NULL,
cluster = NULL,
weights = NULL,
conf_level = 0.95,
reference_rows = TRUE,
show_n = TRUE,
show_events = TRUE,
digits = 2,
p_digits = 3,
labels = NULL,
keep_qc_stats = TRUE,
exponentiate = NULL,
conf_method = NULL,
number_format = NULL,
verbose = NULL,
...) {
## Check if we're using model-based workflow
## Resolve verbose setting
verbose <- if (is.null(verbose)) getOption("summata.verbose", FALSE) else verbose
## Resolve number formatting marks
validate_number_format(number_format)
marks <- resolve_number_marks(number_format)
if (!is.null(model)) {
## Model-based workflow: format existing model
## Get data from model if not provided
if (is.null(data)) {
data <- get_model_data(model)
if (is.null(data)) {
stop("Cannot extract data from model. Please provide 'data' parameter.")
}
}
## Convert to data.table
if (!data.table::is.data.table(data)) {
data <- data.table::as.data.table(data)
}
## Store data in model for m2dt
if (isS4(model)) {
attr(model, "data") <- data
} else {
model$data <- data
attr(model, "data") <- data
}
## Convert to readable format using m2dt
raw_data <- m2dt(data = data,
model = model,
conf_level = conf_level,
keep_qc_stats = keep_qc_stats,
include_intercept = FALSE,
reference_rows = reference_rows,
skip_counts = (!show_n && !show_events),
conf_method = conf_method)
## Propagate cached confint from m2dt to model for downstream reuse
cached_ci <- attr(raw_data, "cached_confint")
if (!is.null(cached_ci)) {
attr(model, "cached_confint") <- cached_ci
attr(model, "cached_confint_level") <- attr(raw_data, "cached_confint_level")
}
## Format results for publication
formatted <- format_model_table(raw_data,
show_n = show_n,
show_events = show_events,
digits = digits,
p_digits = p_digits,
labels = labels,
exponentiate = exponentiate,
conf_level = conf_level,
marks = marks)
## Extract formula and predictors from model
formula_str <- deparse(stats::formula(model))
## Attach metadata
data.table::setattr(formatted, "model", model)
data.table::setattr(formatted, "raw_data", raw_data)
data.table::setattr(formatted, "formula_str", formula_str)
data.table::setattr(formatted, "model_scope", raw_data$model_scope[1])
data.table::setattr(formatted, "model_type", raw_data$model_type[1])
## Extract significant predictors (p < 0.05)
if ("p_value" %in% names(raw_data) && "variable" %in% names(raw_data)) {
sig_vars <- unique(
raw_data[!is.na(p_value) & p_value < 0.05]$variable
)
data.table::setattr(formatted, "significant", sig_vars)
}
class(formatted) <- c("fit_result", class(formatted))
return(formatted)
}
## Formula-based workflow: fit new model
## Validate required parameters for formula-based workflow
if (is.null(data)) {
stop("'data' parameter is required when not providing a pre-fitted model.")
}
if (is.null(outcome)) {
stop("'outcome' parameter is required when not providing a pre-fitted model.")
}
if (is.null(predictors)) {
stop("'predictors' parameter is required when not providing a pre-fitted model.")
}
## Store original data name for reference
data_name <- deparse(substitute(data))
## Convert to data.table once at the start
if (!data.table::is.data.table(data)) {
data <- data.table::as.data.table(data)
}
## Validate inputs and auto-correct model type if needed
validation <- validate_fit_inputs(
data = data,
outcome = outcome,
predictors = predictors,
model_type = model_type,
family = if (model_type %in% c("glm", "glmer")) family else NULL,
conf_level = conf_level,
digits = digits,
p_digits = p_digits,
auto_correct_model = TRUE
)
## Apply any auto-corrections
if (validation$auto_corrected) {
model_type <- validation$model_type
}
## Resolve Gamma family string to log link
## The canonical link for Gamma is inverse (1/μ), which produces
## uninterpretable coefficients. The log link yields exp(β) = ratio,
## consistent with the multiplicative effect interpretation used for
## OR, HR, and RR throughout summata.
if (model_type %in% c("glm", "glmer") && is.character(family) &&
tolower(family) == "gamma") {
family <- stats::Gamma(link = "log")
}
## Validate random effects for mixed-effects models
## Support both explicit 'random' parameter AND random effects embedded in predictors
mixed_types <- c("glmer", "lmer", "coxme")
has_random_in_predictors <- any(grepl("\\|", predictors))
if (model_type %in% mixed_types && is.null(random) && !has_random_in_predictors) {
stop("'random' parameter is required for mixed-effects models (", model_type, ").\n",
"Example: random = \"(1|site)\" for random intercepts by site.\n",
"Alternatively, include random effects in 'predictors' using the same syntax.")
}
if (!is.null(random) && !model_type %in% mixed_types) {
warning("'random' parameter is ignored for non-mixed-effects models (", model_type, ").")
random <- NULL
}
## Extract random effects from predictors if present (for mixed-effects models)
## The random effects get added to the formula but aren't treated as regular predictors
if (model_type %in% mixed_types && has_random_in_predictors) {
random_mask <- grepl("\\|", predictors)
random_in_preds <- predictors[random_mask]
fixed_predictors <- predictors[!random_mask]
## Combine explicit random with embedded random (if both provided)
if (!is.null(random)) {
message("Note: Random effects specified in both 'random' parameter and 'predictors'. ",
"Combining: ", random, " + ", paste(random_in_preds, collapse = " + "))
random <- paste(c(random, random_in_preds), collapse = " + ")
} else {
random <- paste(random_in_preds, collapse = " + ")
}
## Update predictors to only include fixed effects
predictors <- fixed_predictors
}
## Build formula string
if (!is.null(interactions)) {
formula_str <- paste(outcome, "~",
paste(c(predictors, interactions), collapse = " + "))
} else {
formula_str <- paste(outcome, "~", paste(predictors, collapse = " + "))
}
## Add random effects if provided (for lmer/glmer - coxme uses same formula syntax)
if (!is.null(random) && model_type %in% c("glmer", "lmer", "coxme")) {
formula_str <- paste(formula_str, "+", random)
}
## Add strata if provided (for Cox models)
if (!is.null(strata) && model_type %in% c("coxph", "clogit")) {
formula_str <- paste(formula_str, "+ strata(", strata, ")")
}
formula <- stats::as.formula(formula_str)
## Fit model based on type - use 'data' directly
model <- quiet_fit(quote({
if (model_type == "glm") {
if (!is.null(weights)) {
stats::glm(formula, data = data, family = family,
weights = data[[weights]], ...)
} else {
stats::glm(formula, data = data, family = family, ...)
}
} else if (model_type == "negbin") {
if (!requireNamespace("MASS", quietly = TRUE))
stop("Package 'MASS' required for negative binomial models")
if (!is.null(weights)) {
MASS::glm.nb(formula, data = data,
weights = data[[weights]], ...)
} else {
MASS::glm.nb(formula, data = data, ...)
}
} else if (model_type == "lm") {
if (!is.null(weights)) {
stats::lm(formula, data = data, weights = data[[weights]], ...)
} else {
stats::lm(formula, data = data, ...)
}
} else if (model_type == "coxph") {
if (!requireNamespace("survival", quietly = TRUE))
stop("Package 'survival' required for Cox models")
if (!is.null(cluster)) {
survival::coxph(formula, data = data,
cluster = data[[cluster]], ...)
} else {
survival::coxph(formula, data = data, ...)
}
} else if (model_type == "clogit") {
if (!requireNamespace("survival", quietly = TRUE))
stop("Package 'survival' required for conditional logistic regression")
survival::clogit(formula, data = data, ...)
} else if (model_type == "coxme") {
if (!requireNamespace("coxme", quietly = TRUE))
stop("Package 'coxme' required for mixed effects Cox models")
coxme::coxme(formula, data = data, ...)
} else if (model_type == "lmer") {
if (!requireNamespace("lme4", quietly = TRUE))
stop("Package 'lme4' required for linear mixed effects models")
lme4::lmer(formula, data = data, ...)
} else if (model_type == "glmer") {
if (!requireNamespace("lme4", quietly = TRUE))
stop("Package 'lme4' required for mixed effects models")
lme4::glmer(formula, data = data, family = family, ...)
} else {
stop("Unsupported model type: ", model_type)
}
}), verbose = verbose)
## Store the data directly in the model
if (isS4(model)) {
attr(model, "data") <- data
} else {
model$data <- data
attr(model, "data") <- data
}
## Attach metadata as attributes
data.table::setattr(model, "formula_str", formula_str)
data.table::setattr(model, "predictors", predictors)
data.table::setattr(model, "model_type", model_type)
data.table::setattr(model, "data_name", data_name)
if (!is.null(interactions)) {
data.table::setattr(model, "interactions", interactions)
}
if (!is.null(random)) {
data.table::setattr(model, "random", random)
}
if (!is.null(strata)) {
data.table::setattr(model, "strata", strata)
}
if (!is.null(cluster)) {
data.table::setattr(model, "cluster", cluster)
}
if (!is.null(weights)) {
data.table::setattr(model, "weights", weights)
}
## Convert to readable format using m2dt
raw_data <- m2dt(data = data,
model = model,
conf_level = conf_level,
keep_qc_stats = keep_qc_stats,
include_intercept = FALSE,
reference_rows = reference_rows,
skip_counts = (!show_n && !show_events),
conf_method = conf_method)
## Propagate cached confint from m2dt to model for downstream reuse
## (avoids redundant profiling in forest plot functions)
cached_ci <- attr(raw_data, "cached_confint")
if (!is.null(cached_ci)) {
attr(model, "cached_confint") <- cached_ci
attr(model, "cached_confint_level") <- attr(raw_data, "cached_confint_level")
}
## Format results for publication
formatted <- format_model_table(raw_data,
show_n = show_n,
show_events = show_events,
digits = digits,
p_digits = p_digits,
labels = labels,
exponentiate = exponentiate,
conf_level = conf_level,
marks = marks)
## Attach metadata
data.table::setattr(formatted, "model", model)
data.table::setattr(formatted, "raw_data", raw_data)
data.table::setattr(formatted, "outcome", outcome)
data.table::setattr(formatted, "predictors", predictors)
data.table::setattr(formatted, "formula_str", formula_str)
data.table::setattr(formatted, "model_scope", raw_data$model_scope[1])
data.table::setattr(formatted, "model_type", raw_data$model_type[1])
## Extract significant predictors (p < 0.05)
if ("p_value" %in% names(raw_data) && "variable" %in% names(raw_data)) {
sig_vars <- unique(
raw_data[!is.na(p_value) & p_value < 0.05]$variable
)
data.table::setattr(formatted, "significant", sig_vars)
}
## Add metadata from model fitting
if (!is.null(interactions)) {
data.table::setattr(formatted, "interactions", interactions)
}
if (!is.null(random)) {
data.table::setattr(formatted, "random", random)
}
if (!is.null(strata)) {
data.table::setattr(formatted, "strata", strata)
}
if (!is.null(cluster)) {
data.table::setattr(formatted, "cluster", cluster)
}
if (!is.null(weights)) {
data.table::setattr(formatted, "weights", weights)
}
class(formatted) <- c("fit_result", class(formatted))
return(formatted)
}
#' Print method for fit results
#'
#' Displays a summary header with model scope (Univariable/Multivariable),
#' model type, formula, sample size, and event count before printing the
#' formatted results table.
#'
#' @param x fit_result object.
#' @param ... Additional arguments passed to print methods.
#' @return Invisibly returns the input object \code{x}. Called for its
#' side effect of printing a formatted summary to the console.
#' @keywords internal
#' @family regression functions
#' @export
print.fit_result <- function(x, ...) {
cat("\n", attr(x, "model_scope"), " ", attr(x, "model_type"), " Model\n", sep = "")
cat("Formula: ", attr(x, "formula_str"), "\n", sep = "")
## Get sample size from raw results
raw <- attr(x, "raw_data")
if (!is.null(raw)) {
cat("n = ", raw$n[1], sep = "")
if (!is.na(raw$events[1])) cat(", Events = ", raw$events[1], sep = "")
cat("\n")
}
cat("\n")
NextMethod("print", x)
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.