R/ancova.R
In rbmi: Reference Based Multiple Imputation

Documented in ancova ancova_single

#' Analysis of Covariance
#'
#' Performs an analysis of covariance between two groups returning the estimated
#' "treatment effect" (i.e. the contrast between the two treatment groups) and
#' the least square means estimates in each group.
#'
#' @param data A `data.frame` containing the data to be used in the model.
#' @param vars A `vars` object as generated by [set_vars()]. Only the `group`,
#' `visit`, `outcome` and `covariates` elements are required. See details.
#' @param visits An optional character vector specifying which visits to
#' fit the ancova model at. If `NULL`, a separate ancova model will be fit to the
#' outcomes for each visit (as determined by `unique(data[[vars$visit]])`).
#' See details.
#' @param weights Character, either `"proportional"` (default) or `"equal"`. Specifies the
#' weighting strategy to be used for categorical covariates when calculating the lsmeans.
#' See details.
#'
#' @details
#' The function works as follows:
#'
#' 1. Select the first value from `visits`.
#' 2. Subset the data to only the observations that occurred on this visit.
#' 3. Fit a linear model as  `vars$outcome ~ vars$group + vars$covariates`.
#' 4. Extract the "treatment effect" & least square means for each treatment group.
#' 5. Repeat points 2-3 for all other values in `visits`.
#'
#' If no value for `visits` is provided then it will be set to
#' `unique(data[[vars$visit]])`.
#'
#' In order to meet the formatting standards set by [analyse()] the results will be collapsed
#' into a single list suffixed by the visit name, e.g.:
#'```
#'list(
#'    trt_visit_1 = list(est = ...),
#'    lsm_ref_visit_1 = list(est = ...),
#'    lsm_alt_visit_1 = list(est = ...),
#'    trt_visit_2 = list(est = ...),
#'    lsm_ref_visit_2 = list(est = ...),
#'    lsm_alt_visit_2 = list(est = ...),
#'    ...
#')
#'```
#' Please note that "ref" refers to the first factor level of `vars$group` which does not necessarily
#' coincide with the control arm. Analogously, "alt" refers to the second factor level of `vars$group`.
#' "trt" refers to the model contrast translating the mean difference between the second level and first level.
#'
#' If you want to include interaction terms in your model this can be done
#' by providing them to the `covariates` argument of [set_vars()]
#' e.g. `set_vars(covariates = c("sex*age"))`.
#'
#'
#' ## Weighting
#'
#' `"proportional"` is the default scheme that is used. This is equivalent to standardization,
#' i.e. the lsmeans in
#' each group are equal to the predicted mean outcome from the ancova model for
#' that group based on baseline characteristics of all subjects regardless of
#' their assigned group. The alternative weighting scheme, `"equal"`, creates hypothetical
#' patients by expanding out all combinations of the models categorical covariates. The
#' lsmeans are then calculated as the average of
#' the predicted mean outcome for these hypothetical patients assuming they come from each
#' group in turn.
#'
#' In short:
#'   - `"proportional"` weights categorical covariates based upon their frequency of occurrence
#' in the data.
#'   - `"equal"` weights categorical covariates equally across all theoretical combinations.
#'
#' @seealso [analyse()]
#' @seealso [stats::lm()]
#' @seealso [set_vars()]
#' @export
ancova <- function(data, vars, visits = NULL, weights = c("proportional", "equal")) {

    outcome <- vars[["outcome"]]
    group <- vars[["group"]]
    covariates <- vars[["covariates"]]
    visit <- vars[["visit"]]
    weights <- match.arg(weights)

    expected_vars <- c(extract_covariates(covariates), outcome, group)

    assert_that(
        ! any(visit %in% expected_vars),
        msg = paste0(
            "The `vars$visit` variable cannot be a covariate in an ANCOVA model. ",
            "Please adjust `vars$covariates` accordingly"
        )
    )

    for (var in c(visit, expected_vars)) {
        assert_that(
            var %in% names(data),
            msg = sprintf("Variable `%s` doesn't exist in data", var)
        )
    }

    assert_that(
        is.character(outcome),
        length(outcome) == 1,
        msg = "`vars$outcome` must be a length 1 character"
    )

    assert_that(
        is.character(group),
        length(group) == 1,
        msg = "`vars$group` must be a length 1 character"
    )

    assert_that(
        is.character(visit),
        length(visit) == 1,
        msg = "`vars$visit` must be a length 1 character"
    )

    assert_that(
        is.character(covariates) | is.null(covariates),
        msg = "`covariates` must be a character vector"
    )

    assert_that(
        is.null(visits) | is.character(visits),
        msg = "`visits` must be NULL or a character vector"
    )

    if (is.null(visits)) {
        visits <- unique(data[[visit]])
    }

    for (i in visits) {
        assert_that(
            i %in% data[[visit]],
            msg = sprintf("Visit `%s` does not appear in `data[[vars$visit]]`", i)
        )
    }

    res <- lapply(
        visits,
        function(x) {
            data2 <- data[data[[visit]] == x, ]
            res <- ancova_single(data2, outcome, group, covariates, weights)
            names(res) <- paste0(names(res), "_", x)
            return(res)
        }
    )
    return(unlist(res, recursive = FALSE))
}



#' Implements an Analysis of Covariance (ANCOVA)
#'
#' @description
#' Performance analysis of covariance. See [ancova()] for full details.
#'
#' @param data The `data.frame` containing all of the data required for the model.
#' @param outcome Character, the name of the outcome variable in `data`.
#' @param group Character, the name of the group variable in `data`.
#' @param covariates Character vector containing the name of any additional covariates
#' to be included in the model as well as any interaction terms.
#' @param weights Character, specifies whether to use "proportional" or "equal" weighting for each
#' categorical covariate combination when calculating the lsmeans.
#'
#' @details
#' - `group` must be a factor variable with only 2 levels.
#' - `outcome` must be a continuous numeric variable.
#'
#' @examples
#' \dontrun{
#' iris2 <- iris[ iris$Species %in% c("versicolor", "virginica"), ]
#' iris2$Species <- factor(iris2$Species)
#' ancova_single(iris2, "Sepal.Length", "Species", c("Petal.Length * Petal.Width"))
#' }
#' @seealso [ancova()]
#' @importFrom stats lm coef vcov df.residual
ancova_single <- function(data, outcome, group, covariates, weights = c("proportional", "equal")) {

    weights <- match.arg(weights)
    assert_that(
        is.factor(data[[group]]),
        length(levels(data[[group]])) == 2,
        length(unique(data[[group]])) == 2,
        msg = "`data[[vars$group]]` must be a factor variable with 2 levels"
    )

    # Manually convert to dummary variables to make extraction easier
    data[[group]] <- as.numeric(data[[group]]) - 1

    data2 <- data[, c(extract_covariates(covariates), outcome, group)]

    frm <- as_simple_formula(outcome, c(group, covariates))

    mod <- lm(formula = frm, data = data2)

    args <- list(model = mod, .weights = weights)
    args[[group]] <- 0
    lsm0 <- do.call(lsmeans, args)

    args[[group]] <- 1
    lsm1 <- do.call(lsmeans, args)

    x <- list(
        trt = list(
            est = coef(mod)[[group]],
            se = sqrt(vcov(mod)[group, group]),
            df = df.residual(mod)
        ),
        lsm_ref = lsm0,
        lsm_alt = lsm1
    )
    return(x)
}