simtrial: Clinical Trial Simulation

Documented in wlr wlr.counting_process wlr.default wlr.tte_data

#  Copyright (c) 2024 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
#  All rights reserved.
#
#  This file is part of the simtrial program.
#
#  simtrial is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' Weighted logrank test
#'
#' @param data Dataset (generated by [sim_pw_surv()]) that has been cut by
#'   [counting_process()], [cut_data_by_date()], or [cut_data_by_event()].
#' @param weight Weighting functions, such as [fh()], [mb()], and
#'   [early_zero()].
#' @param return_variance A logical flag that, if `TRUE`, adds columns
#'   estimated variance for weighted sum of observed minus expected;
#'   see details; Default: `FALSE`.
#' @param ratio randomization ratio (experimental:control).
#'  - If the `data` is generated by simtrial, such as
#'    + `data = sim_pw_surv(...) |> cut_data_by_date(...)`
#'    + `data = sim_pw_surv(...) |> cut_data_by_event(...)`
#'    + `data = sim_pw_surv(...) |> cut_data_by_date(...) |> counting_process(...)`
#'    + `data = sim_pw_surv(...) |> cut_data_by_event(...) |> counting_process(...)`
#'   there is no need to input the `ratio`, as simtrial gets the `ratio` via the
#'   `block` arguments in [sim_pw_surv()].
#'  - If the `data` is a custom dataset (see Example 2) below,
#'    + Users are suggested to input the planned randomization ratio to `ratio`;
#'    + If not, simtrial takes the empirical randomization ratio.
#'
#' @return A list containing the test method (`method`),
#' parameters of this test method (`parameter`),
#' point estimate of the treatment effect (`estimate`),
#' standardized error of the treatment effect (`se`),
#' Z-score (`z`), p-values (`p_value`).
#'
#' @importFrom data.table setDF setDT
#'
#' @export
#'
#' @details
#' - \eqn{z} - Standardized normal Fleming-Harrington weighted logrank test.
#' - \eqn{i} - Stratum index.
#' - \eqn{d_i} - Number of distinct times at which events occurred in
#'   stratum \eqn{i}.
#' - \eqn{t_{ij}} - Ordered times at which events in stratum
#'   \eqn{i}, \eqn{j = 1, 2, \ldots, d_i} were observed;
#'   for each observation, \eqn{t_{ij}} represents the time post study entry.
#' - \eqn{O_{ij.}} - Total number of events in stratum \eqn{i} that occurred
#'   at time \eqn{t_{ij}}.
#' - \eqn{O_{ije}} - Total number of events in stratum \eqn{i} in the
#'   experimental treatment group that occurred at time \eqn{t_{ij}}.
#' - \eqn{N_{ij.}} - Total number of study subjects in stratum \eqn{i}
#'   who were followed for at least duration.
#' - \eqn{E_{ije}} - Expected observations in experimental treatment group
#'   given random selection of \eqn{O_{ij.}} from those in
#'   stratum \eqn{i} at risk at time \eqn{t_{ij}}.
#' - \eqn{V_{ije}} - Hypergeometric variance for \eqn{E_{ije}} as
#'   produced in `Var` from [counting_process()].
#' - \eqn{N_{ije}} - Total number of study subjects in
#'   stratum \eqn{i} in the experimental treatment group
#'   who were followed for at least duration \eqn{t_{ij}}.
#' - \eqn{E_{ije}} - Expected observations in experimental group in
#'   stratum \eqn{i} at time \eqn{t_{ij}} conditioning on the overall number
#'   of events and at risk populations at that time and sampling at risk
#'   observations without replacement:
#'   \deqn{E_{ije} = O_{ij.} N_{ije}/N_{ij.}}
#' - \eqn{S_{ij}} - Kaplan-Meier estimate of survival in combined
#'   treatment groups immediately prior to time \eqn{t_{ij}}.
#' - \eqn{\rho, \gamma} - Real parameters for Fleming-Harrington test.
#' - \eqn{X_i} - Numerator for signed logrank test in stratum \eqn{i}
#'   \deqn{X_i = \sum_{j=1}^{d_{i}} S_{ij}^\rho(1-S_{ij}^\gamma)(O_{ije}-E_{ije})}
#' - \eqn{V_{ij}} - Variance used in denominator for Fleming-Harrington
#'   weighted logrank tests
#'   \deqn{V_i = \sum_{j=1}^{d_{i}} (S_{ij}^\rho(1-S_{ij}^\gamma))^2V_{ij})}
#'   The stratified Fleming-Harrington weighted logrank test is then computed as:
#'   \deqn{z = \sum_i X_i/\sqrt{\sum_i V_i}.}
#'
#' @examples
#' # ---------------------- #
#' #      Example 1         #
#' #  Use dataset generated #
#' #     by simtrial        #
#' # ---------------------- #
#' x <- sim_pw_surv(n = 200) |> cut_data_by_event(100)
#'
#' # Example 1A: WLR test with FH wights
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5))
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5), return_variance = TRUE)
#'
#' # Example 1B: WLR test with MB wights
#' x |> wlr(weight = mb(delay = 4, w_max = 2))
#'
#' # Example 1C: WLR test with early zero wights
#' x |> wlr(weight = early_zero(early_period = 4))
#'
#' # Example 1D
#' # For increased computational speed when running many WLR tests, you can
#' # pre-compute the counting_process() step first, and then pass the result of
#' # counting_process() directly to wlr()
#' x <- x |> counting_process(arm = "experimental")
#' x |> wlr(weight = fh(rho = 0, gamma = 1))
#' x |> wlr(weight = mb(delay = 4, w_max = 2))
#' x |> wlr(weight = early_zero(early_period = 4))
#'
#' # ---------------------- #
#' #      Example 2         #
#' #  Use cumsum dataset    #
#' # ---------------------- #
#' x <- data.frame(treatment = ifelse(ex1_delayed_effect$trt == 1, "experimental", "control"),
#'                 stratum = rep("All", nrow(ex1_delayed_effect)),
#'                 tte = ex1_delayed_effect$month,
#'                 event = ex1_delayed_effect$evntd)
#'
#' # Users can specify the randomization ratio to calculate the statistical information under H0
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5), ratio = 2)
#'
#' x |>
#'   counting_process(arm = "experimental") |>
#'   wlr(weight = fh(rho = 0, gamma = 0.5), ratio = 2)
#'
#' # If users don't provide the randomization ratio, we will calculate the emperical ratio
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5))
#'
#' x |>
#'   counting_process(arm = "experimental") |>
#'   wlr(weight = fh(rho = 0, gamma = 0.5))
wlr <- function(data, weight, return_variance = FALSE, ratio = NULL) {
  UseMethod("wlr", data)
}

#' @rdname wlr
#' @export
wlr.default <- function(data, weight, return_variance = FALSE, ratio = NULL) {

  if (!all(c("tte", "event", "stratum", "treatment") %in% colnames(data))) {
    stop('Input must have the columns "tte", "event", "stratum", and "treatment"')
  }

  wlr.tte_data(data = data, weight = weight, return_variance = return_variance, ratio = ratio)
}


#' @rdname wlr
#' @export
wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL) {
  # if the `data` is NOT generated by sim_pw_surv
  # - if user input the randomization ratio, we will directly take its values
  # - otherwise, we calculate the empirical ratio
  if (!"generate_by_simpwsurv" %in% names(attributes(data))) {
    if (is.null(ratio)) {
      ratio <- sum(data$treatment == "experimental") / sum(data$treatment == "control")
    }
  } else {
  # if the `data` is generated by sim_pw_surv, the take the ratio from the attributes of `data`
    ratio <- attributes(data)$ratio
  }

  x <- data |> counting_process(arm = "experimental")
  wlr.counting_process(x, weight, return_variance = FALSE, ratio = ratio)
}

#' @rdname wlr
#' @export
wlr.counting_process <- function(data, weight, return_variance = FALSE, ratio = NULL) {
  x <- data

  if (is.null(ratio)) {
    ratio <- attributes(data)$ratio
  }
  q_e <- ratio / (1 + ratio)
  q_c <- 1 - q_e

  # initialize the output
  ans <- list()
  ans$method <- "WLR"

  # calculate z score
  if (inherits(weight, "fh")) {
    x <- x |> fh_weight(rho = weight$rho, gamma = weight$gamma)
    ans$parameter <- paste0("FH(rho=", weight$rho, ", gamma=", weight$gamma, ")")
  } else if (inherits(weight, "mb")) {
    x <- x |> mb_weight(delay = weight$delay, w_max = weight$w_max)
    ans$parameter <- paste0("MB(delay = ", weight$delay, ", max_weight = ", weight$w_max, ")")
  } else if (inherits(weight, "early_period")) {
    x <- x |> early_zero_weight(early_period = weight$early_period, fail_rate = weight$fail_rate)
    ans$parameter <- paste0("Xu 2017 with first ", round(weight$early_period, 4), " months of 0 weights")
  }

  # calculate the treatment estimation
  ans$estimate <- sum(x$o_minus_e * x$weight)

  # calculate the se
  ans$se <- sqrt(sum(x$var_o_minus_e * x$weight^2))

  # calculate z-score
  ans$z <- -ans$estimate / ans$se

  # calculate the statistcial information
  x$event_ctrl <- x$event_total - x$event_trt
  weighted_event_trt <- sum(x$event_trt[x$event_trt > 0] * x$weight[x$event_trt > 0]^2)
  weighted_event_ctrl <- sum(x$event_ctrl[x$event_ctrl > 0] * x$weight[x$event_ctrl > 0]^2)
  ans$info <- (1 / weighted_event_trt + 1 / weighted_event_ctrl)^(-1)
  ans$info0 <- sum(x$event_total * q_c * q_e * x$weight^2)

  return(ans)
}