# Copyright (c) 2024 Merck & Co., Inc., Rahway, NJ, USA and its affiliates.
# All rights reserved.
#
# This file is part of the simtrial program.
#
# simtrial is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' Weighted logrank test
#'
#' @param data Dataset (generated by [sim_pw_surv()]) that has been cut by
#' [counting_process()], [cut_data_by_date()], or [cut_data_by_event()].
#' @param weight Weighting functions, such as [fh()], [mb()], and
#' [early_zero()].
#' @param return_variance A logical flag that, if `TRUE`, adds columns
#' estimated variance for weighted sum of observed minus expected;
#' see details; Default: `FALSE`.
#' @param ratio randomization ratio (experimental:control).
#' - If the `data` is generated by simtrial, such as
#' + `data = sim_pw_surv(...) |> cut_data_by_date(...)`
#' + `data = sim_pw_surv(...) |> cut_data_by_event(...)`
#' + `data = sim_pw_surv(...) |> cut_data_by_date(...) |> counting_process(...)`
#' + `data = sim_pw_surv(...) |> cut_data_by_event(...) |> counting_process(...)`
#' there is no need to input the `ratio`, as simtrial gets the `ratio` via the
#' `block` arguments in [sim_pw_surv()].
#' - If the `data` is a custom dataset (see Example 2) below,
#' + Users are suggested to input the planned randomization ratio to `ratio`;
#' + If not, simtrial takes the empirical randomization ratio.
#'
#' @return A list containing the test method (`method`),
#' parameters of this test method (`parameter`),
#' point estimate of the treatment effect (`estimate`),
#' standardized error of the treatment effect (`se`),
#' Z-score (`z`), p-values (`p_value`).
#'
#' @importFrom data.table setDF setDT
#'
#' @export
#'
#' @details
#' - \eqn{z} - Standardized normal Fleming-Harrington weighted logrank test.
#' - \eqn{i} - Stratum index.
#' - \eqn{d_i} - Number of distinct times at which events occurred in
#' stratum \eqn{i}.
#' - \eqn{t_{ij}} - Ordered times at which events in stratum
#' \eqn{i}, \eqn{j = 1, 2, \ldots, d_i} were observed;
#' for each observation, \eqn{t_{ij}} represents the time post study entry.
#' - \eqn{O_{ij.}} - Total number of events in stratum \eqn{i} that occurred
#' at time \eqn{t_{ij}}.
#' - \eqn{O_{ije}} - Total number of events in stratum \eqn{i} in the
#' experimental treatment group that occurred at time \eqn{t_{ij}}.
#' - \eqn{N_{ij.}} - Total number of study subjects in stratum \eqn{i}
#' who were followed for at least duration.
#' - \eqn{E_{ije}} - Expected observations in experimental treatment group
#' given random selection of \eqn{O_{ij.}} from those in
#' stratum \eqn{i} at risk at time \eqn{t_{ij}}.
#' - \eqn{V_{ije}} - Hypergeometric variance for \eqn{E_{ije}} as
#' produced in `Var` from [counting_process()].
#' - \eqn{N_{ije}} - Total number of study subjects in
#' stratum \eqn{i} in the experimental treatment group
#' who were followed for at least duration \eqn{t_{ij}}.
#' - \eqn{E_{ije}} - Expected observations in experimental group in
#' stratum \eqn{i} at time \eqn{t_{ij}} conditioning on the overall number
#' of events and at risk populations at that time and sampling at risk
#' observations without replacement:
#' \deqn{E_{ije} = O_{ij.} N_{ije}/N_{ij.}}
#' - \eqn{S_{ij}} - Kaplan-Meier estimate of survival in combined
#' treatment groups immediately prior to time \eqn{t_{ij}}.
#' - \eqn{\rho, \gamma} - Real parameters for Fleming-Harrington test.
#' - \eqn{X_i} - Numerator for signed logrank test in stratum \eqn{i}
#' \deqn{X_i = \sum_{j=1}^{d_{i}} S_{ij}^\rho(1-S_{ij}^\gamma)(O_{ije}-E_{ije})}
#' - \eqn{V_{ij}} - Variance used in denominator for Fleming-Harrington
#' weighted logrank tests
#' \deqn{V_i = \sum_{j=1}^{d_{i}} (S_{ij}^\rho(1-S_{ij}^\gamma))^2V_{ij})}
#' The stratified Fleming-Harrington weighted logrank test is then computed as:
#' \deqn{z = \sum_i X_i/\sqrt{\sum_i V_i}.}
#'
#' @examples
#' # ---------------------- #
#' # Example 1 #
#' # Use dataset generated #
#' # by simtrial #
#' # ---------------------- #
#' x <- sim_pw_surv(n = 200) |> cut_data_by_event(100)
#'
#' # Example 1A: WLR test with FH wights
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5))
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5), return_variance = TRUE)
#'
#' # Example 1B: WLR test with MB wights
#' x |> wlr(weight = mb(delay = 4, w_max = 2))
#'
#' # Example 1C: WLR test with early zero wights
#' x |> wlr(weight = early_zero(early_period = 4))
#'
#' # Example 1D
#' # For increased computational speed when running many WLR tests, you can
#' # pre-compute the counting_process() step first, and then pass the result of
#' # counting_process() directly to wlr()
#' x <- x |> counting_process(arm = "experimental")
#' x |> wlr(weight = fh(rho = 0, gamma = 1))
#' x |> wlr(weight = mb(delay = 4, w_max = 2))
#' x |> wlr(weight = early_zero(early_period = 4))
#'
#' # ---------------------- #
#' # Example 2 #
#' # Use cumsum dataset #
#' # ---------------------- #
#' x <- data.frame(treatment = ifelse(ex1_delayed_effect$trt == 1, "experimental", "control"),
#' stratum = rep("All", nrow(ex1_delayed_effect)),
#' tte = ex1_delayed_effect$month,
#' event = ex1_delayed_effect$evntd)
#'
#' # Users can specify the randomization ratio to calculate the statistical information under H0
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5), ratio = 2)
#'
#' x |>
#' counting_process(arm = "experimental") |>
#' wlr(weight = fh(rho = 0, gamma = 0.5), ratio = 2)
#'
#' # If users don't provide the randomization ratio, we will calculate the emperical ratio
#' x |> wlr(weight = fh(rho = 0, gamma = 0.5))
#'
#' x |>
#' counting_process(arm = "experimental") |>
#' wlr(weight = fh(rho = 0, gamma = 0.5))
wlr <- function(data, weight, return_variance = FALSE, ratio = NULL) {
UseMethod("wlr", data)
}
#' @rdname wlr
#' @export
wlr.default <- function(data, weight, return_variance = FALSE, ratio = NULL) {
if (!all(c("tte", "event", "stratum", "treatment") %in% colnames(data))) {
stop('Input must have the columns "tte", "event", "stratum", and "treatment"')
}
wlr.tte_data(data = data, weight = weight, return_variance = return_variance, ratio = ratio)
}
#' @rdname wlr
#' @export
wlr.tte_data <- function(data, weight, return_variance = FALSE, ratio = NULL) {
# if the `data` is NOT generated by sim_pw_surv
# - if user input the randomization ratio, we will directly take its values
# - otherwise, we calculate the empirical ratio
if (!"generate_by_simpwsurv" %in% names(attributes(data))) {
if (is.null(ratio)) {
ratio <- sum(data$treatment == "experimental") / sum(data$treatment == "control")
}
} else {
# if the `data` is generated by sim_pw_surv, the take the ratio from the attributes of `data`
ratio <- attributes(data)$ratio
}
x <- data |> counting_process(arm = "experimental")
wlr.counting_process(x, weight, return_variance = FALSE, ratio = ratio)
}
#' @rdname wlr
#' @export
wlr.counting_process <- function(data, weight, return_variance = FALSE, ratio = NULL) {
x <- data
if (is.null(ratio)) {
ratio <- attributes(data)$ratio
}
q_e <- ratio / (1 + ratio)
q_c <- 1 - q_e
# initialize the output
ans <- list()
ans$method <- "WLR"
# calculate z score
if (inherits(weight, "fh")) {
x <- x |> fh_weight(rho = weight$rho, gamma = weight$gamma)
ans$parameter <- paste0("FH(rho=", weight$rho, ", gamma=", weight$gamma, ")")
} else if (inherits(weight, "mb")) {
x <- x |> mb_weight(delay = weight$delay, w_max = weight$w_max)
ans$parameter <- paste0("MB(delay = ", weight$delay, ", max_weight = ", weight$w_max, ")")
} else if (inherits(weight, "early_period")) {
x <- x |> early_zero_weight(early_period = weight$early_period, fail_rate = weight$fail_rate)
ans$parameter <- paste0("Xu 2017 with first ", round(weight$early_period, 4), " months of 0 weights")
}
# calculate the treatment estimation
ans$estimate <- sum(x$o_minus_e * x$weight)
# calculate the se
ans$se <- sqrt(sum(x$var_o_minus_e * x$weight^2))
# calculate z-score
ans$z <- -ans$estimate / ans$se
# calculate the statistcial information
x$event_ctrl <- x$event_total - x$event_trt
weighted_event_trt <- sum(x$event_trt[x$event_trt > 0] * x$weight[x$event_trt > 0]^2)
weighted_event_ctrl <- sum(x$event_ctrl[x$event_ctrl > 0] * x$weight[x$event_ctrl > 0]^2)
ans$info <- (1 / weighted_event_trt + 1 / weighted_event_ctrl)^(-1)
ans$info0 <- sum(x$event_total * q_c * q_e * x$weight^2)
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.