Nothing
#' @title Risk-adjusted Bernoulli CUSUM
#'
#' @description This function can be used to construct a risk-adjusted Bernoulli
#' CUSUM chart for survival data.
#' It requires the specification of one of the following combinations of parameters
#' as arguments to the function:
#' \itemize{
#' \item \code{glmmod} & \code{theta}
#' \item \code{p0} & \code{theta}
#' \item \code{p0} & \code{p1}
#' }
#'
#'
#' @param data A \code{data.frame} containing the following named columns for each subject:
#' \describe{
#' \item{\code{entrytime}:}{time of entry into study (numeric);}
#' \item{\code{survtime}:}{time from entry until event (numeric);}
#' \item{\code{censorid}:}{censoring indicator (0 = right censored, 1 = observed)
#' (integer);}
#' } and optionally additional covariates used for risk-adjustment.
#' @param followup The value of the follow-up time to be used to determine event time.
#' Event time will be equal to \code{entrytime + followup} for each subject.
#' @param glmmod Generalized linear regression model used for risk-adjustment as produced by
#' the function \code{\link[stats:glm]{glm()}}. Suggested: \cr
#' \code{glm(as.formula("(survtime <= followup) & (censorid == 1) ~ covariates"), data = data, family = binomial(link = "logit"))}. \cr
#' Alternatively, a list containing the following elements:
#' \describe{
#' \item{\code{formula}:}{a \code{\link[stats:formula]{formula()}} in the form \code{~ covariates};}
#' \item{\code{coefficients}:}{a named vector specifying risk adjustment coefficients
#' for covariates. Names must be the same as in \code{formula} and colnames of \code{data}.}
#' }
#' @param theta The \eqn{\theta}{\theta} value used to specify the odds ratio
#' \eqn{e^\theta}{e^\theta} under the alternative hypothesis.
#' If \eqn{\theta >= 0}{\theta >= 0}, the chart will try to detect an increase
#' in the odds of failure (upper one-sided). If \eqn{\theta < 0}{\theta < 0},
#' the chart will look for a decrease in the odds of failure (lower one-sided).
#' Note that \deqn{p_1 = \frac{p_0 e^\theta}{1-p_0 +p_0 e^\theta}.}{p1 = (p0 * e^\theta)/(1-p0+p0 * e^\theta).}
#' @param p0 The baseline failure probability at \code{entrytime + followup} for individuals.
#' @param p1 The alternative hypothesis failure probability at \code{entrytime + followup} for individuals.
#' @param h (optional): Control limit to be used for the procedure.
#' @param stoptime (optional): Time after which the value of the chart should no longer be determined.
#' @param assist (optional): Output of the function \code{\link[success:parameter_assist]{parameter_assist()}}
#' @param twosided (optional): Should a two-sided Bernoulli CUSUM be constructed?
#' Default is \code{FALSE}.
#'
#'
#' @details The Bernoulli CUSUM chart is given by
#' \deqn{S_n = \max(0, S_{n-1} + W_n),}{S_n = max(0, S_{n-1} + W_n),} where
#' \deqn{W_n = X_n \ln \left( \frac{p_1 (1-p_0)}{p_0(1-p_1)} \right) + \ln \left( \frac{1-p_1}{1-p_0} \right)}{W_n = X_n ln((p_1 * (1-p_0))/(p_0 * (1-p_1))) + ln((1-p_1)/(1-p_0))}
#' and \eqn{X_n}{X_n} is the outcome of the \eqn{n}{n}-th (chronological) subject in the data. In terms of the Odds Ratio:
#' \deqn{W_n = X_n \ln \left( e^\theta \right) + \ln \left( \frac{1}{1-p_0 + e^\theta p_0} \right)}{W_n = X_n ln(exp(theta)) + ln((1)/(1-p_0 + exp(theta) * p_0))}
#' For a risk-adjusted procedure (when \code{glmmod} is specified), a patient specific baseline failure probability \eqn{p_{0i}}{p_(0i)} is modelled using logistic regression first.
#' Instead of the standard practice of displaying patient numbering on the
#' x-axis, the time of outcome is displayed.
#'
#'
#' @return An object of class \code{bercusum} containing:
#' \itemize{
#' \item \code{CUSUM}: A \code{data.frame} containing the following named columns:
#' \describe{
#' \item{\code{time}:}{times at which chart is constructed;}
#' \item{\code{value}:}{value of the chart at corresponding times;}
#' \item{\code{numobs}:}{number of observations at corresponding times.}
#' }
#' \item \code{call}: the call used to obtain output;
#' \item \code{glmmod}: coefficients of the \code{\link[stats:glm]{glm()}} used
#' for risk-adjustment, if specified;
#' \item \code{stopind}: indicator for whether the chart was stopped by the
#' control limit.
#' }
#'
# There are \code{\link[cgrcusum:plot.bercusum]{plot}} and
# \code{\link[cgrcusum:runlength.bercusum]{runlength}} methods for "bercusum" objects.
#'
#' @importFrom stats predict.glm
#' @importFrom stats model.matrix
#' @export
#'
#' @author Daniel Gomon
#'
#' @seealso \code{\link[success]{plot.bercusum}}, \code{\link[success]{runlength.bercusum}}
#'
#'
#' @examples
#' #We consider patient outcomes 100 days after their entry into the study.
#' followup <- 100
#' #Determine a risk-adjustment model using a generalized linear model.
#' #Outcome (failure within 100 days) is regressed on the available covariates:
#' exprfitber <- as.formula("(survtime <= followup) & (censorid == 1)~ age + sex + BMI")
#' glmmodber <- glm(exprfitber, data = surgerydat, family = binomial(link = "logit"))
#' #Construct the Bernoulli CUSUM on the 1st hospital in the data set.
#' bercus <- bernoulli_cusum(data = subset(surgerydat, unit == 1), glmmod = glmmodber,
#' followup = followup, theta = log(2))
#' #Plot the Bernoulli CUSUM
#' plot(bercus)
bernoulli_cusum <- function(data, followup, glmmod, theta, p0, p1, h, stoptime,
                            assist, twosided = FALSE) {
  # Construct a (risk-adjusted) Bernoulli CUSUM chart.
  #
  # exp(theta) is the odds ratio under the alternative hypothesis.
  # Supply one of the following combinations:
  #   1. glmmod + theta   2. p0 + theta   3. p0 + p1
  # Relationship between p1 and theta:
  #   p1 = (p0 * exp(theta)) / (1 - p0 + p0 * exp(theta))
  # data must contain entrytime, survtime, censorid and the covariates used
  # for risk-adjustment. glmmod must either inherit from class "glm" or be a
  # list containing $formula and $coefficients.
  # stoptime is the time until which the chart should be constructed.
  #
  # Returns a list of class "bercusum" with elements CUSUM (data.frame),
  # call, stopind and, when supplied, glmmod (coefficients) and h.

  # Elements of 'assist' are copied into this function's environment, where
  # they behave like supplied arguments (missing() reports FALSE for them).
  if (!missing(assist)) {
    list2env(assist, envir = environment())
  }
  call <- match.call()

  #------------------------------DATA CHECKS----------------------------------#
  # Basic data checks (shared helper defined elsewhere in the package)
  if (missing(data)) {
    stop("Please provide data to construct chart.")
  }
  data <- check_data(data)
  # followup must be a single, non-missing numeric value greater than 0
  if (!(is.numeric(followup) && length(followup) == 1 &&
        !is.na(followup) && followup > 0)) {
    stop("Argument followup must be a single numeric value larger than 0.")
  }
  if (!missing(stoptime)) {
    # Standard indexing instead of subset(): with subset() a data column named
    # 'stoptime' or 'followup' would silently shadow the argument.
    data <- data[which(data$entrytime + followup <= stoptime), , drop = FALSE]
  }

  #------------------------------FUNCTION BODY--------------------------------#
  # Indicator for whether the chart has been stopped by the control limit h
  stopind <- FALSE
  hnull <- missing(h)
  has_glmmod <- !missing(glmmod)

  if (nrow(data) == 0) {
    warning("No failures observed in specified time frame.\n",
            "Decrease 'followup' or consider a larger time frame for construction.\n",
            "Returning trivial chart.")
    Ber <- list(CUSUM = data.frame(time = 0, value = 0, numobs = 0),
                call = call,
                stopind = stopind)
    if (has_glmmod) {
      Ber$glmmod <- glmmod$coefficients
    }
    if (!hnull) {
      Ber$h <- h
    }
    class(Ber) <- "bercusum"
    return(Ber)
  }
  min_entrytime <- min(data$entrytime)

  # Order the data by subject entry time
  data <- data[order(data$entrytime), ]
  # Censored observations do not count as failures
  data$outcome <- as.integer((data$survtime <= followup) & (data$censorid == 1))
  # The outcome of a subject becomes known 'followup' time units after entry
  data$otime <- data$entrytime + followup

  #---------------------------PARAMETER RESOLUTION----------------------------#
  if (!missing(p1)) {
    if (missing(p0)) {
      if (missing(theta)) {
        stop("Please also provide a value for p0 or theta.")
      }
      # Invert p1 = p0 * exp(theta) / (1 - p0 + p0 * exp(theta)) for p0
      p0 <- p1 / (exp(theta) - exp(theta) * p1 + p1)
    }
    theta <- log((p1 * (1 - p0)) / (p0 * (1 - p1)))
  }
  # Validate once, up front, instead of failing half-way through the loop
  if (!has_glmmod && missing(p0)) {
    stop("Please supply a glmmod, or a value of p0 (or p1 together with theta).")
  }
  if (missing(theta)) {
    stop("Please provide a value for theta (or for both p0 and p1).")
  }
  if (!(isTRUE(twosided) || isFALSE(twosided))) {
    stop("Argument twosided must be either TRUE or FALSE.")
  }

  #------------------------------CONTROL LIMITS-------------------------------#
  if (twosided) {
    # The two-sided chart is built from +theta (upper) and -theta (lower)
    theta <- abs(theta)
    if (!hnull) {
      if (length(h) == 1) {
        h <- sort(c(-h, h))
      } else if (length(h) == 2) {
        if (!all(sign(sort(h)) == c(-1, 1))) {
          stop("When specifying 2 control limits the two values should have reverse signs.")
        }
        h <- sort(h)
      } else if (length(h) > 2) {
        stop("Please provide 1 or 2 values for the control limit.")
      }
    }
  } else if (!hnull) {
    if (length(h) > 1) {
      stop("Please provide only 1 value for the control limit")
    }
    # The sign of h follows the direction of the chart
    h <- if (theta >= 0) abs(h) else -abs(h)
  }

  #------------------------------RISK ADJUSTMENT------------------------------#
  # Pre-calculate subject specific baseline failure probabilities
  if (has_glmmod) {
    if (inherits(glmmod, "glm")) {
      fixprobs <- predict(glmmod, newdata = data, type = "response")
    } else {
      mmatrix <- model.matrix(glmmod$formula, data)
      coeffs <- glmmod$coefficients[colnames(mmatrix)]
      fixprobs <- c(1 / (1 + exp(-mmatrix %*% coeffs)))
    }
  }
  # Outcome-independent part of W_n summed over the subjects in 'idx':
  # sum of ln(1 / (1 - p + exp(th) * p)). For a common p0 this is written as
  # n * log(x) rather than log(x^n), which can underflow for large n.
  llr_offset <- function(idx, th) {
    if (has_glmmod) {
      probs <- fixprobs[idx]
      sum(log(1 / (1 - probs + exp(th) * probs)))
    } else {
      length(idx) * log(1 / (1 - p0 + exp(th) * p0))
    }
  }

  #------------------------------CHART CONSTRUCTION---------------------------#
  obs_times <- unique(data$otime)
  n_times <- length(obs_times)
  # Preallocate the chart instead of growing a data.frame with rbind()
  chart_value <- numeric(n_times)  # one-sided value, or upper part if twosided
  chart_down <- numeric(n_times)   # lower part (two-sided chart only)
  cum_obs <- numeric(n_times)
  value <- 0
  value_down <- 0
  numobs <- 0
  n_used <- 0L
  upward <- theta >= 0
  for (k in seq_len(n_times)) {
    # Subjects whose outcome becomes known at this time point
    idx <- which(data$otime == obs_times[k])
    numobs <- numobs + length(idx)
    failures <- sum(data$outcome[idx])
    if (twosided) {
      Wn_upper <- failures * theta + llr_offset(idx, theta)
      Wn_lower <- -failures * theta + llr_offset(idx, -theta)
      value <- max(0, value + Wn_upper)
      value_down <- min(0, value_down - Wn_lower)
      chart_down[k] <- value_down
    } else {
      Wn <- failures * theta + llr_offset(idx, theta)
      # A lower one-sided chart (theta < 0) is displayed below zero
      value <- if (upward) max(0, value + Wn) else min(0, value - Wn)
    }
    chart_value[k] <- value
    cum_obs[k] <- numobs
    n_used <- k
    # Stop as soon as a specified control limit is reached
    if (!hnull) {
      if (twosided) {
        if (length(h) == 2 && ((value >= h[2]) || (value_down <= h[1]))) {
          stopind <- TRUE
          break
        }
      } else if (abs(value) >= abs(h)) {
        stopind <- TRUE
        break
      }
    }
  }

  # Keep only the time points that were processed; the first row is the
  # starting point of the chart at the earliest entry time.
  keep <- seq_len(n_used)
  if (twosided) {
    Gt <- data.frame(time = c(min_entrytime, obs_times[keep]),
                     val_up = c(0, chart_value[keep]),
                     val_down = c(0, chart_down[keep]),
                     numobs = c(0, cum_obs[keep]))
  } else {
    Gt <- data.frame(time = c(min_entrytime, obs_times[keep]),
                     value = c(0, chart_value[keep]),
                     numobs = c(0, cum_obs[keep]))
  }

  Ber <- list(CUSUM = Gt,
              call = call,
              stopind = stopind)
  if (has_glmmod) {
    Ber$glmmod <- glmmod$coefficients
  }
  if (!hnull) {
    Ber$h <- h
  }
  class(Ber) <- "bercusum"
  Ber
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.