#' Risk-adjusted funnel plot
#'
#' @description This function allows to construct a risk-adjusted funnel plot
#' for comparing survival proportion between instances.
#'
#' @param data A \code{data.frame} with rows representing subjects and the
#' following named columns: \describe{
#' \item{\code{entrytime}:}{time of entry into study (numeric);}
#' \item{\code{survtime}:}{time from entry until event (numeric);}
#' \item{\code{censorid}:}{censoring indicator (0 = right censored, 1 = observed),
#' (integer);}
#' \item{\code{instance}:}{integer or character indicating which instance
#' (f.e. hospital) the observation belongs to.}
#' } and optionally additional covariates used for risk-adjustment.
#' @param ctime Construction time at which the funnel plot
#' should be determined. Maximum possible time used when not specified.
#' @param p0 The baseline failure probability at \code{entrytime + followup} for individuals.
#' If not specified, average failure proportion over whole data is used instead.
#' @param glmmod A generalized linear regression model as produced by
#' the function \code{\link[stats:glm]{glm()}}. Recommended: \cr
#' \code{glm(as.formula("(survtime <= followup) & (censorid == 1) ~ covariates"), data = data)}. \cr
#' Alternatively, a list with the following elements:
#' \describe{
#' \item{\code{formula}:}{a \code{\link[stats:formula]{formula()}} in the form \code{~ covariates};}
#' \item{\code{coefficients}:}{a named vector specifying risk adjustment coefficients
#' for covariates. Names must be the same as in \code{formula} and colnames of \code{data}.}
#' }
#' @param followup The followup time for every individual. At what time
#' after subject entry do we consider the outcome?
#' @param conflev A vector of confidence levels of interest. Default is c(0.95, 0.99).
#'
#' @return An object of class "funnelplot" containing:
#' \itemize{
#' \item \code{data}: A \code{data.frame} containing:
#' \describe{
#' \item{\code{instance}:}{instance number/name;}
#' \item{\code{observed}:}{observed number of failures at instance;}
#' \item{\code{expected}:}{expected (risk-adjusted) number of failures at instance;}
#' \item{\code{numtotal}}{total number of individuals considered at this instance;}
#' \item{\code{p}:}{(risk-adjusted) proportion of failure at instance;}
#' \item{\code{conflevels}:}{worse/normal/better performance than expected at
#' specified confidence levels.}
#' }
#' \item \code{call}: the call used to obtain output
#' \item \code{plotdata}: data used for plotting confidence intervals
#' \item \code{conflev}: specified confidence level(s)
#' \item \code{p0}: (Estimated) baseline failure probability
#' }
# There are \code{\link[cgrcusum:plot.funnelplot]{plot}} and
# \code{\link[cgrcusum:summary.funnelplot]{summary}} methods for "funnelplot" objects.
#'
#' @importFrom stats predict.glm
#' @importFrom stats qnorm
#' @export
#'
#' @author Daniel Gomon
#' @family quality control charts
#' @seealso \code{\link[cgrcusum]{plot.funnelplot}}, \code{\link[cgrcusum]{summary.funnelplot}}
#'
#'
#' @examples
#' #Determine a risk-adjustment model using a generalized linear model.
#' #Outcome (survival in first 100 days) is regressed on the available covariates:
#' exprfitfunnel <- as.formula("(survtime <= 100) & (censorid == 1)~ age + sex + BMI")
#' surgerydat$instance <- surgerydat$hosp_num
#' glmmodfun <- glm(exprfitfunnel, data = surgerydat, family = binomial(link = "logit"))
#' #Determine the necessary values to produce a funnel plot
#' funnel <- funnel_plot(data = surgerydat, ctime = 3*365, glmmod = glmmodfun, followup = 100)
#' #Produce a funnel plot!
#' plot(funnel)
funnel_plot <- function(data, ctime, p0, glmmod, followup, conflev = c(0.95, 0.99)){
entrytime <- instance <- NULL
call <- match.call()
#Basic data checks (global for BK, CGR and Bernoulli & funnel)
if(missing(data)){
stop("Please provide data to construct chart.")
} else{
data <- check_data(data)
}
#First perform a logistic regression to obtain coefficients for the Risk-adjustment, then specify them later
#dat has to be a dataframe containing at least entrytime (time of entry), survtime (time until failure), and additionally the covariates to RA on, it also has to contain $instance indicating the hospital in question
#glmmodel is the risk-adjustment model, either an object of class "glm" or $formula and $coefficients
#followup is time until which we consider outcomes, usually 365 (1 year) as we consider 1 year post transplant
#Specify institute name or number in dat$instance
#conflev indicates the confidence levels at which to plot the boundaries
#time is the chronological time at which the FUNNEL chart should be constructed, we remove non-qualifying cases
if(!missing(ctime)){
newdata <- subset(data, entrytime + followup <= ctime)
} else{
newdata <- data
}
if(missing(p0)){
warning("No value provided for null (hypothesis) failure probability. Determining using average over whole data set.", immediate. = TRUE)
p0 <- length(which((newdata$survtime <= followup) & (newdata$censorid == 1)))/length(newdata$survtime)
}
plotframe <- data.frame(instance = character(), observed = double(), expected = double(), numcases = double())
for(j in unique(newdata$instance)){
tempdata <- subset(newdata, instance == j)
tempnum <- length(tempdata$survtime)
if(!missing(glmmod)){
if(inherits(glmmod, "glm")){
tempprobs <- predict(glmmod, newdata = tempdata, type = "response")
tempexpec <- sum(tempprobs)
} else{
mmatrix <- model.matrix(glmmod$formula, tempdata)
coeffs <- glmmod$coefficients[colnames(mmatrix)]
tempexpec <- sum(c(1/(1 + exp(-mmatrix %*% coeffs))))
}
} else{
tempexpec <- tempnum * p0
}
tempexpec <- ifelse(tempexpec > nrow(tempdata), nrow(tempdata), tempexpec)
tempobs <- length(which((tempdata$survtime <= followup) & (tempdata$censorid == 1)))
temprow <- data.frame(as.character(j), tempobs, tempexpec, tempnum)
plotframe <- rbind(plotframe, temprow)
}
colnames(plotframe) = c("instance", "observed", "expected", "numtotal")
plotframe$p <- plotframe$observed/plotframe$expected * p0
plotframe$p <- ifelse(plotframe$p > 1, 1, plotframe$p)
boundplotframe <- data.frame(number = double(),conflev = double(),lower = double(), upper = double())
plotseq <- seq(max(1, min(plotframe$numtotal)-10), max(plotframe$numtotal) +10, by = 1)
findbounds <- function(t, conflev){
return(c(p0 - qnorm(conflev) * sqrt((p0*(1-p0))/t),p0 + qnorm(conflev) * sqrt((p0*(1-p0))/t)))
}
for(k in 1:length(conflev)){
temprow2 <- data.frame(plotseq, rep(conflev[k], length(plotseq)), t(sapply(plotseq, function(t) findbounds(t, conflev[k]))))
boundplotframe <- rbind(boundplotframe, temprow2)
tempchar <- character(length = nrow(plotframe))
for(i in 1:nrow(plotframe)){
tempbounds <- findbounds(plotframe$numtotal[i], conflev = conflev[k])
if(plotframe$p[i] > tempbounds[2]){
tempchar[i] <- "worse"
} else if(plotframe$p[i] < tempbounds[1]){
tempchar[i] <- "better"
} else{
tempchar[i] <- "normal"
}
}
plotframe <- cbind(plotframe, tempchar)
colnames(plotframe)[ncol(plotframe)] <- as.character(conflev[k])
}
colnames(boundplotframe) = c("numtotal", "conflev", "lower", "upper")
funnelp <- list(data = plotframe,
call = call,
plotdata = boundplotframe,
conflev = conflev,
p0 = p0)
class(funnelp) <- "funnelplot"
funnelp
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.