Nothing
#' Calculates estimates of the mean cost with censored data
#'
#' @description This function calculates the mean cost for right-censored cost
#' data over a period of L time units (days, months, years,...)
#'
#' @details The function returns four estimates. The first two are simple and biased
#' downwards, and included for comparison. The estimates are:
#'
#' - AS: "Available Sample estimator" - The simple sample mean
#'
#' - CC: "Complete Case estimator" - The mean of fully observed cases
#'
#' - BT: "Weighted Complete Case estimator" - Bang and Tsiatis's estimator
#'
#' - ZT: "Weighted Available estimator" - Zhao and Tian's estimator
#'
#' The function needs the following in a dataframe:
#'
#' - id: The id separating each individual
#'
#' - cost: The total cost, or if start and stop provided the specific cost
#'
#' - start: Start of cost
#'
#' - stop: End of cost, if one time cost then start = stop
#'
#' - delta: Event variable, 1 = event, 0 = no event
#'
#' - surv: Survival
#'
#' @param x A dataframe with columns: id, cost, delta and surv. If Cost history is available it can be specified by: start and stop,
#' @param L Limit. Mean cost is calculated up till L, if not specified L = max(surv)
#' @param addInterPol This parameter affects the interpolation of cost between two observed times. Defaults to zero.
#'
#' @return An object of class "ccobject".
#'
#' @examples
#' hcost
#' ccmean(hcost, L = 1461, addInterPol = 1)
#'
#' @references
#' \insertRef{Bang2000}{ccostr}
#'
#' \insertRef{Zhao2001}{ccostr}
#'
#' @export
#' @importFrom Rdpack reprompt
#' @importFrom rlang .data
#' @importFrom data.table as.data.table := setDTthreads
#' @import ggplot2 dplyr survival msm knitr tibble
ccmean <- function(x, L = max(x$surv), addInterPol = 0) {
if( !("id" %in% names(x)) | !("cost" %in% names(x)) | !("delta" %in% names(x)) | !("surv" %in% names(x)) )
stop('Rename colums to: "id", "cost", "delta" and "surv"')
#################################################################
## section 1: ##
## Basic adjustments ##
#################################################################
maxsurv <- max(x$surv) # if L is not set this is equal to limit
## Adjust cost and survival times for data with/without cost history
if( ("start" %in% names(x)) & ("stop" %in% names(x)) ) { #With Cost history
# Subset to estimation period
x$delta[x$surv >= L] <- 1
x$surv <- pmin(x$surv, L)
x <- subset(x, x$start <= L)
# Adjust overlapping costs and arranging data
x <- x %>%
mutate(cost = ifelse(.data$stop > .data$surv,
.data$cost * ((.data$surv - .data$start + addInterPol)/(.data$stop - .data$start + addInterPol)),
.data$cost),
stop = pmin(.data$stop, L)) %>%
arrange(.data$surv, .data$delta)
# Some calculations do not use cost history, so collapsed by ID
xf <- x %>%
group_by(.data$id) %>%
summarize(cost = sum(.data$cost, na.rm = TRUE),
delta = last(.data$delta),
surv = first(.data$surv))
} else if (length(x$id) > length(unique(x$id))) {
stop('No cost history but non-unique id tags')
} else { # Without cost history
message('No cost history found, can be set by: "start" and "stop"')
xf <- x %>%
mutate(cost = ifelse(.data$surv > L,
.data$cost * ((L + addInterPol)/(.data$surv + addInterPol)),
.data$cost),
delta = as.numeric(.data$surv >= L | .data$delta == 1),
surv = pmin(.data$surv, L)) %>%
arrange(.data$surv, .data$delta)
}
#################################################################
## section 2: ##
## Naive (Avaiable Sample estimator) ##
#################################################################
# Basic naive estimate from mean of the complete samples
AS <- mean(xf$cost)
AS_var <- var(xf$cost) / nrow(xf)
# Results of AS
AS_full <- c(AS,
AS_var,
sqrt(AS_var),
AS - 1.96 * sqrt(AS_var),
AS + 1.96 * sqrt(AS_var))
#################################################################
## section 3: ##
## Naive (Complete Case estimator) ##
#################################################################
# Basic naive estimate from mean of the complete cases
CC <- mean(xf$cost[xf$delta == 1])
CC_var <- var(xf$cost[xf$delta == 1]) / sum(xf$delta)
# Results of CC
CC_full <- c(CC,
CC_var,
sqrt(CC_var),
CC - 1.96 * sqrt(CC_var),
CC + 1.96 * sqrt(CC_var))
#################################################################
## section 4: ##
## Lin's estimator (1997) ##
#################################################################
# WORKING ESTIMATOR, BUT DUE TO SIMILARITY WITH BT IT IS NOT ACTIVE
#
# # Kaplan-meier estimate for censoring distribution get censoring times to define intervals
# sc <- summary(survfit(Surv(xf$surv, xf$delta == 0) ~ 1))
# censBreaks <- c(0, sc$time, Inf)
#
# # calculate average costs of patients deceased within each interval
# a <- subset(xf, delta == 1) %>%
# mutate(ints = cut(surv, breaks = censBreaks, dig.lab = 5)) %>%
# group_by(ints) %>%
# summarise(mean = mean(cost))
#
# # Get survival times for intervals
# sd <- survfit(Surv(xf$surv, xf$delta == 1) ~ 1)
# intLow <- as.numeric(gsub("\\(", "", sapply(strsplit(as.character(a$ints), ","), function(x) x[[1]])))
# intHigh <- as.numeric(gsub("\\]", "", sapply(strsplit(as.character(a$ints), ","), function(x) x[[2]])))
# svLow <- summary(sd, times = intLow)$surv
# svHigh <- c(summary(sd, times = intHigh)$surv)
# if(length(svHigh) < length(svLow)){ ## Add zero if last value of intHigh is Inf
# svHigh <- c(svHigh,0)
# }
#
# # Gathering the data in a new dataframe
# d <- data.frame(a, "survDif" = svLow-svHigh)
#
# # calculating Lin's T estimator of total costs
# LinT <- sum(d$survDif*d$mean, na.rm=T)
#################################################################
## section 5: ##
## Bang and Tsiatis's estimator (2000) ##
#################################################################
# Kaplan-Meier estimate for the censoring distribution
sc <- summary(survfit(Surv(xf$surv, xf$delta == 0) ~ 1), times = xf$surv)
sct <- data.frame(sc$time, sc$surv)
sct$sc.surv[sct$sc.surv == 0] <- min(sct$sc.surv[sct$sc.surv != 0])
sct <- unique(sct)
# Kaplan-Meier estimate for the survival distribution
s <- summary(survfit(Surv(xf$surv, xf$delta) ~ 1), times = xf$surv)
st <- data.frame(s$time, s$surv)
st <- unique(st)
# Merge probabilities of censoring and survival to date
t <- merge(xf, sct, by.x = "surv", by.y = "sc.time", all.x = T)
t <- merge(t, st, by.x = "surv", by.y = "s.time", all.x = T)
# Calculation of the BT estimator
BT <- mean((t$cost * t$delta) / t$sc.surv)
# Variance of the BT estimator
n <- length(t$cost)
t$GA <- rep(0, n)
t$GB <- rep(0, n)
for(i in 1:n){
if(t$delta[i] == 1) next
t2 <- subset(t, surv >= t$surv[i])
t$GA[i] <- (1 / (n*t$s.surv[i])) * sum(t2$delta * t2$cost^2 / t2$sc.surv)
t$GB[i] <- (1 / (n*t$s.surv[i])) * sum(t2$delta * t2$cost / t2$sc.surv)
}
BT_var <- 1 / n * (mean(t$delta * (t$cost-BT)^2 / t$sc.surv) +
mean(((1 - t$delta) / t$sc.surv^2 ) * (t$GA - t$GB^2)))
# Results of the BT estimator
BT_full <- c(BT,
BT_var,
sqrt(BT_var),
BT - 1.96 * sqrt(BT_var),
BT + 1.96 * sqrt(BT_var))
#################################################################
## section 6: ##
## Zhao and Tian's estimator (2001) ##
#################################################################
if( ("start" %in% names(x)) & ("stop" %in% names(x)) ) {
# Matrice and vectors to be filled with the for loop
runCostMatrix <- matrix(0, nrow = nrow(t), ncol = nrow(t))
t$mcostlsurv <- 0
t$mcostlsurvSq <- 0
setDTthreads(1)
surv <- NULL
start <- NULL # Hack to avoid NSE error in data.table
cost <- NULL
# For each censored individual the cost is calculated for longer
# surviving individuals up till time ti
for(i in 1:nrow(t)){
if(t$delta[i] == 1){
next
} else {
# Use of DT for improved speed
DT <- data.table::as.data.table(x)[start <= t$surv[i]]
DT <- DT[,cost := ifelse(stop > t$surv[i], (cost/(stop - start + addInterPol)) * (t$surv[i] - start + addInterPol), cost)]
t_data2 <- DT[, list(cost = sum(cost), surv = first(surv)), by = list(id)]
# Store in runCostMatrix for kept ids
idIndex <- t$id %in% t_data2$id
ids <- t$id[idIndex]
runCost <- t_data2$cost
names(runCost) <- t_data2$id
runCostMatrix[idIndex, i] <- runCost[as.character(ids)]
# Get mean runCost for longer surviving ids
t$mcostlsurv[i] <- mean(t_data2$cost[t_data2$surv >= t$surv[i]])
t$mcostlsurvSq[i] <- mean(t_data2$cost[t_data2$surv >= t$surv[i]]^2)
}
}
# Calculation of the ZT estimator
ZT <- BT + mean(((1 - t$delta) * ((t$cost - t$mcostlsurv) / t$sc.surv)), na.rm = TRUE)
# Variance of the ZT estimator
n <- nrow(t)
t$gm <- rep(0,n)
t$gmm <- rep(0,n)
for(i in 1:n){
if(t$delta[i] == 1) next
t$gm[i] <- (1 / (n * t$s.surv[i])) * sum(as.numeric(t$surv >= t$surv[i]) * t$delta * runCostMatrix[,i] / t$sc.surv)
t$gmm[i] <- (1 / (n * t$s.surv[i])) * sum(as.numeric(t$surv >= t$surv[i]) * t$delta * t$cost * runCostMatrix[,i] / t$sc.surv)
}
ZT_var <- BT_var - (2 / n^2) * sum(((1 - t$delta) / t$sc.surv^2) * (t$gmm - t$GB * t$gm)) +
(1 / n^2) * sum(((1 - t$delta) / t$sc.surv^2) * (t$mcostlsurvSq - t$mcostlsurv^2))
# Results of the ZT estimator
ZT_full <- c(ZT,
ZT_var,
sqrt(ZT_var),
ZT - 1.96 * sqrt(ZT_var),
ZT + 1.96 * sqrt(ZT_var))
} else {
ZT <- NA
ZT_full <- rep(NA,5)
}
#################################################################
## section 7: ##
## Results ##
#################################################################
# Calculation of median survival time
svl1 <- survival::survfit(Surv(xf$surv, xf$delta == 1) ~ 1)
svl2 <- summary(svl1)[["table"]]
# Results of all estimators are compiled
results <- list(Text = c("ccostr - Estimates of mean cost with censored data"),
Data = data.frame("Observations" = nrow(x),
"Individuals" = nrow(xf),
"FullyObserved" = sum(xf$delta == 1),
"Limits" = L,
"TotalTime" = sum(xf$surv),
"MaxSurvival" = maxsurv,
row.names = "N"),
First = data.frame(AS, CC, BT, ZT),
Estimates = data.frame("AS" = AS_full,
"CC" = CC_full,
"BT" = BT_full,
"ZT" = ZT_full,
row.names = c("Estimate", "Variance", "SE", "0.95LCL", "0.95UCL")),
Survival = svl2
)
# The output is given as an S3 class
class(results) <- "ccobject"
results
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.