Nothing
#' Commenges-Andersen test for heterogeneity
#'
#' @param object A \code{coxph} object with a \code{cluster()} statement in the right-hand side of the formula.
#' @param id Optionally, a vector determining the grouping to be tested. See details.
#'
#' @return A named vector containing the test statistic, variance, and p-value
#' @export
#'
#' @details The Cox model with a \code{+cluster()} statement has the same point estimates
#' as the one without that statmenet. The only difference is in the adjusted standard errors.
#' In some cases, a model with \code{+cluster()} statments can't be fitted. For example, when there
#' are no covariates. In that case, a vector may be passed on in the \code{cluster} argument.
#'
#' @importFrom tibble rownames_to_column
#' @importFrom survival basehaz
#' @examples
#' mcox1 <- coxph(Surv(time, status) ~ rx + sex + cluster(litter),
#' rats, model = TRUE, x = TRUE)
#' ca_test(mcox1)
#'
#' mcox2 <- coxph(Surv(time, status) ~ 1, rats, x = TRUE)
#' ca_test(mcox2, rats$litter)
#' @references Commenges, D. and Andersen, P.K., 1995. Score test of homogeneity for survival data. Lifetime Data Analysis, 1(2), pp.145-156.
ca_test <- function(object, id = NULL) {
# Check input
if(!inherits(object, "coxph"))
warning("input should be a coxph object")
if(is.null(object$model) & is.null(id))
stop("object should be created with model=TRUE or id specified")
if(length(grep("cluster", names(object$model)))==0)
if(is.null(id)) stop("could not find cluster; please specify in the Cox model or id")
if(!is.null(id) & length(grep("cluster", names(object$model)))!=0)
warning("cluster specified in both arguments, ignoring id")
if(!any(names(object) == "x"))
stop("object should be created with x=TRUE")
# the real linear predictors
if(!is.null(object$means)) {
lp <- object$linear.predictors +
as.numeric(object$means %*% object$coefficients)
elp <- exp(lp)
} else {
lp <- object$linear.predictors
elp <- exp(lp)
}
Y <- object$y
if(ncol(Y) == 2) Y <- cbind(0, Y)
Y <- as.data.frame(cbind(Y[,1], Y[,2], Y[,3], elp = elp))
if(length(grep("strata", names(object$model)))==0)
Y$strata <- rep(1, nrow(Y)) else
Y$strata <- object$model[,grep("strata", names(object$model))]
Ysp <- split(Y, Y$strata)
time <- sort(unique(Y[,2]))
timesp <- lapply(Ysp, function(x) sort(unique(x[,2])))
# this gives for every tstart (line variable) after which event time did it come
# indx2 <- mapply(function(x,y) findInterval(x[,1], y),Ysp, timesp, SIMPLIFY = FALSE)
indx2 <- lapply(Ysp, function(x) findInterval(x[,1], time))
# This gives up t0 which time point does each line last
# time_to_stop <- mapply(function(x,y) match(x[,2], y),
# Ysp, timesp, SIMPLIFY = FALSE)
time_to_stop <- lapply(Ysp, function(x) match(x[,2], time))
# Time for S0_ht = for each strata, sum of Yij(t) elp(ij) within that strata
# indx2_1 <- indx2[[1]]
# tts_1 <- time_to_stop[[1]]
# Y1 <- Ysp[[1]]
#
# r1 <- lapply(seq_along(time), function(x) {
# sum(Y1$elp[indx2_1 < x & x <= tts_1])
# }) %>%
# do.call(c, .)
#
# r1 %>% str()
S0_ht <- mapply(function(b,c,d) {
lapply(seq_along(time), function(x) {
sum(b$elp[c < x & x <= d])
})
}, Ysp, indx2, time_to_stop, SIMPLIFY = FALSE) %>%
lapply(function(x) do.call(c, x)) %>%
lapply(function(x) {
x[x==0] <- -3
x
})
# The last part is there to avioid dividing 0 by 0
# Time for pij(s) = Y_{ij}(s) exp(beta' x_{ij}) / S0(s)
# this part means: take Y, indx2 is the tstart coded, time_to_stop
# So within each transition, get a
# (strata) for each time point - get Y(t) * exp(beta'x) as a vector of time for each row
# part1 is a list of length(strata) - list of length(all time points) -
# vectors of all rows and how they contribute to that time point
# p1 <- mapply(function(b,c,d) {
# lapply(seq_along(time), function(x) {
# b$elp * as.numeric(c < x & x <= d)
# })
# }, Ysp, indx2, time_to_stop, SIMPLIFY = FALSE)
pij_ht_rowh <- mapply(function(b,c,d) {
lapply(seq_along(time), function(x) {
b$elp * as.numeric(c < x & x <= d)
})
}, Ysp, indx2, time_to_stop, SIMPLIFY = FALSE) %>%
mapply(function(a,b) {
mapply(function(c,d) c/d, a, b, SIMPLIFY = FALSE)
}, ., S0_ht, SIMPLIFY = FALSE)
# need pij_ht
# pij (strata)(all_times)(alllines_within_strata)
if(!is.null(id) & length(grep("cluster", names(object$model)))==0) {
id <- as.numeric(id)
if(length(id) != nrow(Y)) stop("id not of the same length as the data")
varmat <- object$var
} else {
id <- object$model[,grep("cluster", names(object$model))]
varmat <- object$naive.var
}
order_id <- split(match(id, unique(id)), Y$strata)
# order_id (strata)(alllines_within_strata)
pij_hrowh_t <- pij_ht_rowh %>%
lapply(function(x) do.call(cbind, x)) %>%
lapply(function(x) split(x, 1:nrow(x))) %>%
lapply(function(x) do.call(rbind, x))
# This is each for each strata, a matrix with each row a cluster that exists in that strata
# and each column one time point
pi_ht <- mapply(function(a,b) {
rowsum(a, b, reorder = FALSE)
}, pij_hrowh_t, order_id, SIMPLIFY = FALSE)
# Now to calculate the T statistic
M <- split(object$residuals, Y$strata)
#(strata)
Mi_strata <- mapply(function(a,b) {
rowsum(a, b)
}, M, order_id, SIMPLIFY = FALSE)
Mi_1 <- Mi_strata %>%
lapply(as.data.frame) %>%
lapply(tibble::rownames_to_column) %>%
do.call(rbind, .)
Mi <- as.numeric(rowsum(Mi_1$V1, Mi_1$rowname))
# with(rowsum(V1, rowname)) %>%
# as.numeric() # already by cluster
death <- (Y[, 3] == 1)
deathsp <- split(death, Y$strata)
# number of events per time point within each strata
nevent <- lapply(Ysp, function(x) {
lapply(time, function(tp) x[,2] == tp & x[,3] ==1)
}) %>%
lapply(function(x) {
do.call(c, lapply(x, sum))
})
thirdterm <- pi_ht %>%
lapply(function(x) x^2) %>%
lapply(function(x) apply(x, 2, sum)) %>%
mapply(function(a,b) a * b, ., nevent, SIMPLIFY = FALSE) %>%
lapply(sum) %>%
do.call(sum, .)
T_stat <- sum(Mi^2) - sum(death) + thirdterm
# Now the variance of T_stat
# without strata: Hi(S) = 2 {Mi(s-) - sum_l=1^n Ml(s-) pl(s) - pi(s) + sum_l=1^n pl^2(s)}
# with strata:
# Hih(s) = 2{Mi(s-) - sum_l=1^n Ml(s-) plh(s) - pih(s) + sum_l=1^n plh^2(s)}
# Martingale path for each line
# Calculate baseline cumulative hazard
if(!is.null(object$coefficients))
BH <- basehaz(object, centered = FALSE) else
BH <- basehaz(object)
if(is.null(BH$strata)) BH$strata <- rep(1, nrow(BH))
BHsp <- split(BH, BH$strata)
alltp <- sort(unique(BH$time))
# pos_leftsp <- mapply(function(a,b) findInterval(a[,1], b$time), Ysp, BHsp, SIMPLIFY = FALSE)
# pos_rightsp <- mapply(function(a,b) match(a[,2], b$time), Ysp, BHsp, SIMPLIFY = FALSE)
pos_leftsp <- lapply(Ysp, function(x) findInterval(x[,1], alltp))
pos_rightsp <- lapply(Ysp, function(x) match(x[,2], alltp))
# Idea is to have this at ALL event time points, not only those in the strata
cumhazsp <- lapply(BHsp, function(bh) {
approx(x = bh$time, y = bh$hazard, xout = time, method = "constant", yleft = 0)$y
})
# First the path of the - cumulative hazard for each row
Lij_path <- mapply(function(x,y,cumhaz) {
mapply(function(pos_left, pos_right) {
if(pos_left == 0) {
cumhaz[pos_right:length(cumhaz)] <- cumhaz[pos_right] # cumulative hazard doesn't increase after tstop
return(-cumhaz)
}
cumhaz[(pos_left+1):length(cumhaz)] <- cumhaz[(pos_left+1):length(cumhaz)] - cumhaz[pos_left] # substract the hazard before tstart
cumhaz[1:(pos_left+1)] <- 0 # make it 0 before tstop
cumhaz[pos_right:length(cumhaz)] <- cumhaz[pos_right] # cumulative hazard doesn't increase after tstop
return(-cumhaz) # I THINK
}, x, y, SIMPLIFY = FALSE)
}, pos_leftsp, pos_rightsp, cumhazsp, SIMPLIFY = FALSE)
Lij_elp_path <- mapply(function(a,b) {
mapply(function(x,y) x * y, a, b$elp, SIMPLIFY = FALSE)
}, Lij_path, Ysp, SIMPLIFY = FALSE)
Mij_path <- mapply(function(x,y,z) {
mapply(function(a, pos, delta) {
a[pos:length(a)] <- delta + a[pos]
a
}, x,y,z, SIMPLIFY = FALSE)
}, Lij_elp_path, time_to_stop, deathsp, SIMPLIFY = FALSE) %>%
lapply(function(x) {
lapply(x, function(y) c(0, y[-length(y)]))
})
# Now to get Mi so sum these according to clusters
# Each row is th Mi path of a cluster; the cluster is the name of the row
Mit <- do.call(c, Mij_path) %>%
do.call(rbind, .) %>%
rowsum(., group = do.call(c, order_id), reorder = FALSE)
# Elements of Hih(t)
Mit_all <-
Mit[order(as.numeric(rownames(Mit))),]
# clusters by time points
# pi_ht is clusters by time points
# but I walso clusters that are missing within each strata
# and add them there with zeros.
pi_ht_all <- pi_ht %>%
lapply(function(x) {
rnames <- rownames(x)
allnames <- as.character(unique(do.call(c, order_id)))
missnames <- which(!(allnames %in% rnames))
missmat <- matrix(0, nrow = length(missnames),
ncol = ncol(x),
dimnames = list(allnames[missnames]))
wholemat <- rbind(x, missmat)
wholemat[order(as.numeric(rownames(wholemat))),]
})
# First element
# terms 1 and 3
term13 <- lapply(pi_ht_all, function(x) Mit_all - x)
term2 <- lapply(pi_ht_all, function(x) x * Mit_all) %>%
lapply(function(x) apply(x, 2, sum))
term4 <- lapply(pi_ht_all, function(x) x^2) %>%
lapply(function(x) apply(x, 2, sum))
term24 <- mapply(function(a,b) b - a, term2, term4, SIMPLIFY = FALSE)
H_hi_t <- mapply(function(a,b) {
2 * (t(t(a) + b))
}, term13, term24, SIMPLIFY = FALSE)
# Ihat
Ihat <- H_hi_t %>%
lapply(function(x) x^2) %>%
mapply(function(a,b) a * b,
., pi_ht_all, SIMPLIFY = FALSE) %>%
mapply(function(a,b) {
t(t(a) * b)
}, ., nevent, SIMPLIFY = FALSE) %>%
lapply(function(x) apply(x, 1, sum)) %>%
lapply(sum) %>%
do.call(sum, .)
# Jhat
# X <- object$model
xsp <- split(as.data.frame(object$x), Y$strata)
# lapply(function(x) {
# rnames <- rownames(x)
# allnames <- as.character(unique(id))
# missnames <- which(!(allnames %in% rnames))
# missmat <- matrix(0, nrow = length(missnames), ncol = ncol(x), dimnames = list(allnames[missnames]))
# wholemat <- rbind(x, missmat)
# wholemat[order(as.numeric(rownames(wholemat))),]
# })
zipit <- mapply(function(a,b) {
lapply(a, function(vec) {
vec * b
})
}, xsp, pij_hrowh_t, SIMPLIFY = FALSE) %>%
mapply(function(a,b) {
lapply(a, function(x) rowsum(x, b, reorder = FALSE))
}, ., order_id, SIMPLIFY = FALSE)
# zipit is a list(strata)(covariate)(clusters within strata X total times)
# Add the missing cluster from all the matrices in zipit
zipit_all <- lapply(zipit, function(y) {
lapply(y, function(x) {
rnames <- rownames(x)
allnames <- unique(as.character(do.call(c, order_id)))
missnames <- which(!(allnames %in% rnames))
missmat <- matrix(0, nrow = length(missnames), ncol = ncol(x), dimnames = list(allnames[missnames]))
wholemat <- rbind(x, missmat)
wholemat[order(as.numeric(rownames(wholemat))),]
})
})
# all.equal(zipit_all, zipit)
# In the other place:
# This is kind of how it gets calculated in the other place
# theta2 <- pij_hrowh_t %>%
# lapply(as.data.frame) %>%
# mapply(function(a,b) {
# lapply(b, function(bb) a * bb)
# }, xsp, ., SIMPLIFY = FALSE) %>%
# mapply(function(a,b)
# lapply(a, function(x) rowsum(x, b)),
# ., order_id, SIMPLIFY = FALSE)
#
# qit <- H_hi_t %>%
# lapply(function(x) as.list(as.data.frame(x)))
#
# mapply(function(a,b,c) {
# mapply(function(x,y,z) {
# x * y * z
# },a,b,c, SIMPLIFY = FALSE)
# }, nevent, qit, theta2, SIMPLIFY = FALSE) %>%
# lapply(function(x) do.call(rbind, x)) %>%
# lapply(function(x) apply(x, 2, sum))
# Take just the first cluster
# ver1 <- lapply(1:100, function(clus) {
# qit1 <- qit[[1]] %>%
# lapply(function(x) x[clus]) %>%
# do.call(c, .)
#
# theta21 <- theta2[[1]] %>%
# lapply(function(x) x[clus,]) %>%
# do.call(rbind, .)
#
# (nevent[[1]] * qit1 * theta21) %>% apply(., 2, sum)
# })
#
#
#
# # first cluster
# ver2 <- lapply(1:100, function(clus) {
# H1 <- H_hi_t[[1]][clus,]
# zipit_1 <- as.data.frame(lapply(zipit_all[[1]], function(x) x[clus,]))
# (nevent[[1]] * H1 * zipit_1) %>% apply(., 2, sum)
# })
#
# all.equal(ver1, ver2)
#
# ver1 %>%
# do.call(rbind, .) %>%
# apply(2, sum)
# ver2 %>%
# do.call(rbind, .) %>%
# apply(2, sum)
if(!is.null(varmat)) {
J <- mapply(function(a,b,c) {
lapply(b, function(x) {
sum(apply(a * x, 1, function(y) sum(y *c)))
})
}, H_hi_t, zipit_all, nevent, SIMPLIFY = FALSE) %>%
lapply(function(x) do.call(c, x)) %>%
do.call(rbind, .) %>%
apply(., 2, sum)
} else {
J <- 0
varmat <- 0
}
denominator <- Ihat - t(J) %*% varmat %*% J
c(tstat = T_stat, var = denominator, pval = pchisq(T_stat^2 / denominator, 1, lower.tail = FALSE))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.