# Biostats in epidemiology
# ch 10 work
# 10.1 --------------------------------------------------------------------
# Weibull hazard function
time <- seq(0,4,0.01)
weibull.hazard <- function(alpha, lambda, t){
if(alpha <= 0 || lambda <= 0) stop("alpha and lambda must be positive.")
alpha * lambda * (lambda * t)^(alpha - 1)
}
# wh1 <- weibull.hazard(alpha = 3.0, lambda = 0, t = time)
wh1 <- weibull.hazard(alpha = 3.0, lambda = 1, t = time)
wh2 <- weibull.hazard(alpha = 1.5, lambda = 1, t = time)
wh3 <- weibull.hazard(alpha = 1.0, lambda = 1, t = time)
wh4 <- weibull.hazard(alpha = 0.5, lambda = 1, t = time)
# Fig 10.1(a)
plot(time, wh1, ylim = c(0,4), ylab = "Hazard", xlab = "Time", type="l")
lines(time, wh2, lty = 2)
lines(time, wh3, lty = 3)
lines(time, wh4, lty = 4)
legend("topright", legend = paste("alpha =", c(3.0,1.5,1.0,0.5)), lty= 1:4)
# Weibull survival function
weibull.survival <- function(alpha, lambda, t){
if(alpha <= 0 || lambda <= 0) stop("alpha and lambda must be positive.")
exp(-(lambda * t)^alpha)
}
ws1 <- weibull.survival(alpha = 3.0, lambda = 1, t = time)
ws2 <- weibull.survival(alpha = 1.5, lambda = 1, t = time)
ws3 <- weibull.survival(alpha = 1.0, lambda = 1, t = time)
ws4 <- weibull.survival(alpha = 0.5, lambda = 1, t = time)
# Fig 10.1(b)
plot(time, ws1, ylim = c(0,1), ylab = "Survival Probability", xlab = "Time", type="l")
lines(time, ws2, lty = 2)
lines(time, ws3, lty = 3)
lines(time, ws4, lty = 4)
legend("topright", legend = paste("alpha =", c(3.0,1.5,1.0,0.5)), lty= 1:4)
# Example 10.1
# survreg's scale = 1/(rweibull shape)
# survreg's intercept = log(rweibull scale)
dat <- read.csv("BreastCancer.csv")
library(flexsurv)
fitw <- flexsurvreg(formula = Surv(time, status) ~ 1, data = dat, dist="weibull")
# Fig 10.2(b)
plot(fitw, ci=FALSE)
fitws <- summary(fitw)
fitws[[1]]$est
fite <- flexsurvreg(formula = Surv(time, status) ~ 1, data = dat, dist="exp")
# Fig 10.2(a)
plot(fite, ci=FALSE)
fites <- summary(fite)
fites[[1]]$est
library(survival)
m1 <- survreg(Surv(time, status) ~ 1, data=dat, dist = "weibull")
sm1 <- summary(m1)
1/sm1$scale # alpha-hat (shape)
1/exp(sm1$coefficients[1]) # lambda-hat (rate)
# cox-oakes test of exponentiality
# Null: alpha = 1, Weibull shape parameter is 1, hazard function is constant
d <- sum(dat$status) # number of deaths in cohort
n <- sum(dat$time) # total amount of time cohort was under observation
lam0.hat <- d/n # deaths per person-month
si <- log(lam0.hat * dat$time)
ei.hat <- lam0.hat * dat$time
num <- (d + sum(si * (dat$status - ei.hat)))^2
denom <- d + sum((si^2)*ei.hat) - (sum(si*ei.hat))^2 / d
X.co <- num/denom
pchisq(X.co, df = 1, lower.tail = FALSE)
# provides moderate evidence that exponential assumption may not be satisfied
# Funciton: cox-oakes test
cox.oakes <- function(time,status) {
dname <- deparse(substitute(time))
d <- sum(status) # number of deaths in cohort
n <- sum(time) # total amount of time cohort was under observation
lam0.hat <- d/n # deaths per person-month
names(lam0.hat) <- "hazard rate"
si <- log(lam0.hat * time)
ei.hat <- lam0.hat * time
num <- (d + sum(si * (status - ei.hat)))^2
denom <- d + sum((si^2)*ei.hat) - (sum(si*ei.hat))^2 / d
X.co <- num/denom
p.value <- pchisq(X.co, df = 1, lower.tail = FALSE)
RVAL <- list(statistic = c(statistic = X.co), p.value = p.value,
estimate = lam0.hat,
method = "Cox-Oakes Test of Exponentiality",
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
cox.oakes(time = breast.survival$time, status = breast.survival$status)
c.out <- cox.oakes(time = breast.survival$time, status = breast.survival$status)
str(c.out)
# example 10.2
dat2 <- read.csv("ovarian.csv")
# Weibull model
m2 <- survreg(Surv(time, status) ~ 1, data=dat2, dist = "weibull", subset = grade=="High")
sm2 <- summary(m2)
1/sm2$scale # alpha-hat (shape)
1/exp(sm2$coefficients[1]) # lambda-hat (rate)
# Exponential model
m3 <- survreg(Surv(time, status) ~ 1, data=dat2, dist = "exponential", subset = grade=="High")
sm3 <- summary(m3)
1/exp(sm3$coefficients[1]) # lambda-hat (rate); alpha-hat (shape) is 1
# Fig 10.3(a)
library(flexsurv)
fite <- flexsurvreg(formula = Surv(time, status) ~ 1, data = dat2, dist="exp")
plot(fite, ci=FALSE)
# Fig 10.3(b)
fitw <- flexsurvreg(formula = Surv(time, status) ~ 1, data = dat2, dist="weibull")
plot(fitw, ci=FALSE)
cox.oakes(time = dat2$time[dat2$grade=="High"], status = dat2$status[dat2$grade=="High"])
# exact methods for single sample
# Example 10.3
# with n assumed constant (where n is total person time in study), d is a
# poisson random variable with parameter nu = lambda * n
# Null: lambda_0 = 0.4
d <- 2
n <- 10
ppois(q = 2, lambda = n*0.4) * 2
# Table 10.1
d <- 0:12
`D_=_d` <- dpois(d, lambda = n * 0.4)
`D_<=_d` <- ppois(d, lambda = n * 0.4)
`D_>=_d` <- ppois(d - 1, lambda = n * 0.4, lower.tail = FALSE)
cbind(d, `D_=_d`, `D_<=_d`, `D_>=_d`)
1 - ppois(q = 1, lambda = 10*0.024)
ppois(q = 2, lambda = 10*0.723)
f1 <- function(x)1 - ppois(q = 1, lambda = 10*x) - 0.025
f2 <- function(x)ppois(q = 2, lambda = 10*x) - 0.025
uniroot(f1, interval = c(0,10))
uniroot(f2, interval = c(0,10))
# exact.rate.ci <- function(d,n,ci=0.95){
# alpha <- (1-ci)/2
# f1 <- function(x)1 - ppois(q = d-1, lambda = n*x) - alpha
# f2 <- function(x)ppois(q = d, lambda = n*x) - alpha
# f1.out <- uniroot(f1, interval = c(0,n))
# f2.out <- uniroot(f2, interval = c(0,n))
# list(lower = f1.out$root, upper = f2.out$root)
# }
# exact.rate.ci(d = 2, n = 10)
# exact.rate.ci(d = 2, n = 10, ci=0.90)
exact.rate.test <- function(time, status, null=0, conf.level = 0.95){
dname <- deparse(substitute(time))
alternative <- "two.sided"
# estimate
d <- sum(status, na.rm = TRUE)
n <- sum(time, na.rm = TRUE)
est <- d/n
names(est) <- "hazard rate"
names(null) <- names(est)
# CI
alpha <- (1-conf.level)/2
f1 <- function(x)1 - ppois(q = d-1, lambda = n*x) - alpha
f2 <- function(x)ppois(q = d, lambda = n*x) - alpha
f1.out <- uniroot(f1, interval = c(0,n))
f2.out <- uniroot(f2, interval = c(0,n))
CINT <- c(f1.out$root, f2.out$root)
attr(CINT, "conf.level") <- conf.level
# test
p.value <- min( min(ppois(q = d, lambda = n*null), 1 - ppois(q = d-1, lambda = n*null)) * 2, 1)
RVAL <- list(p.value = p.value, estimate = est, null.value = null,
conf.int = CINT, alternative = alternative,
method = "Exact Hazard Rate Test for a single sample",
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
exact.rate.test(time = 10, status = 2, null = 0.4)
with(breast.survival, exact.rate.test(time = time, status = status))
with(breast.survival, exact.rate.test(time = time, status = status, null = 0.01))
hout <- with(breast.survival, exact.rate.test(time = time, status = status, null = 0.01))
str(hout)
# asympotic methods for single sample
rate.test <- function(time, status, null=1, conf.level = 0.95, explicit=TRUE){
dname <- deparse(substitute(time))
alternative <- "two.sided"
# estimate
d <- sum(status, na.rm = TRUE)
n <- sum(time, na.rm = TRUE)
est <- d/n
names(est) <- "hazard rate"
names(null) <- names(est)
# CI
alpha <- (1-conf.level)/2
if(explicit){
CINT <- est + c(-1,1)*((qnorm(1 - alpha)*sqrt(d))/n)
} else {
a <- n^2
b <- -n*(2*d + qnorm(1 - alpha)^2)
c <- d^2
CINT <- (-b + c(-1,1)*sqrt(b^2 - 4*a*c))/(2*a)
}
attr(CINT, "conf.level") <- conf.level
# test
STATISTIC <- ((d - null*n)^2)/(null*n)
names(STATISTIC) <- "X-squared"
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
RVAL <- list(statistic = STATISTIC, p.value = p.value, estimate = est,
null.value = null,
conf.int = CINT, alternative = alternative,
method = "Asymptotic Hazard Rate Test for a single sample",
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
# Examples 10.5 and 10.6
rate.test(time = 10, status = 2, null = 0.4)
rate.test(time = 25, status = 5, null = 0.4)
rate.test(time = 50, status = 10, null = 0.4)
rate.test(time = 10, status = 2, null = 0.4, explicit = FALSE)
rate.test(time = 25, status = 5, null = 0.4, explicit = FALSE)
rate.test(time = 50, status = 10, null = 0.4, explicit = FALSE)
# Example 10.7
with(breast.survival, rate.test(time = time, status = status))
with(breast.survival, rate.test(time = time, status = status, explicit = FALSE))
# 10.2 --------------------------------------------------------------------
# Poisson methods for unstratified survival data
# Example 10.8
n <- with(breast.survival,tapply(time, receptor.level, sum))
d <- with(breast.survival,tapply(status, receptor.level, sum))
# hazard ratio
hr.hat <- (d[1] * n[2])/(d[2] * n[1])
# cox-oakes for low and high receptor levels
with(breast.survival[breast.survival$receptor.level=="low",],
cox.oakes(time = time, status = status))
with(breast.survival[breast.survival$receptor.level=="high",],
cox.oakes(time = time, status = status))
# stratum specific HR estimates
with(breast.survival[breast.survival$receptor.level=="low",],
rate.test(time = time, status = status, explicit = FALSE))
with(breast.survival[breast.survival$receptor.level=="high",],
rate.test(time = time, status = status, explicit = FALSE))
# HR conf interval
exp(log(hr.hat) + c(-1,1)*qnorm(0.975)*sqrt((1/d[1]) + (1/d[2])))
# Wald and LRT tests of association
# we say there is no association between exposure and survival if lamba_1 = lambda_2
m <- hr.hat*(d[2]/n[2])*n[1] + (d[2]/n[2])*n[2]
e1 <- (n[1]*m)/(n[1] + n[2])
e2 <- (n[2]*m)/(n[1] + n[2])
# Wald test
X_w <- (log(hr.hat)^2 * n[1] * n[2] * m)/((n[1] + n[2])^2)
pchisq(X_w, df = 1, lower.tail = FALSE)
# LRT
X_lr <- 2*(d[1] * log(d[1]/e1) + d[2] * log(d[2]/e2))
pchisq(X_lr, df = 1, lower.tail = FALSE)
# Example 10.9
dat <- breast.survival[breast.survival$stage=="III",]
n <- with(dat,tapply(time, receptor.level, sum))
d <- with(dat,tapply(status, receptor.level, sum))
# hazard ratio
hr.hat <- (d[1] * n[2])/(d[2] * n[1])
# HR conf interval
exp(log(hr.hat) + c(-1,1)*qnorm(0.975)*sqrt((1/d[1]) + (1/d[2])))
m <- hr.hat*(d[2]/n[2])*n[1] + (d[2]/n[2])*n[2]
e1 <- (n[1]*m)/(n[1] + n[2])
e2 <- (n[2]*m)/(n[1] + n[2])
# Wald test
X_w <- (log(hr.hat)^2 * n[1] * n[2] * m)/((n[1] + n[2])^2)
pchisq(X_w, df = 1, lower.tail = FALSE)
# LRT
X_lr <- 2*(d[1] * log(d[1]/e1) + d[2] * log(d[2]/e2))
pchisq(X_lr, df = 1, lower.tail = FALSE)
# moderate evidence for an association between receptor level and survival in the stage II cohort
hazard.ratio.test <- function(time, status, exposure, Wald=TRUE, conf.level=0.95,
exact=FALSE){
dname <- deparse(substitute(time))
alternative <- "two.sided"
n <- tapply(time, exposure, sum)
d <- tapply(status, exposure, sum)
if(d[1] == 0 || d[2] == 0) est <- (d[1] + 0.5) * n2 / (d[2] + 0.5) * n[1]
else est <- (d[1] * n[2]) / (d[2] * n[1])
names(est) <- "hazard ratio"
null <- 1
names(null) <- names(est)
alpha <- (1-conf.level)/2
if(!exact){
# hazard ratio
# HR conf interval
CINT <- exp(log(est) + c(-1,1)*qnorm(1 - alpha)*sqrt((1/d[1]) + (1/d[2])))
attr(CINT, "conf.level") <- conf.level
# Wald and LRT tests of association
m <- est*(d[2]/n[2])*n[1] + (d[2]/n[2])*n[2]
e1 <- (n[1]*m)/(n[1] + n[2])
e2 <- (n[2]*m)/(n[1] + n[2])
# Wald test
if(Wald){
STATISTIC <- (log(est)^2 * n[1] * n[2] * m)/((n[1] + n[2])^2)
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
} else {
# LRT
STATISTIC <- 2*(d[1] * log(d[1]/e1) + d[2] * log(d[2]/e2))
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
}
names(STATISTIC) <- "X-squared"
METHOD <- paste(if(Wald) "Wald" else "Likelihood Ratio", "Test of association")
RVAL <- list(statistic = STATISTIC, p.value = p.value, estimate = est, null.value = null,
conf.int = CINT, alternative = alternative,
method = METHOD,
data.name = dname)
} else {
m <- sum(d)
f1 <- function(x)1 - pbinom(d[1]-1, size = m, prob = x) - alpha
f2 <- function(x)pbinom(d[1], size = m, prob = x) - alpha
f1.out <- uniroot(f1, interval = c(0,1))
f2.out <- uniroot(f2, interval = c(0,1))
CINTpi <- c(f1.out$root, f2.out$root)
CINT <- CINTpi*n[2]/((1 - CINTpi)*n[1]) # HR CI
attr(CINT, "conf.level") <- conf.level
# test
# exact test of association
pi_0 <- (null * n[1]) / (null * n[1] + n[2])
p.value <- min( min(pbinom(q = d[1], size = m, prob = pi_0),
1 - pbinom(q = d[1]-1, size = m, prob = pi_0)) * 2, 1)
RVAL <- list(p.value = p.value, estimate = est, null.value = null,
conf.int = CINT, alternative = alternative,
method = "Exact Hazard Ratio Test of Association",
data.name = dname)
}
class(RVAL) <- "htest"
return(RVAL)
}
# Example 10.8
with(breast.survival, hazard.ratio.test(time, status, receptor.level))
with(breast.survival, hazard.ratio.test(time, status, receptor.level, Wald = FALSE))
hrt.out <- with(breast.survival, hazard.ratio.test(time, status, receptor.level, Wald = FALSE))
str(hrt.out)
# Example 10.9
# Receptor Level-Breast Cancer: Stage III
dat <- breast.survival[breast.survival$stage=="III",]
hazard.ratio.test(time = dat$time, status = dat$status, exposure = dat$receptor.level)
hazard.ratio.test(time = dat$time, status = dat$status, exposure = dat$receptor.level,
Wald = FALSE)
# Example 10.10
hazard.ratio.test(time = dat$time, status = dat$status,
exposure = dat$receptor.level,exact = TRUE)
# Fig 10.7
library(flexsurv)
fite <- flexsurvreg(formula = Surv(time, status) ~ receptor, data = breast.survival, dist="exp")
plot(fite, ci=FALSE)
# 10.2.2
# Exact conditional methods for a single 1 x 2 table
dat <- subset(breast.survival, stage=="III")
d <- with(dat, tapply(status,receptor.level, sum))
n <- with(dat, tapply(time,receptor.level, sum))
m <- sum(d)
alpha <- (1-0.95)/2
1 - pbinom(12-1, size = 20, prob = 0.361)
pbinom(12, size = 20, prob = 0.809)
f1 <- function(x)1 - pbinom(d[1]-1, size = m, prob = x) - alpha
f2 <- function(x)pbinom(d[1], size = m, prob = x) - alpha
f1.out <- uniroot(f1, interval = c(0,1))
f2.out <- uniroot(f2, interval = c(0,1))
CINTpi <- c(f1.out$root, f2.out$root)
CINT <- CINTpi*n[2]/((1 - CINTpi)*n[1]) # HR CI
# exact test of association
# n1 <- sum(breast.survival$time[breast.survival$receptor.level=="low" &
# breast.survival$stage=="III"])
# n <- sum(breast.survival$time[breast.survival$stage=="III"])
pi_0 <- n[1]/sum(n)
# or
null <- 1
pi_0 <- (null * n[1]) / (null * n[1] + n[2])
min( min(pbinom(q = d[1], size = m, prob = pi_0),
1 - pbinom(q = d[1]-1, size = m, prob = pi_0)) * 2, 1)
# exact.ratio.test <- function(time, status, group, null=1, conf.level = 0.95, rev=FALSE){
# dname <- deparse(substitute(time))
# alternative <- "two.sided"
# g <- levels(group)
# if(rev) g <- rev(g)
# d1 <- sum(status[group==g[1]])
# m <- sum(status)
# d2<- m - d1
# n1 <- sum(time[group==g[1]])
# n <- sum(time)
# n2 <- n - n1
# # estimate
# if(d1 == 0 || d2 == 0) est <- (d1 + 0.5) * n2 / (d2 + 0.5) * n1
# else est <- (d1 * n2) / (d2 * n1)
# names(est) <- "hazard ratio"
# names(null) <- names(est)
# # CI
# alpha <- (1-conf.level)/2
# f1 <- function(x)1 - pbinom(d1-1, size = m, prob = x) - alpha
# f2 <- function(x)pbinom(d1, size = m, prob = x) - alpha
# f1.out <- uniroot(f1, interval = c(0,1))
# f2.out <- uniroot(f2, interval = c(0,1))
# CINTpi <- c(f1.out$root, f2.out$root)
# CINT <- CINTpi*n2/((1 - CINTpi)*n1) # HR CI
# attr(CINT, "conf.level") <- conf.level
#
# # test
# # exact test of association
# pi_0 <- (null * n1) / (null * n1 + n2)
# p.value <- min( min(pbinom(q = d1, size = m, prob = pi_0), 1 - pbinom(q = d1-1, size = m, prob = pi_0)) * 2, 1)
# RVAL <- list(p.value = p.value, estimate = est, null.value = null,
# conf.int = CINT, alternative = alternative,
# method = "Exact Hazard Ratio Test of Association",
# data.name = dname)
# class(RVAL) <- "htest"
# return(RVAL)
# }
#
# exact.ratio.test(time = dat$time, status = dat$status, group = dat$receptor, rev = TRUE)
# 10.2.3
# Mantel-Haenszel test of association for person-time data
# Example 10.11
dat <- breast.survival[breast.survival$stage=="III",]
n1 <- sum(dat$time[dat$receptor=="Low"])
n2 <- sum(dat$time[dat$receptor=="High"])
d1 <- sum(dat$status[dat$receptor=="Low"])
d2 <- sum(dat$status[dat$receptor=="High"])
hr.hat <- (d1 * n2)/(d2 * n1)
m <- hr.hat*(d2/n2)*n1 + (d2/n2)*n2
e1 <- (n1*m)/(n1 + n2)
e2 <- (n2*m)/(n1 + n2)
X_pt <- (d1 - e1)^2 / e1 + (d2 - e2)^2 / e2
pchisq(X_pt, df = 1, lower.tail = FALSE)
# just for two groups
mantelhaen.pt.test <- function(time, status, group, rev=FALSE){
dname <- deparse(substitute(time))
alternative <- "two.sided"
null <- 1
g <- levels(group)
if(rev) g <- rev(g)
n1 <- sum(time[group==g[1]])
n2 <- sum(time[group==g[2]])
d1 <- sum(status[group==g[1]])
d2 <- sum(status[group==g[2]])
est <- (d1 * n2)/(d2 * n1)
names(est) <- "hazard ratio"
names(null) <- names(est)
m <- est*(d2/n2)*n1 + (d2/n2)*n2
e1 <- (n1*m)/(n1 + n2)
e2 <- (n2*m)/(n1 + n2)
STATISTIC <- (d1 - e1)^2 / e1 + (d2 - e2)^2 / e2
if(e1 < 5 || e2 < 5) warning(paste("expected counts less than 5: e1 =",e1,", e2 =",e2))
names(STATISTIC) <- "Mantel-Haenszel X-squared"
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
RVAL <- list(statistic = STATISTIC, p.value = p.value, estimate = est, null.value = null,
alternative = alternative,
method = "Mantel-Haenszel test of association for person-time data",
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
mantelhaen.pt.test(time = dat$time, status = dat$status, group = dat$receptor, rev = TRUE)
# test warning
dat2 <- dat[-(1:8),]
mantelhaen.pt.test(time = dat2$time, status = dat2$status, group = dat2$receptor, rev = TRUE)
# Example 10.12
library(epitools)
rateratio(x = c(6,23,20), y = c(3637, 4797, 1037), method = "wald")
# disease counts
dc <- with(breast.survival, tapply(status, stage, sum))
# person time
ptime <- with(breast.survival, tapply(time, stage, sum))
rateratio(x = as.vector(dc), y = as.vector(ptime), method = "wald")
# mantel-haenszel test for 1 x I table
d <- with(breast.survival, tapply(status, stage, sum))
n <- with(breast.survival, tapply(time, stage, sum))
e <- n * sum(d) / sum(n)
X_pt <- sum((d - e)^2 / e)
pchisq(X_pt, df = length(d) - 1, lower.tail = FALSE)
# hazard ratios?
(d*n[1])/(d[1]*n)
# two or more groups
mantelhaen.pt.test <- function(time, status, group){
dname <- deparse(substitute(time))
alternative <- "two.sided"
null <- 1
d <- tapply(status, group, sum)
n <- tapply(time, group, sum)
est <- (d*n[1])/(d[1]*n)
names(est) <- levels(group)
names(null) <- "hazard ratio"
e <- n * sum(d) / sum(n)
STATISTIC <- sum((d - e)^2 / e)
p.value <- pchisq(STATISTIC, df = length(d) - 1, lower.tail = FALSE)
if(any(e < 5)) warning(paste("expected counts less than 5:",e[e < 5]))
names(STATISTIC) <- "Mantel-Haenszel X-squared"
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
RVAL <- list(statistic = STATISTIC, p.value = p.value, estimate = est, null.value = null,
alternative = alternative,
method = "Mantel-Haenszel test of association for person-time data",
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
mantelhaen.pt.test(time = breast.survival$time, status = breast.survival$status, group = breast.survival$stage)
with(breast.survival, mantelhaen.pt.test(time, status, stage))
mantelhaen.pt.test(time = dat$time, status = dat$status, group = dat$receptor)
# test warning
dat2 <- dat[-(1:8),]
mantelhaen.pt.test(time = dat2$time, status = dat2$status, group = dat2$receptor)
# 10.3 --------------------------------------------------------------------
# 10.3.1 - asymptotic (unconditional) methods for K (1 x 2) Tables
# Example 10.14
# dat <- xtabs(~ status + receptor.level + stage, data=breast.survival, subset = status==1)
dk <- with(breast.survival, tapply(status, list(receptor.level, stage), sum))
nk <- with(breast.survival, tapply(time, list(receptor.level, stage), sum))
mk <- apply(dk,2,sum)
n <- apply(nk,2,sum)
# estimate hazard ratio
f1 <- function(x){
sum((x*mk*nk[1,])/(x*nk[1,] + nk[2,])) - sum(dk[1,])
}
HR <- uniroot(f = f1, interval = c(0,1e5))$root
hr2 <- mk/(HR*nk[1,] + nk[2,])
hr1 <- HR*hr2
# fitted counts:
dh1 <- hr1*nk[1,]
dh2 <- hr2*nk[2,]
# confidence interval
V <- sum((1/dh1 + 1/dh2)^(-1))
exp(log(HR) + c(-1,1)*qnorm(0.975)/sqrt(V))
# tests of association
# Null: log(HR) = 0
# expected counts
e1 <- nk[1,]*mk/n
e2 <- nk[2,]*mk/n
V0 <- sum((nk[1,]*nk[2,]*mk)/(n^2))
# wald test of association
X_w <- log(HR)^2 * V0
pchisq(X_w, df = 1, lower.tail = FALSE)
# LRT of association
X_lr <- 2 * sum(dk[1,] * log((dk[1,]/e1)) + dk[2,] * log((dk[2,]/e2)))
pchisq(X_lr, df = 1, lower.tail = FALSE)
# make a function
k.hazard.ratio.test <- function(time, status, exposure, strata,
Wald=TRUE, conf.level=0.95){
dname <- deparse(substitute(time))
alternative <- "two.sided"
dk <- tapply(status, list(exposure, strata), sum)
nk <- tapply(time, list(exposure, strata), sum)
mk <- apply(dk,2,sum)
n <- apply(nk,2,sum)
# estimate hazard ratio
f1 <- function(x){
sum((x*mk*nk[1,])/(x*nk[1,] + nk[2,])) - sum(dk[1,])
}
est <- uniroot(f = f1, interval = c(0,1e5))$root
names(est) <- "common hazard ratio"
null <- 1
names(null) <- names(est)
hr2 <- mk/(est*nk[1,] + nk[2,])
hr1 <- est*hr2
# fitted counts:
dh1 <- hr1*nk[1,]
dh2 <- hr2*nk[2,]
# confidence interval
V <- sum((1/dh1 + 1/dh2)^(-1))
alpha <- (1-conf.level)/2
CINT <- exp(log(est) + c(-1,1)*qnorm(1 - alpha)/sqrt(V))
attr(CINT, "conf.level") <- conf.level
# tests of association
# Null: log(HR) = 0
# wald test of association
if(Wald){
V0 <- sum((nk[1,]*nk[2,]*mk)/(n^2))
STATISTIC <- log(est)^2 * V0
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
} else {
# LRT of association
# expected counts
e1 <- nk[1,]*mk/n
e2 <- nk[2,]*mk/n
STATISTIC <- 2 * sum(dk[1,] * log((dk[1,]/e1)) + dk[2,] * log((dk[2,]/e2)))
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
}
names(STATISTIC) <- "X-squared"
METHOD <- paste(if(Wald) "Wald" else "Likelihood Ratio", "Test of association")
RVAL <- list(statistic = STATISTIC, p.value = p.value, estimate = est,
null.value = null,
conf.int = CINT, alternative = alternative,
method = METHOD,
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
with(breast.survival,
k.hazard.ratio.test(time = time, status = status,
exposure = receptor.level, strata = stage))
with(breast.survival,
k.hazard.ratio.test(time = time, status = status,
exposure = receptor.level, strata = stage, Wald=FALSE))
# test for linear trend
s <- 1:3
num <- sum(s * (dk[1,] - dh1))^2
vk <- (1/dh1 + 1/dh2)^(-1)
den <- sum(s^2 * vk) - (sum(s * vk))^2 / sum(vk)
X_t <- num/den
pchisq(X_t, df = 1, lower.tail = FALSE)
# make a function
linear.trend.test <- function(time, status, exposure, strata,
scores = seq_along(length(levels(strata)))){
dname <- paste("\n ", deparse(substitute(time)),
"\n ", deparse(substitute(status)),
"\n using scores:", paste(scores, collapse = " "))
alternative <- "two.sided"
dk <- tapply(status, list(exposure, strata), sum)
nk <- tapply(time, list(exposure, strata), sum)
mk <- apply(dk,2,sum)
n <- apply(nk,2,sum)
# estimate common hazard ratio; need this for test statistic calculation
f1 <- function(x){
sum((x*mk*nk[1,])/(x*nk[1,] + nk[2,])) - sum(dk[1,])
}
est <- uniroot(f = f1, interval = c(0,1e5))$root
hr2 <- mk/(est*nk[1,] + nk[2,])
hr1 <- est*hr2
# fitted counts:
dh1 <- hr1*nk[1,]
dh2 <- hr2*nk[2,]
s <- scores
num <- sum(s * (dk[1,] - dh1))^2
vk <- (1/dh1 + 1/dh2)^(-1)
den <- sum(s^2 * vk) - (sum(s * vk))^2 / sum(vk)
STATISTIC <- num/den
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
names(STATISTIC) <- "X-squared"
METHOD <- paste("Test for linear trend (in log-hazard ratios)")
RVAL <- list(statistic = STATISTIC, parameter = c(df = 1), p.value = p.value,
method = METHOD,
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
with(breast.survival, linear.trend.test(time, status, receptor.level, stage))
with(breast.survival, linear.trend.test(time, status, receptor.level, stage,
scores = c(1,2,6)))
library(epitools)
dk <- with(breast.survival,tapply(status, list(receptor.level, stage), sum))
nk <- with(breast.survival,tapply(time, list(receptor.level, stage), sum))
lapply(seq(ncol(dk)), function(x)epitools::rateratio(x = dk[,x],y = nk[,x],
method = "wald", rev = "columns")$measure[2,])
# 2.297692
# calculate hazard ratios for each stratum
est <- (d*n[1])/(d[1]*n)
(dk[1,1]*nk[2,1])/(dk[2,1]*nk[1,1])
HR <- function(x){
(dk[1,x]*nk[2,x])/(dk[2,x]*nk[1,x])
}
sapply(seq(ncol(dk)),HR)
y <- log(sapply(seq(ncol(dk)),HR))
x <- 1:3
summary(lm(y ~ x))
hazard.ratios <- function(time, status, exposure, strata){
dk <- tapply(status, list(exposure, strata), sum)
nk <- tapply(time, list(exposure, strata), sum)
est <- lapply(seq(ncol(dk)),
function(x)epitools::rateratio(x = dk[,x],y = nk[,x],
method = "wald", rev = "columns")$measure[2,])
names(est) <- levels(strata)
return(est)
}
with(breast.survival, hazard.ratios(time, status, receptor.level, stage))
# LRT test of homogeneity
X_h <- 2 * sum(dk[1,] * log((dk[1,]/dh1)) + dk[2,] * log((dk[2,]/dh2)))
pchisq(X_h, df = ncol(dk) - 1, lower.tail = FALSE)
# make a function
lrt.homogeneity <- function(time, status, exposure, strata){
dname <- paste("\n ", deparse(substitute(time)),
"\n ", deparse(substitute(status)))
dk <- tapply(status, list(exposure, strata), sum)
nk <- tapply(time, list(exposure, strata), sum)
mk <- apply(dk,2,sum)
n <- apply(nk,2,sum)
# estimate common hazard ratio; need this for test statistic calculation
f1 <- function(x){
sum((x*mk*nk[1,])/(x*nk[1,] + nk[2,])) - sum(dk[1,])
}
est <- uniroot(f = f1, interval = c(0,1e5))$root
hr2 <- mk/(est*nk[1,] + nk[2,])
hr1 <- est*hr2
# fitted counts:
dh1 <- hr1*nk[1,]
dh2 <- hr2*nk[2,]
STATISTIC <- 2 * sum(dk[1,] * log((dk[1,]/dh1)) + dk[2,] * log((dk[2,]/dh2)))
df <- ncol(dk) - 1
names(df) <- "df"
p.value <- pchisq(STATISTIC, df = df, lower.tail = FALSE)
names(STATISTIC) <- "X-squared"
METHOD <- paste("Likelihood ratio test of homogeneity")
RVAL <- list(statistic = STATISTIC, parameter = df, p.value = p.value,
method = METHOD,
data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
}
with(breast.survival, lrt.homogeneity(time, status, receptor.level, stage))
# 10.3.7 methods for K (1 X I) Tables
# Example 10.18
dik <- with(breast.survival, tapply(status, list(stage, receptor.level), sum))
nik <- with(breast.survival, tapply(time, list(stage, receptor.level), sum))
mk <- apply(dik,2,sum)
nk <- apply(nik,2,sum)
# expected counts
eik <- sapply(seq_along(levels(breast.survival$receptor.level)),function(x)nik[,x]*mk[x]/nk[x])
di. <- apply(dik,1,sum)
ei. <- apply(eik,1,sum)
X_oe <- sum(((di. - ei.)^2)/ei.)
pchisq(X_oe, df = nrow(dik)-1, lower.tail = FALSE)
breast.array <- xtabs(~ status + stage + receptor.level, data=breast.survival)
mantelhaen.test(breast.array) # does not match book
# make a function
# add to k.hazard.ratio.test
# test of association for stratified data when exposure is polychotomous
k.hazard.ratio.test <- function(time, status, exposure, strata,
Wald=TRUE, conf.level=0.95){
dname <- paste("\n ", deparse(substitute(time)),
"\n ", deparse(substitute(status)))
alternative <- "two.sided"
# 2 exposure levels
if(length(levels(exposure))==2){
dk <- tapply(status, list(exposure, strata), sum)
nk <- tapply(time, list(exposure, strata), sum)
mk <- apply(dk,2,sum)
n <- apply(nk,2,sum)
# estimate hazard ratio
f1 <- function(x){
sum((x*mk*nk[1,])/(x*nk[1,] + nk[2,])) - sum(dk[1,])
}
est <- uniroot(f = f1, interval = c(0,1e5))$root
names(est) <- "common hazard ratio"
null <- 1
names(null) <- names(est)
hr2 <- mk/(est*nk[1,] + nk[2,])
hr1 <- est*hr2
# fitted counts:
dh1 <- hr1*nk[1,]
dh2 <- hr2*nk[2,]
# confidence interval
V <- sum((1/dh1 + 1/dh2)^(-1))
alpha <- (1-conf.level)/2
CINT <- exp(log(est) + c(-1,1)*qnorm(1 - alpha)/sqrt(V))
attr(CINT, "conf.level") <- conf.level
# tests of association
# Null: log(HR) = 0
# wald test of association
if(Wald){
V0 <- sum((nk[1,]*nk[2,]*mk)/(n^2))
STATISTIC <- log(est)^2 * V0
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
} else {
# LRT of association
# expected counts
e1 <- nk[1,]*mk/n
e2 <- nk[2,]*mk/n
STATISTIC <- 2 * sum(dk[1,] * log((dk[1,]/e1)) + dk[2,] * log((dk[2,]/e2)))
p.value <- pchisq(STATISTIC, df = 1, lower.tail = FALSE)
}
names(STATISTIC) <- "X-squared"
METHOD <- paste(if(Wald) "Wald" else "Likelihood Ratio", "Test of association")
RVAL <- list(statistic = STATISTIC, parameter = c(df = 1), p.value = p.value, estimate = est,
null.value = null,
conf.int = CINT, alternative = alternative,
method = METHOD,
data.name = dname)
} else {
dik <- tapply(status, list(exposure, strata), sum)
nik <- tapply(time, list(exposure, strata), sum)
mk <- apply(dik,2,sum)
nk <- apply(nik,2,sum)
# expected counts
eik <- sapply(seq_along(levels(strata)),function(x)nik[,x]*mk[x]/nk[x])
di. <- apply(dik,1,sum)
ei. <- apply(eik,1,sum)
STATISTIC <- sum(((di. - ei.)^2)/ei.)
df <- nrow(dik) - 1
names(df) <- "df"
p.value <- pchisq(STATISTIC, df = df, lower.tail = FALSE)
names(STATISTIC) <- "X-squared"
METHOD <- ("X-squared Test of association - polychotomous exposure")
RVAL <- list(statistic = STATISTIC, parameter = df, p.value = p.value,
method = METHOD,
data.name = dname)
}
class(RVAL) <- "htest"
return(RVAL)
}
with(breast.survival,
k.hazard.ratio.test(time = time, status = status,
exposure = receptor.level, strata = stage))
# LR test
with(breast.survival,
k.hazard.ratio.test(time = time, status = status,
exposure = receptor.level, strata = stage,
Wald=FALSE))
# polychotomous exposure
with(breast.survival,
k.hazard.ratio.test(time = time, status = status,
exposure = stage, strata = receptor.level))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.