Nothing
## -----------------------------------------------------------------------------
## Function: estimation()
## -----------------------------------------------------------------------------
## Description: This function estimates the log-relative hazard, baseline
## hazards at each unique event time, cumulative baseline hazard in
## the time interval [Tau1, Tau2] and pure risk in [Tau1, Tau2] and
## for covariate profile x
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard and pure risk. Default is the
## first event time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard and pure risk. Default is the
## last event time
##
## x vector of length p, specifying the covariate profile
## considered for the pure risk. Default is (0,...,0).
##
## missing.data is data on the p covariates missing for certain individuals
## in the stratified case cohort? If missing.data = TRUE, the
## arguments below need to be provided
##
## riskmat.phase2 at risk matrix for the phase-two data at all of the cases
## event times, even that with missing covariate data. If
## missing.data = TRUE, this argument needs to be provided
##
## dNt.phase2 counting process matrix for failures in the phase-two data.
## If missing.data = TRUE and status.phase2 = NULL, this
## argument needs to be provided
##
## status.phase2 vector indicating the case status in the phase-two data. If
## missing.data = TRUE and dNt.phase2 = NULL, this argument
## needs to be provided
## -----------------------------------------------------------------------------
estimation <- function (mod, Tau1 = NULL, Tau2 = NULL, x = NULL,
missing.data = NULL, riskmat.phase2 = NULL,
dNt.phase2 = NULL, status.phase2 = NULL) {
if (is.null(missing.data)) {
missing.data <- FALSE
}
# ----------------------------------------------------------------------------
# If missing covariate data --------------------------------------------------
if (missing.data == TRUE) {
if (is.null(riskmat.phase2)) {
stop("The at risk matrix for the phase-two data at all of the cases event
times need to be provided, even that with missing covariate data")
} else {
if (is.null(dNt.phase2)&is.null(status.phase2)) {
stop("The status for the phase-two data, or the counting process matrix
for failures in the phase-two data, need to be provided")
} else {
if (is.null(dNt.phase2)) {
# --------------------------------------------------------------------
# If not provided, compute the counting process matrix for failures in
# the phase-two data -------------------------------------------------
observed.times.phase2 <- apply(riskmat.phase2, 1,
function(v) {which.max(cumsum(v))})
dNt.phase2 <- matrix(0, nrow(riskmat.phase2),
ncol(riskmat.phase2))
dNt.phase2[cbind(1:nrow(riskmat.phase2), observed.times.phase2)] <- 1
dNt.phase2 <- sweep(dNt.phase2, 1, status.phase2, "*")
}
# ----------------------------------------------------------------------
# Quantities needed for the estimation ---------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
X <- model.matrix(mod)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ] # use the cases'
# actual failure time, known even for cases with missing covariate data
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
# ----------------------------------------------------------------------
# Estimation of the log-relative hazard --------------------------------
beta.hat <- mod$coefficients
exp.X.weighted <- weights * exp(X %*% beta.hat)
S0t <- t(riskmat.phase3) %*% (exp.X.weighted) # for any
# event time t (all of the cases failure times, even the one of cases
# who have missing covariate data)
cases.times <- colnames(riskmat.phase2) # cases failure times, even
# the one of cases who have missing covariate data
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((cases.times > Tau1 & cases.times <= Tau2))
# ----------------------------------------------------------------------
# Estimation of the baseline hazards at each unique event time and
# cumulative baseline hazard in time interval [Tau1, Tau2] -------------
lambda0.t.hat <- t(colSums(dNt.phase2) / S0t) # we do not use weights
# in the numerator of the Breslow estimator
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
# ----------------------------------------------------------------------
# Estimation of the pure risk in [Tau1, Tau2] and for covariate profile
# x --------------------------------------------------------------------
if ((length(x) != length(beta.hat)) | (is.null(x))) {
x <- rep(0, length(beta.hat))
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta.hat))
Pi.x.hat <- 1 - exp(- exp.x * Lambda0.Tau1Tau2.hat)
return(list(beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat,
Pi.x.Tau1Tau2.hat = Pi.x.hat))
}
}
} else {
# --------------------------------------------------------------------------
# If no missing covariate data ---------------------------------------------
# --------------------------------------------------------------------------
# Quantities needed for the estimation -------------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
X <- model.matrix(mod)
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
# --------------------------------------------------------------------------
# Estimation of the log-relative hazard ------------------------------------
beta.hat <- mod$coefficients
exp.X.weighted <- weights * exp(X %*% beta.hat)
S0t <- t(riskmat) %*% (exp.X.weighted)
observed.times <- apply(riskmat, 1,
function(v) {which.max(cumsum(v))})
dNt <- matrix(0, nrow(riskmat), ncol(riskmat))
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- sweep(dNt, 1, mod$y[, ncol(mod$y)], "*")
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((mod.detail$time > Tau1 &
mod.detail$time <= Tau2))
# --------------------------------------------------------------------------
# Estimation of the baseline hazards at each unique event time and
# cumulative baseline hazard in time interval [Tau1, Tau2] -----------------
lambda0.t.hat <- t(colSums(dNt) / S0t) # we do not use weights in
# the numerator of the Breslow estimator
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
# --------------------------------------------------------------------------
# Estimation of the pure risk in [Tau1, Tau2] and for covariate profile x
if ((length(x) != length(beta.hat)) | (is.null(x))) {
x <- rep(0, length(beta.hat))
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta.hat))
Pi.x.hat <- 1 - exp(- exp.x * Lambda0.Tau1Tau2.hat)
return(list(beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat,
Pi.x.Tau1Tau2.hat = Pi.x.hat))
}
}
## -----------------------------------------------------------------------------
## Function: estimation.CumBH()
## -----------------------------------------------------------------------------
## Description: This function estimates the log-relative hazard, baseline
## hazards at each unique event time and cumulative baseline hazard
## in the time interval [Tau1, Tau2]
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard. Default is the first event time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard. Default is the last event time
##
##
## missing.data is data on the p covariates missing for certain individuals
## in the stratified case cohort? If missing.data = TRUE, the
## arguments below need to be provided
##
## riskmat.phase2 at risk matrix for the phase-two data at all of the cases
## event times, even that with missing covariate data. If
## missing.data = TRUE, this argument needs to be provided
##
## dNt.phase2 counting process matrix for failures in the phase-two data.
## If missing.data = TRUE and status.phase2 = NULL, this
## argument needs to be provided
##
## status.phase2 vector indicating the case status in the phase-two data. If
## missing.data = TRUE and dNt.phase2 = NULL, this argument
## needs to be provided
## -----------------------------------------------------------------------------
estimation.CumBH <- function (mod, Tau1 = NULL, Tau2 = NULL,
missing.data = FALSE, riskmat.phase2 = NULL,
dNt.phase2 = NULL, status.phase2 = NULL) {
if (is.null(missing.data)) {
missing.data = FALSE
}
# ----------------------------------------------------------------------------
# If missing covariate data --------------------------------------------------
if (missing.data == TRUE) {
if (is.null(riskmat.phase2)) {
stop("The at risk matrix for the phase-two data at all of the cases event
times need to be provided, even that with missing covariate data")
} else {
if (is.null(dNt.phase2)&is.null(status.phase2)) {
stop("The status for the phase-two data, or the counting process matrix
for failures in the phase-two data, need to be provided")
} else {
if (is.null(dNt.phase2)) {
# --------------------------------------------------------------------
# If not provided, compute the counting process matrix for failures in
# the phase-two data -------------------------------------------------
observed.times.phase2 <- apply(riskmat.phase2, 1,
function(v) {which.max(cumsum(v))})
dNt.phase2 <- matrix(0, nrow(riskmat.phase2),
ncol(riskmat.phase2))
dNt.phase2[cbind(1:nrow(riskmat.phase2), observed.times.phase2)] <-1
dNt.phase2 <- sweep(dNt.phase2, 1, status.phase2, "*")
}
# ----------------------------------------------------------------------
# Quantities needed for the estimation ---------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
X <- model.matrix(mod)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ] # use the cases'
# actual failure time, known even for cases with missing covariate data
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1 # when using the whole cohort, omega_i = 1
}
# ----------------------------------------------------------------------
# Estimation of the log relative hazard --------------------------------
beta.hat <- mod$coefficients
exp.X.weighted <- weights * exp(X %*% beta.hat)
S0t <- t(riskmat.phase3) %*% (exp.X.weighted) # for any
# event time t (all of the cases failure times, even the one of cases
# who have missing covariate data)
cases.times <- colnames(riskmat.phase2) # cases failure times, even
# the one of cases who have missing covariate data
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((cases.times > Tau1 & cases.times <= Tau2))
# ----------------------------------------------------------------------
# Estimation of the baseline hazards at each unique event time and
# cumulative baseline hazard in time interval [Tau1, Tau2] -------------
lambda0.t.hat <- t(colSums(dNt.phase2) / S0t) # we do not use weights
# in the numerator of the Breslow estimator
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
return(list(beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat))
}
}
} else {
# --------------------------------------------------------------------------
# Quantities needed for the estimation -------------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
X <- model.matrix(mod)
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
# --------------------------------------------------------------------------
# Estimation of the log-relative hazard ------------------------------------
beta.hat <- mod$coefficients
# --------------------------------------------------------------------------
# Estimation of the baseline hazards at each unique event time and
# cumulative baseline hazard in time interval [Tau1, Tau2] -----------------
exp.X.weighted <- weights * exp(X %*% beta.hat)
S0t <- t(riskmat) %*% (exp.X.weighted)
observed.times <- apply(riskmat, 1,
function(v) {which.max(cumsum(v))})
dNt <- matrix(0, nrow(riskmat), ncol(riskmat))
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- sweep(dNt, 1, mod$y[, ncol(mod$y)], "*")
lambda0.t.hat <- t(colSums(dNt) / S0t) # we do not use weights in
# the numerator of the Breslow estimator
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((mod.detail$time > Tau1 &
mod.detail$time <= Tau2))
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
return(list(beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat))
}
}
## -----------------------------------------------------------------------------
## Function: estimation.PR()
## -----------------------------------------------------------------------------
## Description: This function estimates the pure risk in the time interval
## [Tau1, Tau2] and for covariate profile x, from the values of the
## log-relative hazard and cumulative baseline hazard in
## [Tau1, Tau2]
## -----------------------------------------------------------------------------
## beta log-relative hazard
##
## Lambda0.Tau1Tau2 cumulative baseline hazard in the time interval that is
## considered for the pure risk
##
## x vector of length p, specifying the covariate profile
## considered for the pure risk. Default is (0,...,0)
## -----------------------------------------------------------------------------
estimation.PR <- function (beta, Lambda0.Tau1Tau2, x = NULL) {
if ((length(x) != length(beta)) | (is.null(x))) {
x <- rep(0, length(beta))
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta))
Pi.x <- 1 - exp(- exp.x * Lambda0.Tau1Tau2)
return(Pi.x.Tau1Tau2.hat = Pi.x)
}
## -----------------------------------------------------------------------------
## Function: influences()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the log-relative
## hazard, baseline hazards at each unique event time, cumulative
## baseline hazard in the time interval [Tau1, Tau2] and pure risk
## in [Tau1, Tau2] and for covariate profile x. It also returns the
## parameters estimates. If calibrated weights were used and the
## auxiliary variables observations are provided, the function
## returns the overall influences, as well as the phase-two
## influences
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard and pure risk. Default is the
## first event time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard and pure risk. Default is the
## last event time
##
## x vector of length p, specifying the covariate profile
## considered for the pure risk. Default is (0,...,0)
##
## calibrated are calibrated weights used for the estimation of the log-
## relative hazard, cumulative baseline hazard and pure risk?
## If calibrated = TRUE, the argument below needs to be
## provided
##
## A matrix with the values of the auxiliary variables used for
## the calibration of the weights in the whole cohort. If
## calibrated = TRUE, this argument needs to be provided
## -----------------------------------------------------------------------------
influences <- function (mod, Tau1 = NULL, Tau2 = NULL, x = NULL,
calibrated = NULL, A = NULL) {
# ----------------------------------------------------------------------------
# Quantities needed for the influences ---------------------------------------
mod.detail <- coxph.detail(mod, riskmat = T)
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
n <- nrow(riskmat)
X <- model.matrix(mod)
p <- ncol(X)
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
beta.hat <- mod$coefficients
exp.X.weighted <- weights * exp(X %*% beta.hat)
Y.exp.X.weighted <- riskmat * matrix(exp.X.weighted, nrow = n,
ncol = number.times, byrow = FALSE)
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (X * matrix(exp.X.weighted, nrow = n,
ncol = p, byrow = FALSE))
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
dNt.weighted <- dNt * matrix(weights, nrow = n, ncol = number.times,
byrow = FALSE)
infomat.indiv <- mod.detail$imat
if (!is.null(dim(infomat.indiv))) {
infomat <- apply(X = infomat.indiv, MARGIN = c(1,2), FUN = sum)
} else {
infomat <- sum(infomat.indiv)
}
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((Tau1 < mod.detail$time) &
(mod.detail$time <= Tau2))
Tau1Times <- which((mod.detail$time <= Tau1))
Tau2Times <- which((mod.detail$time <= Tau2))
lambda0.t.hat <- t(colSums(dNt) / S0t)
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
if ((length(x) != p) | (is.null(x))) {
x <- rep(0, p)
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta.hat))
Pi.x.hat <- 1 - exp(- exp.x * Lambda0.Tau1Tau2.hat)
if (is.null(calibrated)) {
calibrated <- FALSE
}
# ----------------------------------------------------------------------------
# If calibrated weights ------------------------------------------------------
if (calibrated == TRUE) {
if (is.null(A)) {
stop("A matrix with auxiliary variables values need to be provided")
}
# --------------------------------------------------------------------------
# Computation of the influences for the Lagrangian multipliers, eta --------
indiv.phase2 <- row.names(X)
A.phase2 <- A[indiv.phase2, ]
q <- ncol(A.phase2)
A.weighted <- sweep(A.phase2, 1, weights, "*")
AA.weighted <- array(NA, dim = c(q, q, n))
for(i in 1:nrow(A.phase2)) {
AA.weighted[,, i] <- tcrossprod(A.phase2[i,], A.weighted[i,])
}
sum.AA.weighted <- apply(X = AA.weighted, MARGIN = c(1,2), FUN = sum)
sum.AA.weighted.inv <- solve(sum.AA.weighted)
infl1.eta <- A %*% sum.AA.weighted.inv
infl2.eta <- 0 * A
infl2.eta[indiv.phase2,] <- - A.weighted %*% sum.AA.weighted.inv
infl.eta <- infl1.eta + infl2.eta
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
XA <- array(NA, dim = c(p, q, n))
for(i in 1:n) {
XA[,, i] <- tcrossprod(X[i,], A.phase2[i,])
}
drond.G1t.eta <- array(NA, dim = c(p, q, number.times))
drond.G0t.eta <- array(NA, dim = c(q, number.times))
drond.S1t.eta <- array(NA, dim = c(p, q, number.times))
drond.S0t.eta <- array(NA, dim = c(q, number.times))
for(t in 1:number.times) {
drond.G1t.eta[,, t] <- apply(X = sweep(XA, 3, dNt.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.G0t.eta[, t] <- colSums(A.phase2 * matrix(dNt.weighted[, t],
nrow = n, ncol = q,
byrow = FALSE))
drond.S1t.eta[,, t] <- apply(X = sweep(XA, 3, Y.exp.X.weighted[, t],
"*"), MARGIN = c(1,2), FUN = sum)
drond.S0t.eta[, t] <- colSums(A.phase2 * matrix(Y.exp.X.weighted[, t],
nrow = n, ncol = q,
byrow = FALSE))
}
drond.Ut.eta <- array(NA, dim = c(p, q, number.times))
X.expect <- S1t / matrix(S0t, nrow = number.times, ncol = p,
byrow = FALSE)
for(t in 1:number.times) {
drond.Ut.eta[,, t] <- drond.G1t.eta[,, t] - tcrossprod(X.expect[t, ],
drond.G0t.eta[, t]) -
(colSums(dNt.weighted) / S0t)[t] * drond.S1t.eta[,, t] +
(colSums(dNt.weighted) / S0t ^ 2)[t] *
tcrossprod(S1t[t, ], drond.S0t.eta[, t])
}
drond.U.eta <- apply(X = drond.Ut.eta, MARGIN = c(1,2), FUN = sum)
infl1.beta <- infl1.eta %*% t(drond.U.eta) %*% solve(infomat)
score <- array(NA, dim = c(n, p, number.times))
for(i in 1:n) {
score[i,,] <- - sweep(t(X.expect), 1, X[i,], "-")
}
for(t in 1:number.times) {
dMt <- dNt.weighted[, t] - riskmat[, t] *
exp.X.weighted * (colSums(dNt.weighted)/S0t)[t]
score[,, t] <- score[,, t] * matrix(dMt, nrow = n, ncol = p,
byrow = FALSE)
}
score.beta <- apply(X = score, MARGIN = c(1,2), FUN = sum)
infl2.beta <- infl2.eta %*% t(drond.U.eta) %*% solve(infomat)
infl2.beta[indiv.phase2,] <- infl2.beta[indiv.phase2,] +
score.beta %*% solve(infomat)
infl.beta <- infl1.beta + infl2.beta
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
infl1.lambda0.t <- (- (infl1.beta %*% t(S1t) + infl1.eta %*% drond.S0t.eta) *
matrix(lambda0.t.hat, nrow = nrow(A),
ncol = number.times, byrow = TRUE)) /
matrix(S0t, nrow = nrow(A), ncol = number.times,
byrow = TRUE)
infl2.lambda0.t <- 0 * infl1.lambda0.t
infl2.lambda0.t[indiv.phase2,] <- (dNt - (Y.exp.X.weighted +
infl2.beta[indiv.phase2,] %*%
t(S1t) + infl2.eta[indiv.phase2,] %*%
drond.S0t.eta) *
matrix(lambda0.t.hat, nrow = n,
ncol = number.times, byrow = TRUE)) /
matrix(S0t, nrow = n, ncol = number.times,
byrow = TRUE)
infl.lambda0.t <- infl1.lambda0.t + infl2.lambda0.t
infl1.Lambda0.Tau1Tau2 <- rowSums(infl1.lambda0.t[, Tau1Tau2.times, drop=FALSE])
infl2.Lambda0.Tau1Tau2 <- rowSums(infl2.lambda0.t[, Tau1Tau2.times, drop=FALSE])
infl.Lambda0.Tau1Tau2 <- infl1.Lambda0.Tau1Tau2 + infl2.Lambda0.Tau1Tau2
# --------------------------------------------------------------------------
# Computation of the influences for the pure risk in [Tau1, Tau2] and for
# covariate profile x ------------------------------------------------------
infl1.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl1.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl1.Lambda0.Tau1Tau2
infl2.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl2.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl2.Lambda0.Tau1Tau2
infl.Pi.x <- infl1.Pi.x + infl2.Pi.x
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl.Pi.x.Tau1Tau2 = infl.Pi.x, infl2.beta = infl2.beta,
infl2.lambda0.t = infl2.lambda0.t,
infl2.Lambda0.Tau1Tau2 = as.matrix(infl2.Lambda0.Tau1Tau2),
infl2.Pi.x.Tau1Tau2 = infl2.Pi.x, beta.hat = beta.hat,
lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat,
Pi.x.Tau1Tau2.hat = Pi.x.hat))
}else{
# --------------------------------------------------------------------------
# If design weights --------------------------------------------------------
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
infl.beta <- as.matrix(residuals(mod, type = "dfbeta", weighted = T))
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
infl.lambda0.t <- (dNt - (Y.exp.X.weighted + infl.beta %*% t(S1t)) *
matrix(lambda0.t.hat, nrow = n, ncol = number.times,
byrow = TRUE)) /
matrix(S0t, nrow = n, ncol = number.times,
byrow = TRUE)
infl.Lambda0.Tau1Tau2 <- rowSums(infl.lambda0.t[, Tau1Tau2.times, drop=FALSE])
# --------------------------------------------------------------------------
# Computation of the influences for the pure risk in [Tau1, Tau2] and for
# covariate profile x ------------------------------------------------------
infl.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl.Lambda0.Tau1Tau2
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl.Pi.x.Tau1Tau2 = infl.Pi.x, beta.hat = beta.hat,
lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat,
Pi.x.Tau1Tau2.hat = Pi.x.hat))
}
}
## -----------------------------------------------------------------------------
## Function: influences.RH()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the log-relative
## hazard and also returns the parameter estimate. If calibrated
## weights were used and the auxiliary variables observations are
## provided, the function returns the overall influences, as well
## as the phase-two influences
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## calibrated are calibrated weights used for the estimation of the log-
## relative hazard, cumulative baseline hazard and pure risk?
## If calibrated = TRUE, the argument below needs to be
## provided
##
## A matrix with the values of the auxiliary variables used for
## the calibration of the weights in the whole cohort. If
## calibrated = TRUE, this argument needs to be provided
## -----------------------------------------------------------------------------
influences.RH <- function (mod, calibrated = NULL, A = NULL) {
if (is.null(calibrated)) {
calibrated <- FALSE
}
mod.detail <- coxph.detail(mod, riskmat = T)
beta.hat <- mod$coefficients
# ----------------------------------------------------------------------------
# If calibrated weights ------------------------------------------------------
if (calibrated == TRUE) {
if (is.null(A)) {
stop("A matrix with auxiliary variables values need to be provided")
}
# --------------------------------------------------------------------------
# Quantities needed for the influences -------------------------------------
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
n <- nrow(riskmat)
X <- model.matrix(mod)
p <- ncol(X)
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
exp.X.weighted <- weights * exp(X %*% beta.hat)
Y.exp.X.weighted <- riskmat * matrix(exp.X.weighted, nrow = n,
ncol = number.times, byrow = FALSE)
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (X * matrix(exp.X.weighted, nrow = n,
ncol = p, byrow = FALSE))
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
dNt.weighted <- dNt * matrix(weights, nrow = n, ncol = number.times,
byrow = FALSE)
infomat.indiv <- mod.detail$imat
infomat <- apply(X = infomat.indiv, MARGIN = c(1,2), FUN = sum)
# --------------------------------------------------------------------------
# Computation of the influences for the Lagrangian multipliers, eta --------
indiv.phase2 <- row.names(X)
A.phase2 <- A[indiv.phase2, ]
q <- ncol(A.phase2)
A.weighted <- sweep(A.phase2, 1, weights, "*")
AA.weighted <- array(NA, dim = c(q, q, n))
for(i in 1:nrow(A.phase2)) {
AA.weighted[,, i] <- tcrossprod(A.phase2[i,], A.weighted[i,])
}
sum.AA.weighted <- apply(X = AA.weighted, MARGIN = c(1,2), FUN = sum)
sum.AA.weighted.inv <- solve(sum.AA.weighted)
infl1.eta <- A %*% sum.AA.weighted.inv
infl2.eta <- 0 * A
infl2.eta[indiv.phase2,] <- - A.weighted %*% sum.AA.weighted.inv
infl.eta <- infl1.eta + infl2.eta
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
XA <- array(NA, dim = c(p, q, n))
for(i in 1:n) {
XA[,, i] <- tcrossprod(X[i,], A.phase2[i,])
}
drond.G1t.eta <- array(NA, dim = c(p, q, number.times))
drond.G0t.eta <- array(NA, dim = c(q, number.times))
drond.S1t.eta <- array(NA, dim = c(p, q, number.times))
drond.S0t.eta <- array(NA, dim = c(q, number.times))
for(t in 1:number.times) {
drond.G1t.eta[,, t] <- apply(X = sweep(XA, 3, dNt.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.G0t.eta[, t] <- colSums(A.phase2 * matrix(dNt.weighted[, t],
nrow = n, ncol = q,
byrow = FALSE))
drond.S1t.eta[,, t] <- apply(X = sweep(XA, 3, Y.exp.X.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.S0t.eta[, t] <- colSums(A.phase2 * matrix(Y.exp.X.weighted[, t],
nrow = n, ncol = q,
byrow = FALSE))
}
drond.Ut.eta <- array(NA, dim = c(p, q, number.times))
X.expect <- S1t / matrix(S0t, nrow = number.times, ncol = p,
byrow = FALSE)
for(t in 1:number.times) {
drond.Ut.eta[,, t] <- drond.G1t.eta[,, t] - tcrossprod(X.expect[t, ],
drond.G0t.eta[, t]) -
(colSums(dNt.weighted)/S0t)[t] * drond.S1t.eta[,, t] +
(colSums(dNt.weighted)/S0t^2)[t] *
tcrossprod(S1t[t, ], drond.S0t.eta[, t])
}
drond.U.eta <- apply(X = drond.Ut.eta, MARGIN = c(1,2), FUN = sum)
infl1.beta <- infl1.eta %*% t(drond.U.eta) %*% solve(infomat)
score <- array(NA, dim = c(n, p, number.times))
for(i in 1:n) {
score[i,,] <- - sweep(t(X.expect), 1, X[i,], "-")
}
for(t in 1:number.times) {
dMt <- dNt.weighted[, t] - riskmat[, t] * exp.X.weighted *
(colSums(dNt.weighted)/S0t)[t]
score[,, t] <- score[,, t] * matrix(dMt, nrow = n, ncol = p,
byrow = FALSE)
}
score.beta <- apply(X = score, MARGIN = c(1,2), FUN = sum)
infl2.beta <- infl2.eta %*% t(drond.U.eta) %*% solve(infomat)
infl2.beta[indiv.phase2,] <- infl2.beta[indiv.phase2,] + score.beta %*%
solve(infomat)
infl.beta <- infl1.beta + infl2.beta
return(list(infl.beta = infl.beta, infl2.beta = infl2.beta,
beta.hat = beta.hat))
}else{
# --------------------------------------------------------------------------
# If design weights --------------------------------------------------------
infl.beta <- residuals(mod, type = "dfbeta", weighted = T)
return(list(infl.beta = infl.beta, beta.hat = beta.hat))
}
}
## -----------------------------------------------------------------------------
## Function: influences.CumBH()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the log-relative
## hazard, baseline hazards at each unique event time, and
## cumulative baseline hazard in the time interval [Tau1, Tau2]. It
## also returns the parameters estimates. If calibrated weights
## were used and the auxiliary variables observations are provided,
## the function returns the overall influences, as well as the
## phase-two influences
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard. Default is the first event time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard. Default is the last event time
##
## calibrated are calibrated weights used for the estimation of the log-
## relative hazard, cumulative baseline hazard and pure risk?
## If calibrated = TRUE, the argument below needs to be
## provided
##
## A matrix with the values of the auxiliary variables used for
## the calibration of the weights in the whole cohort. If
## calibrated = TRUE, this argument needs to be provided
## -----------------------------------------------------------------------------
influences.CumBH <- function (mod, Tau1 = NULL, Tau2 = NULL, A = NULL,
calibrated = NULL) {
# ----------------------------------------------------------------------------
# Quantities needed for the influences ---------------------------------------
mod.detail <- coxph.detail(mod, riskmat = T)
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
n <- nrow(riskmat)
X <- model.matrix(mod)
p <- ncol(X)
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
beta.hat <- mod$coefficients
exp.X.weighted <- weights * exp(X %*% beta.hat)
Y.exp.X.weighted <- riskmat * matrix(exp.X.weighted, nrow = n,
ncol = number.times, byrow = FALSE)
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (X * matrix(exp.X.weighted, nrow = n,
ncol = p, byrow = FALSE))
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
dNt.weighted <- dNt * matrix(weights, nrow = n, ncol = number.times,
byrow = FALSE)
infomat.indiv <- mod.detail$imat
infomat <- apply(X = infomat.indiv, MARGIN = c(1,2), FUN = sum)
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((Tau1 < mod.detail$time) &
(mod.detail$time <= Tau2))
Tau1Times <- which((mod.detail$time <= Tau1))
Tau2Times <- which((mod.detail$time <= Tau2))
lambda0.t.hat <- t(colSums(dNt) / S0t)
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
if (is.null(calibrated)) {
calibrated <- FALSE
}
# ----------------------------------------------------------------------------
# If calibrated weights ------------------------------------------------------
if (calibrated == TRUE) {
if (is.null(A)) {
stop("A matrix with auxiliary variables values need to be provided")
}
# --------------------------------------------------------------------------
# Computation of the influences for the Lagrangian multipliers, eta --------
indiv.phase2 <- row.names(X)
A.phase2 <- A[indiv.phase2, ]
q <- ncol(A.phase2)
A.weighted <- sweep(A.phase2, 1, weights, "*")
AA.weighted <- array(NA, dim = c(q, q, n))
for(i in 1:nrow(A.phase2)) {
AA.weighted[,, i] <- tcrossprod(A.phase2[i,], A.weighted[i,])
}
sum.AA.weighted <- apply(X = AA.weighted, MARGIN = c(1,2), FUN = sum)
sum.AA.weighted.inv <- solve(sum.AA.weighted)
infl1.eta <- A %*% sum.AA.weighted.inv
infl2.eta <- 0 * A
infl2.eta[indiv.phase2,] <- - A.weighted %*% sum.AA.weighted.inv
infl.eta <- infl1.eta + infl2.eta
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
XA <- array(NA, dim = c(p, q, n))
for(i in 1:n) {
XA[,, i] <- tcrossprod(X[i,], A.phase2[i,])
}
drond.G1t.eta <- array(NA, dim = c(p, q, number.times))
drond.G0t.eta <- array(NA, dim = c(q, number.times))
drond.S1t.eta <- array(NA, dim = c(p, q, number.times))
drond.S0t.eta <- array(NA, dim = c(q, number.times))
for(t in 1:number.times) {
drond.G1t.eta[,, t] <- apply(X = sweep(XA, 3, dNt.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.G0t.eta[, t] <- colSums(A.phase2 * matrix(dNt.weighted[, t],
nrow = n, ncol = q,
byrow = FALSE))
drond.S1t.eta[,, t] <- apply(X = sweep(XA, 3, Y.exp.X.weighted[, t],
"*"), MARGIN = c(1,2), FUN = sum)
drond.S0t.eta[, t] <- colSums(A.phase2 * matrix(Y.exp.X.weighted[, t],
nrow = n, ncol = q,
byrow = FALSE))
}
drond.Ut.eta <- array(NA, dim = c(p, q, number.times))
X.expect <- S1t / matrix(S0t, nrow = number.times, ncol = p,
byrow = FALSE)
for(t in 1:number.times) {
drond.Ut.eta[,, t] <- drond.G1t.eta[,, t] - tcrossprod(X.expect[t, ],
drond.G0t.eta[, t]) -
(colSums(dNt.weighted) / S0t)[t] * drond.S1t.eta[,, t] +
(colSums(dNt.weighted) / S0t ^ 2)[t] *
tcrossprod(S1t[t, ], drond.S0t.eta[, t])
}
drond.U.eta <- apply(X = drond.Ut.eta, MARGIN = c(1,2), FUN = sum)
infl1.beta <- infl1.eta %*% t(drond.U.eta) %*% solve(infomat)
score <- array(NA, dim = c(n, p, number.times))
for(i in 1:n) {
score[i,,] <- - sweep(t(X.expect), 1, X[i,], "-")
}
for(t in 1:number.times) {
dMt <- dNt.weighted[, t] - riskmat[, t] *
exp.X.weighted * (colSums(dNt.weighted)/S0t)[t]
score[,, t] <- score[,, t] * matrix(dMt, nrow = n, ncol = p,
byrow = FALSE)
}
score.beta <- apply(X = score, MARGIN = c(1,2), FUN = sum)
infl2.beta <- infl2.eta %*% t(drond.U.eta) %*% solve(infomat)
infl2.beta[indiv.phase2,] <- infl2.beta[indiv.phase2,] +
score.beta %*% solve(infomat)
infl.beta <- infl1.beta + infl2.beta
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
infl1.lambda0.t <- (- (infl1.beta %*% t(S1t) + infl1.eta %*% drond.S0t.eta) *
matrix(lambda0.t.hat, nrow = nrow(A),
ncol = number.times, byrow = TRUE)) /
matrix(S0t, nrow = nrow(A), ncol = number.times,
byrow = TRUE)
infl2.lambda0.t <- 0 * infl1.lambda0.t
infl2.lambda0.t[indiv.phase2,] <- (dNt - (Y.exp.X.weighted +
infl2.beta[indiv.phase2,] %*%
t(S1t) +
infl2.eta[indiv.phase2,] %*%
drond.S0t.eta) *
matrix(lambda0.t.hat, nrow = n,
ncol = number.times,
byrow = TRUE)) /
matrix(S0t, nrow = n,
ncol = number.times,
byrow = TRUE)
infl.lambda0.t <- infl1.lambda0.t + infl2.lambda0.t
infl1.Lambda0.Tau1Tau2 <- rowSums(infl1.lambda0.t[, Tau1Tau2.times])
infl2.Lambda0.Tau1Tau2 <- rowSums(infl2.lambda0.t[, Tau1Tau2.times])
infl.Lambda0.Tau1Tau2 <- infl1.Lambda0.Tau1Tau2 + infl2.Lambda0.Tau1Tau2
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl2.beta = infl2.beta, infl2.lambda0.t = infl2.lambda0.t,
infl2.Lambda0.Tau1Tau2 = as.matrix(infl2.Lambda0.Tau1Tau2),
beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat))
}else{
# --------------------------------------------------------------------------
# If design weights --------------------------------------------------------
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
infl.beta <- residuals(mod, type = "dfbeta", weighted = T)
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
infl.lambda0.t <- (dNt - (Y.exp.X.weighted + infl.beta %*% t(S1t)) *
matrix(lambda0.t.hat, nrow = n, ncol = number.times,
byrow = TRUE)) /
matrix(S0t, nrow = n, ncol = number.times,
byrow = TRUE)
infl.Lambda0.Tau1Tau2 <- rowSums(infl.lambda0.t[, Tau1Tau2.times])
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat))
}
}
## -----------------------------------------------------------------------------
## Function: influences.PR()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the pure risk in the
## time interval [Tau1, Tau2] and for covariate profile x, from
## that of the log-relative hazard and cumulative baseline hazard
## [Tau1, Tau2]. It also returns the pure risk estimate. If
## calibrated weights were used and the phase-two influences are
## provided, the function returns the overall influences, as well
## as the phase-two influences
## -----------------------------------------------------------------------------
## Arguments:
##
## beta log-relative hazard
##
## Lambda0.Tau1Tau2 cumulative baseline hazard in the time interval that
## is considered for the pure risk
##
## x vector of length p, specifying the covariate profile
## considered for the pure risk. Default is (0,...,0)
##
## infl.beta log-relative hazard influences
##
## infl.Lambda0.Tau1Tau2 influences for the cumulative baseline hazard in the
## same time interval as that of the pure risk
##
## calibrated are calibrated weights used for the estimation of
## the log-relative hazard, cumulative baseline hazard
## and pure risk? If calibrated = TRUE, the arguments
## below need to be provided
##
## infl2.beta phase-two influences for the log-relative hazard
##
## infl2.Lambda0.Tau1Tau2 phase-two influences for the cumulative baseline
## hazard in the same time interval as that of the pure
## risk
## -----------------------------------------------------------------------------
influences.PR <- function (beta, Lambda0.Tau1Tau2, x = NULL, infl.beta,
infl.Lambda0.Tau1Tau2, calibrated = NULL,
infl2.beta = NULL, infl2.Lambda0.Tau1Tau2 = NULL) {
# ----------------------------------------------------------------------------
# Quantities needed for the influences ---------------------------------------
p <- length(beta)
if ((length(x) != p) | (is.null(x))) {
x <- rep(0, p)
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta))
Pi.x <- 1 - exp(- exp.x * Lambda0.Tau1Tau2)
if (is.null(calibrated)) {
calibrated <- FALSE
}
# ----------------------------------------------------------------------------
# If calibrated weights ------------------------------------------------------
if (calibrated == TRUE) {
if (is.null(infl2.beta) | is.null(infl2.Lambda0.Tau1Tau2)) {
stop("Phase-two influences must be provided for the log-RH and CumBH")
}
infl1.beta <- infl.beta - infl2.beta
infl1.Lambda0.Tau1Tau2 <- infl.Lambda0.Tau1Tau2 - infl2.Lambda0.Tau1Tau2
infl1.Pi.x <- c((1 - Pi.x) * exp.x * Lambda0.Tau1Tau2) *
infl1.beta %*% x + c((1 - Pi.x) * exp.x) *
infl1.Lambda0.Tau1Tau2
infl2.Pi.x <- c((1 - Pi.x) * exp.x * Lambda0.Tau1Tau2) *
infl2.beta %*% x + c((1 - Pi.x) * exp.x) *
infl2.Lambda0.Tau1Tau2
infl.Pi.x <- infl1.Pi.x + infl2.Pi.x
return(list(infl.Pi.x.Tau1Tau2 = infl.Pi.x, infl2.Pi.x.Tau1Tau2 = infl2.Pi.x,
Pi.x.Tau1Tau2.hat = Pi.x))
}else{
# --------------------------------------------------------------------------
# If design weights --------------------------------------------------------
infl.Pi.x <- c((1 - Pi.x) * exp.x * Lambda0.Tau1Tau2) * infl.beta %*% x +
c((1 - Pi.x) * exp.x) * infl.Lambda0.Tau1Tau2
return(list(infl.Pi.x.Tau1Tau2 = infl.Pi.x, Pi.x.Tau1Tau2.hat = Pi.x))
}
}
## -----------------------------------------------------------------------------
## Function: auxiliary.construction()
## -----------------------------------------------------------------------------
## Description: This function returns the auxiliary variables proposed by
## Breslow et al. (Stat. Biosci., 2009) and Breslow et al. (IMS,
## 2013). They are based on the relative hazard and cumulative
## baseline hazard influences, when using the cohort data with
## imputed covariates values
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph run on the
## cohort data with imputed covariates values
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard. Default is the first event time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard. Default is the last event time
## -----------------------------------------------------------------------------
auxiliary.construction.orig <- function (mod, Tau1 = NULL, Tau2 = NULL) {
# ----------------------------------------------------------------------------
# Quantities needed for the influences ---------------------------------------
mod.detail <- coxph.detail(mod, riskmat = T)
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
n <- nrow(riskmat)
X <- model.matrix(mod)
p <- ncol(X)
weights <- mod$weights # weights used for the fit, omega_i
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1. Should be the case here
beta.hat <- mod$coefficients
exp.X.weighted <- weights * exp(X %*% beta.hat)
Y.exp.X.weighted <- riskmat * matrix(exp.X.weighted, nrow = n,
ncol = number.times, byrow = FALSE)
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (X * matrix(exp.X.weighted, nrow = n,
ncol = p, byrow = FALSE))
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
lambda0.t.hat <- t(colSums(dNt) / S0t)
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((Tau1 < mod.detail$time) & (mod.detail$time <= Tau2))
# ----------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta ----------------
infl.beta <- residuals(mod, type = "dfbeta", weighted = T)
# ----------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] ---------------------------------------------------------------
infl.lambda0.t <- (dNt - (Y.exp.X.weighted + infl.beta %*% t(S1t)) *
matrix(lambda0.t.hat, nrow = n, ncol = number.times,
byrow = TRUE)) /
matrix(S0t, nrow = n, ncol = number.times,
byrow = TRUE)
infl.Lambda0.Tau1Tau2 <- rowSums(infl.lambda0.t[, Tau1Tau2.times])
return(list(A.RH = infl.beta, A.CumBH = infl.Lambda0.Tau1Tau2))
}
## -----------------------------------------------------------------------------
## Function: product.covar.weight()
## -----------------------------------------------------------------------------
## Description: This function computes the product of the joint design weights
## and joint sampling indicators covariances, needed for the
## phase-two component of the variance
## -----------------------------------------------------------------------------
## Argument:
##
## stratified was the sampling of the case-cohort stratified on W?
##
## casecohort if stratified = TRUE, data frame with status (case status),
## W (strata), strata.m (number of sampled individuals in the
## stratum) and strata.n (stratum size in the cohhort), for
## each individual in the stratified case cohort data. If
## stratified = FALSE,data frame with status (case status), m
## (number of sampled individuals) and n (cohort size), for
## each individual inthe un-stratified case cohort data
## -----------------------------------------------------------------------------
product.covar.weight <- function (casecohort, stratified = NULL) {
if (is.null(stratified)) {
stratified <- FALSE
}
if(stratified == TRUE){
noncases <- which(casecohort$status == 0)
n.noncases <- length(noncases)
casecohort.W <- casecohort$W[noncases]
casecohort.m <- casecohort$strata.m[noncases]
casecohort.n <- casecohort$strata.n[noncases]
casecohort.m.n <- casecohort.m / casecohort.n
casecohort.n.m <- casecohort.n / casecohort.m
#---------------------------------------------------------------------------
# Computation of the joint design weights, given by 1 over the sampling
# indicators probabilities -------------------------------------------------
omega <- function (indiv) {
return((casecohort.W == casecohort.W[indiv]) * casecohort.n.m *
(casecohort.n - 1) / (casecohort.m - 1))
}
Omega <- do.call(rbind, lapply(1:n.noncases, omega))
diag(Omega) <- casecohort.n.m # omega_ij = 1 / P(Xi_i = Xi_j = 1), i \neq j, and the omega_ii = omega_i = 1 / P(Xi_i = 1)
#---------------------------------------------------------------------------
# Computation of the joint sampling indicators covariances -----------------
covar <- function (indiv) {
return((casecohort.W == casecohort.W[indiv]) *
(casecohort.m.n * (casecohort.m - 1) / (casecohort.n - 1) -
casecohort.m.n^2))
}
Covar <- do.call(rbind, lapply(1:n.noncases, covar))
diag(Covar) <- casecohort.m.n * (1 - casecohort.m.n)
} else {
noncases <- which(casecohort$status == 0)
n.noncases <- length(noncases)
casecohort.m <- casecohort$m[noncases]
casecohort.n <- casecohort$n[noncases]
casecohort.m.n <- casecohort.m / casecohort.n
casecohort.n.m <- casecohort.n / casecohort.m
#---------------------------------------------------------------------------
# Computation of the joint design weights, given by 1 over the sampling
# indicators probabilities -------------------------------------------------
Omega <- matrix(casecohort.n.m * (casecohort.n - 1) / (casecohort.m - 1),
nrow = n.noncases, ncol = n.noncases)
diag(Omega) <- casecohort.n.m
#---------------------------------------------------------------------------
# Computation of the joint sampling indicators covariances -----------------
Covar <- matrix(casecohort.m.n * (casecohort.m - 1) / (casecohort.n - 1) -
casecohort.m.n ^ 2, nrow = n.noncases, ncol = n.noncases)
diag(Covar) <- casecohort.m.n * (1 - casecohort.m.n)
}
#-----------------------------------------------------------------------------
# Computation of the product -------------------------------------------------
product.covar.weight <- Covar * Omega # sigma_ij * omega_ij
product.covar.weight[which(is.na(product.covar.weight))] <- 0
return(product.covar.weight)
}
## -----------------------------------------------------------------------------
## Function: conf.interval()
## -----------------------------------------------------------------------------
## Description: This function computes the 95% confidence interval of a
## parameter estimate and its variance estimate, assuming normality
## -----------------------------------------------------------------------------
## Arguments:
##
## param.estimate parameter estimate
##
## var.estimate variance estimate for the parameter
## -----------------------------------------------------------------------------
conf.interval <- function (param.estimate, var.estimate) {
return(c(param.estimate - sqrt(var.estimate) * qnorm(0.975),
param.estimate + sqrt(var.estimate) * qnorm(0.975)))
}
## -----------------------------------------------------------------------------
## Function: influences.missingdata()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the log-relative
## hazard, baseline hazards at each unique event time, cumulative
## baseline hazard in the time interval [Tau1, Tau2] and pure risk
## in [Tau1, Tau2] and for covariate profile x, when covariate data
## is missing for individuals in the case cohort. It returns the
## overall influences, as well as the phase-two and phase-three
## influences. It also returns the parameters estimates
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard and pure risk. Default is the
## first event time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard and pure risk. Default is the
## last event time
##
## x vector of length p, specifying the covariate profile
## considered for the pure risk. Default is (0,...,0)
##
## estimated.weights are the weights for the third phase of sampling (due to
## missingness) estimated ? If estimated.weights = TRUE,
## B.phase2 needs to be provided
##
## B.phase2 matrix for the phase.two data with as many columns as
## phase-three strata, with one 1 per row, to indicate the
## phase-three stratum position of the whole cohort. This
## argument needs to be provided if
## estimated.weights = TRUE
##
## riskmat.phase2 at risk matrix for the phase-two data at all of the
## cases event times, even that with missing covariate
## data
##
## dNt.phase2 counting process matrix for failures in the phase-two
## data. If status.phase2 = NULL, this argument needs to be
## provided
##
## status.phase2 vector indicating the case status in the phase-two data
## If dNt.phase2 = NULL, this argument needs to be provided
## -----------------------------------------------------------------------------
influences.missingdata <- function (mod, riskmat.phase2, dNt.phase2 = NULL,
status.phase2 = NULL,
Tau1 = NULL, Tau2 = NULL, x = NULL,
estimated.weights = FALSE, B.phase2 = NULL) {
if (is.null(estimated.weights)) {
estimated.weights <- FALSE
}
if (is.null(dNt.phase2)&is.null(status.phase2)) {
stop("The status for the phase-two data, or the counting process matrix
for failures in the phase-two data, need to be provided")
} else {
if (is.null(dNt.phase2)) {
# ------------------------------------------------------------------------
# If not provided, compute the counting process matrix for failures in the
# phase-two data ---------------------------------------------------------
observed.times.phase2 <- apply(riskmat.phase2, 1,
function(v) {which.max(cumsum(v))})
dNt.phase2 <- matrix(0, nrow(riskmat.phase2),
ncol(riskmat.phase2))
dNt.phase2[cbind(1:nrow(riskmat.phase2), observed.times.phase2)] <- 1
dNt.phase2 <- sweep(dNt.phase2, 1, status.phase2, "*")
}
}
# ----------------------------------------------------------------------------
# Quantities needed for the estimation ---------------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
number.times.phase2 <- ncol(riskmat.phase2)
eventtimes.phase2 <- as.numeric(colnames(riskmat.phase2))
X <- model.matrix(mod)
p <- ncol(X)
n.phase3 <- nrow(X)
n.phase2 <- nrow(riskmat.phase2)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ]
beta.hat <- mod$coefficients
weights <- mod$weights # use the cases' actual failure times, known
# even for cases with missing covariate data
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
exp.X.weighted <- weights * exp(X %*% beta.hat)
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n.phase3, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
dNt.weighted <- dNt * matrix(weights, nrow = n.phase3,
ncol = number.times, byrow = FALSE)
infomat.indiv <- mod.detail$imat
if (!is.null(dim(infomat.indiv))) {
infomat <- apply(X = infomat.indiv, MARGIN = c(1,2), FUN = sum)
} else {
infomat <- sum(infomat.indiv)
}
# can also be obtained from the following code:
#XX <- array(NA, dim = c(p, p, n.phase3))
#for(i in 1:n.phase3){
# XX[,, i] <- tcrossprod(X[i,])
#}
#S2t <- array(NA, dim = c(p, p, number.times))
#S1S1t <- array(NA, dim = c(p, p, number.times))
#for(t in 1:number.times){
# S1S1t[,, t] <- tcrossprod(S1t[t, ])
# S2t[,, t] <- apply(X = sweep(XX, 3, riskmat[, t] * exp.X.weighted, "*"), MARGIN = c(1,2), FUN = sum)
#}
#infomat <- rowSums((sweep(S2t, 3, colSums(dNt.weighted)/S0t, "*") - sweep(S1S1t, 3, colSums(dNt.weighted)/(S0t ^ 2), "*")), dims = 2)
Y.exp.X.weighted <- sweep(riskmat, 1, exp.X.weighted, "*")
Y.exp.X.weighted.casestimes <- sweep(riskmat.phase3, 1, exp.X.weighted, "*")
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (sweep(X, 1, exp.X.weighted, "*"))
X.expect <- S1t / matrix(S0t, nrow = number.times, ncol = p, byrow = FALSE)
S0t.casestimes <- t(riskmat.phase3) %*% (exp.X.weighted)
S1t.casestimes <- t(riskmat.phase3) %*% (sweep(X, 1, exp.X.weighted, "*"))
score <- array(NA, dim = c(n.phase3, p, number.times))
for(i in 1:n.phase3){
score[i,,] <- - sweep(t(X.expect), 1, X[i,], "-")
}
for(t in 1:number.times) {
dMt <- dNt.weighted[, t] - riskmat[, t] * exp.X.weighted *
(colSums(dNt.weighted) / S0t)[t]
score[,, t] <- score[,, t] * matrix(dMt, nrow = n.phase3, ncol = p,
byrow = FALSE)
}
score.beta <- apply(X = score, MARGIN = c(1,2), FUN = sum)
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((Tau1 < eventtimes.phase2) &
(eventtimes.phase2 <= Tau2))
Tau1Times <- which((eventtimes.phase2 <= Tau1))
Tau2Times <- which((eventtimes.phase2 <= Tau2))
lambda0.t.hat <- t(colSums(dNt.phase2) / S0t.casestimes)
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
if ((length(x) != p) | (is.null(x))) {
x <- rep(0, p)
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta.hat))
Pi.x.hat <- 1 - exp(- exp.x * Lambda0.Tau1Tau2.hat)
# ----------------------------------------------------------------------------
# If missingness weights estimated -------------------------------------------
if (estimated.weights == TRUE) {
if (is.null(B.phase2)) {
stop("Strata for the third phase of sampling (missingness) must be provided")
}
# --------------------------------------------------------------------------
# Computation of the influences for gamma ----------------------------------
J3 <- ncol(B.phase2)
B.phase3 <- B.phase2[indiv.phase3, ]
weights.p3.est <- estimation.weights.phase3(B.phase3 = B.phase3,
total.phase2 = colSums(B.phase2),
gamma0 = rep(0, J3), niter.max = 10^3,
epsilon.stop = 10^(-10))$estimated.weights
B.p3weights <- sweep(B.phase3, 1, weights.p3.est, "*")
BB.p3weights <- array(NA, dim = c(J3, J3, n.phase3))
for (i in 1:n.phase3) {
BB.p3weights[,, i] <- tcrossprod(B.phase3[i,], B.p3weights[i,])
}
sum.BB.p3weights <- apply(X = BB.p3weights, MARGIN = c(1,2), FUN = sum)
sum.BB.p3weights.inv <- solve(sum.BB.p3weights)
infl2.gamma <- B.phase2 %*% sum.BB.p3weights.inv
infl3.gamma <- 0 * B.phase2
infl3.gamma[indiv.phase3,] <- - B.p3weights %*% sum.BB.p3weights.inv
infl.gamma <- infl2.gamma + infl3.gamma
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
XB <- array(NA, dim = c(p, J3, n.phase3))
for (i in 1:n.phase3) {
XB[,, i] = tcrossprod(X[i,], B.phase3[i,])
}
drond.G1t.gamma <- array(NA, dim = c(p, J3, number.times))
drond.G0t.gamma <- array(NA, dim = c(J3, number.times))
drondS1.gamma <- array(NA, dim = c(p, J3, number.times))
drond.S0t.gamma <- array(NA, dim = c(J3, number.times))
for (t in 1:number.times) {
drond.G1t.gamma[,, t] <- apply(X = sweep(XB, 3, dNt.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.G0t.gamma[, t] <- colSums(B.phase3 * matrix(dNt.weighted[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
drondS1.gamma[,, t] <- apply(X = sweep(XB, 3,
Y.exp.X.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.S0t.gamma[, t] <- colSums(B.phase3 * matrix(Y.exp.X.weighted[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
}
drond.Ut.gamma <- array(NA, dim = c(p, J3, number.times))
for (t in 1:number.times) {
drond.Ut.gamma[,, t] <- drond.G1t.gamma[,, t] -
tcrossprod(X.expect[t, ], drond.G0t.gamma[, t]) -
(colSums(dNt.weighted) / S0t)[t] * drondS1.gamma[,, t] +
(colSums(dNt.weighted) / S0t ^ 2)[t] * tcrossprod(S1t[t, ],
drond.S0t.gamma[, t])
}
drond.U.gamma <- apply(X = drond.Ut.gamma, MARGIN = c(1,2), FUN = sum)
infl2.beta <- infl2.gamma %*% t(drond.U.gamma) %*% solve(infomat)
infl3.beta <- infl3.gamma %*% t(drond.U.gamma) %*% solve(infomat)
infl3.beta[indiv.phase3, ] <- infl3.beta[indiv.phase3,] + score.beta %*%
solve(infomat)
infl.beta <- infl2.beta + infl3.beta
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
drondS1.gamma.casestimes <- array(NA, dim = c(p, J3, number.times.phase2))
drond.S0t.gamma.casestimes <- array(NA, dim = c(J3, number.times.phase2))
for (t in 1:number.times.phase2) {
drondS1.gamma.casestimes[,, t] <- apply(X = sweep(XB, 3,
Y.exp.X.weighted.casestimes[, t],
"*"),
MARGIN = c(1,2), FUN = sum)
drond.S0t.gamma.casestimes[, t] <- colSums(B.phase3 * matrix(Y.exp.X.weighted.casestimes[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
}
infl2.lambda0.t <- (dNt.phase2 - (infl2.beta %*% t(S1t.casestimes) +
infl2.gamma %*% drond.S0t.gamma.casestimes) *
matrix(lambda0.t.hat, nrow = n.phase2,
ncol = number.times.phase2, byrow = TRUE)) /
matrix(S0t.casestimes, nrow = n.phase2,
ncol = number.times.phase2, byrow = TRUE)
infl3.lambda0.t <- 0 * infl2.lambda0.t
infl3.lambda0.t[indiv.phase3,] <- - (Y.exp.X.weighted.casestimes +
infl3.beta[indiv.phase3,] %*%
t(S1t.casestimes) +
infl3.gamma[indiv.phase3,] %*%
drond.S0t.gamma.casestimes) *
matrix(lambda0.t.hat,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE) /
matrix(S0t.casestimes,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE)
infl.lambda0.t <- infl2.lambda0.t + infl3.lambda0.t
infl2.Lambda0.Tau1Tau2 <- rowSums(infl2.lambda0.t[, Tau1Tau2.times])
infl3.Lambda0.Tau1Tau2 <- rowSums(infl3.lambda0.t[, Tau1Tau2.times])
infl.Lambda0.Tau1Tau2 <- infl2.Lambda0.Tau1Tau2 +
infl3.Lambda0.Tau1Tau2
# --------------------------------------------------------------------------
# Computation of the influences for the pure risk in [Tau1, Tau2] and for
# covariate profile x ------------------------------------------------------
infl2.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl2.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl2.Lambda0.Tau1Tau2
infl3.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl3.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl3.Lambda0.Tau1Tau2
infl.Pi.x <- infl2.Pi.x + infl3.Pi.x
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl.Pi.x.Tau1Tau2 = infl.Pi.x, infl2.beta = infl2.beta,
infl2.lambda0.t = infl2.lambda0.t,
infl2.Lambda0.Tau1Tau2 = as.matrix(infl2.Lambda0.Tau1Tau2),
infl2.Pi.x.Tau1Tau2 = infl2.Pi.x, infl3.beta = infl3.beta,
infl3.lambda0.t = infl3.lambda0.t,
infl3.Lambda0.Tau1Tau2 = as.matrix(infl3.Lambda0.Tau1Tau2),
infl3.Pi.x.Tau1Tau2 = infl3.Pi.x, beta.hat = beta.hat,
lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat,
Pi.x.Tau1Tau2.hat = Pi.x.hat))
} else {
# --------------------------------------------------------------------------
# If missingness weights known ---------------------------------------------
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
infl2.beta <- matrix(0, nrow = nrow(riskmat.phase2), ncol = p)
infl3.beta <- infl2.beta
rownames(infl3.beta) <- rownames(riskmat.phase2)
infl3.beta[indiv.phase3,] <- score.beta %*% solve(infomat)
infl.beta <- infl2.beta + infl3.beta
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
infl2.lambda0.t <- (dNt.phase2) / matrix(S0t.casestimes,
nrow = nrow(dNt.phase2),
ncol = number.times.phase2,
byrow = TRUE)
infl3.lambda0.t <- 0 * infl2.lambda0.t
infl3.lambda0.t[indiv.phase3,] <- - (Y.exp.X.weighted.casestimes +
infl3.beta[indiv.phase3,] %*%
t(S1t.casestimes)) *
matrix(lambda0.t.hat,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE) /
matrix(S0t.casestimes,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE)
infl.lambda0.t <- infl2.lambda0.t + infl3.lambda0.t
infl2.Lambda0.Tau1Tau2 <- rowSums(infl2.lambda0.t[, Tau1Tau2.times])
infl3.Lambda0.Tau1Tau2 <- rowSums(infl3.lambda0.t[, Tau1Tau2.times])
infl.Lambda0.Tau1Tau2 <- infl2.Lambda0.Tau1Tau2 + infl3.Lambda0.Tau1Tau2
# --------------------------------------------------------------------------
# Computation of the influences for the pure risk in [Tau1, Tau2] and for
# covariate profile x ------------------------------------------------------
infl2.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl2.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl2.Lambda0.Tau1Tau2
infl3.Pi.x <- c((1 - Pi.x.hat) * exp.x * Lambda0.Tau1Tau2.hat) *
infl3.beta %*% x + c((1 - Pi.x.hat) * exp.x) *
infl3.Lambda0.Tau1Tau2
infl.Pi.x <- infl2.Pi.x + infl3.Pi.x
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl.Pi.x.Tau1Tau2 = infl.Pi.x, infl2.beta = infl2.beta,
infl2.lambda0.t = infl2.lambda0.t,
infl2.Lambda0.Tau1Tau2 = as.matrix(infl2.Lambda0.Tau1Tau2),
infl2.Pi.x.Tau1Tau2 = infl2.Pi.x, infl3.beta = infl3.beta,
infl3.lambda0.t = infl3.lambda0.t,
infl3.Lambda0.Tau1Tau2 = as.matrix(infl3.Lambda0.Tau1Tau2),
infl3.Pi.x.Tau1Tau2 = infl3.Pi.x, beta.hat = beta.hat,
lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat,
Pi.x.Tau1Tau2.hat = Pi.x.hat))
}
}
## -----------------------------------------------------------------------------
## Function: influences.RH.missingdata()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the log-relative
## hazard, when covariate data is missing for individuals in the
## case cohort. It returns the overall influences, as well as the
## phase-two and phase-three influences and parameter estimate
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## estimated.weights are the weights for the third phase of sampling (due to
## missingness) estimated ? If estimated.weights = TRUE,
## B needs to be provided
##
## B.phase2 matrix for the phase.two data with as many columns as
## phase-three strata, with one 1 per row, to indicate the
## phase-three stratum position of the whole cohort. This
## argument needs to be provided if
## estimated.weights = TRUE
##
## riskmat.phase2 at risk matrix for the phase-two data at all of the
## cases event times, even that with missing covariate
## data
##
## dNt.phase2 counting process matrix for failures in the phase-two
## data. If status.phase2 = NULL, this argument needs to be
## provided
##
## status.phase2 vector indicating the case status in the phase-two data
## If dNt.phase2 = NULL, this argument needs to be provided
## -----------------------------------------------------------------------------
influences.RH.missingdata <- function (mod, riskmat.phase2, dNt.phase2 = NULL,
status.phase2 = NULL,
estimated.weights = FALSE,
B.phase2 = NULL) {
if (is.null(estimated.weights)) {
estimated.weights <- FALSE
}
if (is.null(dNt.phase2)&is.null(status.phase2)) {
stop("The status for the phase-two data, or the counting process matrix
for failures in the phase-two data, need to be provided")
} else {
if (is.null(dNt.phase2)) {
# ------------------------------------------------------------------------
# If not provided, compute the counting process matrix for failures in the
# phase-two data ---------------------------------------------------------
observed.times.phase2 <- apply(riskmat.phase2, 1,
function(v) {which.max(cumsum(v))})
dNt.phase2 <- matrix(0, nrow(riskmat.phase2),
ncol(riskmat.phase2))
dNt.phase2[cbind(1:nrow(riskmat.phase2), observed.times.phase2)] <- 1
dNt.phase2 <- sweep(dNt.phase2, 1, status.phase2, "*")
}
}
# ----------------------------------------------------------------------------
# Quantities needed for the estimation ---------------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
number.times.phase2 <- ncol(riskmat.phase2)
X <- model.matrix(mod)
p <- ncol(X)
n.phase3 <- nrow(X)
n.phase2 <- nrow(riskmat.phase2)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ]
beta.hat <- mod$coefficients
weights <- mod$weights # use the cases' actual failure times, known
# even for cases with missing covariate data
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
exp.X.weighted <- weights * exp(X %*% beta.hat)
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n.phase3, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
dNt.weighted <- dNt * matrix(weights, nrow = n.phase3,
ncol = number.times, byrow = FALSE)
infomat.indiv <- mod.detail$imat
infomat <- apply(X = infomat.indiv, MARGIN = c(1,2), FUN = sum)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ]
Y.exp.X.weighted <- sweep(riskmat, 1, exp.X.weighted, "*")
Y.exp.X.weighted.casestimes <- sweep(riskmat.phase3, 1, exp.X.weighted, "*")
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (sweep(X, 1, exp.X.weighted, "*"))
X.expect <- S1t / matrix(S0t, nrow = number.times, ncol = p, byrow = FALSE)
S0t.casestimes <- t(riskmat.phase3) %*% (exp.X.weighted)
S1t.casestimes <- t(riskmat.phase3) %*% (sweep(X, 1, exp.X.weighted, "*"))
score <- array(NA, dim = c(n.phase3, p, number.times))
for(i in 1:n.phase3){
score[i,,] <- - sweep(t(X.expect), 1, X[i,], "-")
}
for(t in 1:number.times) {
dMt <- dNt.weighted[, t] - riskmat[, t] * exp.X.weighted *
(colSums(dNt.weighted) / S0t)[t]
score[,, t] <- score[,, t] * matrix(dMt, nrow = n.phase3, ncol = p,
byrow = FALSE)
}
score.beta <- apply(X = score, MARGIN = c(1,2), FUN = sum)
# ----------------------------------------------------------------------------
# If missingness weights estimated -------------------------------------------
if (estimated.weights == TRUE) {
if (is.null(B.phase2)) {
stop("Strata for the third phase of sampling (missingness) must be provided")
}
# --------------------------------------------------------------------------
# Computation of the influences for gamma ----------------------------------
J3 <- ncol(B.phase2)
B.phase3 <- B.phase2[indiv.phase3, ]
weights.p3.est <- estimation.weights.phase3(B.phase3 = B.phase3,
total.phase2 = colSums(B.phase2),
gamma0 = rep(0, J3), niter.max = 10^3,
epsilon.stop = 10^(-10))$estimated.weights
B.p3weights <- sweep(B.phase3, 1, weights.p3.est, "*")
BB.p3weights <- array(NA, dim = c(J3, J3, n.phase3))
for (i in 1:n.phase3) {
BB.p3weights[,, i] <- tcrossprod(B.phase3[i,], B.p3weights[i,])
}
sum.BB.p3weights <- apply(X = BB.p3weights, MARGIN = c(1,2), FUN = sum)
sum.BB.p3weights.inv <- solve(sum.BB.p3weights)
infl2.gamma <- B.phase2 %*% sum.BB.p3weights.inv
infl3.gamma <- 0 * B.phase2
infl3.gamma[indiv.phase3,] <- - B.p3weights %*% sum.BB.p3weights.inv
infl.gamma <- infl2.gamma + infl3.gamma
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
XB <- array(NA, dim = c(p, J3, n.phase3))
for (i in 1:n.phase3) {
XB[,, i] = tcrossprod(X[i,], B.phase3[i,])
}
drond.G1t.gamma <- array(NA, dim = c(p, J3, number.times))
drond.G0t.gamma <- array(NA, dim = c(J3, number.times))
drondS1.gamma <- array(NA, dim = c(p, J3, number.times))
drond.S0t.gamma <- array(NA, dim = c(J3, number.times))
for (t in 1:number.times) {
drond.G1t.gamma[,, t] <- apply(X = sweep(XB, 3, dNt.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.G0t.gamma[, t] <- colSums(B.phase3 * matrix(dNt.weighted[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
drondS1.gamma[,, t] <- apply(X = sweep(XB, 3,
Y.exp.X.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.S0t.gamma[, t] <- colSums(B.phase3 * matrix(Y.exp.X.weighted[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
}
drond.Ut.gamma <- array(NA, dim = c(p, J3, number.times))
for (t in 1:number.times) {
drond.Ut.gamma[,, t] <- drond.G1t.gamma[,, t] -
tcrossprod(X.expect[t, ], drond.G0t.gamma[, t]) -
(colSums(dNt.weighted) / S0t)[t] * drondS1.gamma[,, t] +
(colSums(dNt.weighted) / S0t ^ 2)[t] * tcrossprod(S1t[t, ],
drond.S0t.gamma[, t])
}
drond.U.gamma <- apply(X = drond.Ut.gamma, MARGIN = c(1,2), FUN = sum)
infl2.beta <- infl2.gamma %*% t(drond.U.gamma) %*% solve(infomat)
infl3.beta <- infl3.gamma %*% t(drond.U.gamma) %*% solve(infomat)
infl3.beta[indiv.phase3, ] <- infl3.beta[indiv.phase3,] + score.beta %*%
solve(infomat)
infl.beta <- infl2.beta + infl3.beta
return(list(infl.beta = infl.beta, infl2.beta = infl2.beta,
infl3.beta = infl3.beta, beta.hat = beta.hat))
} else {
# --------------------------------------------------------------------------
# If missingness weights known ---------------------------------------------
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
infl2.beta <- matrix(0, nrow = nrow(riskmat.phase2), ncol = p)
infl3.beta <- infl2.beta
rownames(infl3.beta) <- rownames(riskmat.phase2)
infl3.beta[indiv.phase3,] <- score.beta %*% solve(infomat)
infl.beta <- infl2.beta + infl3.beta
return(list(infl.beta = infl.beta, infl2.beta = infl2.beta,
infl3.beta = infl3.beta, beta.hat = beta.hat))
}
}
## -----------------------------------------------------------------------------
## Function: influences.CumBH.missingdata()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the log-relative
## hazard, baseline hazards at each unique event time and
## cumulative baseline hazard in the time interval [Tau1, Tau2],
## when covariate data is missing for individuals in the case
## cohort. It returns the overall influences, as well as the
## phase-two and phase-three influences. It also returns the
## parameters estimates
## -----------------------------------------------------------------------------
## Arguments:
##
## mod a cox model object, result of function coxph
##
## Tau1 left bound of the time interval considered for the
## cumulative baseline hazard. Default is the first event
## time
##
## Tau2 right bound of the time interval considered for the
## cumulative baseline hazard. Default is the last event
## time
##
## estimated.weights are the weights for the third phase of sampling (due to
## missingness) estimated ? If estimated.weights = TRUE,
## B needs to be provided
##
## B.phase2 matrix for the phase.two data with as many columns as
## phase-three strata, with one 1 per row, to indicate the
## phase-three stratum position of the whole cohort. This
## argument needs to be provided if
## estimated.weights = TRUE
##
## riskmat.phase2 at risk matrix for the phase-two data at all of the
## cases event times, even that with missing covariate
## data
##
## dNt.phase2 counting process matrix for failures in the phase-two
## data. If status.phase2 = NULL, this argument needs to be
## provided
##
## status.phase2 vector indicating the case status in the phase-two data
## If dNt.phase2 = NULL, this argument needs to be provided
## -----------------------------------------------------------------------------
influences.CumBH.missingdata <- function (mod, riskmat.phase2,
dNt.phase2 = NULL,
status.phase2 = NULL, Tau1 = NULL,
Tau2 = NULL,
estimated.weights = FALSE,
B.phase2 = NULL) {
if (is.null(estimated.weights)) {
estimated.weights <- FALSE
}
if (is.null(dNt.phase2)&is.null(status.phase2)) {
stop("The status for the phase-two data, or the counting process matrix
for failures in the phase-two data, need to be provided")
} else {
if (is.null(dNt.phase2)) {
# ------------------------------------------------------------------------
# If not provided, compute the counting process matrix for failures in the
# phase-two data ---------------------------------------------------------
observed.times.phase2 <- apply(riskmat.phase2, 1,
function(v) {which.max(cumsum(v))})
dNt.phase2 <- matrix(0, nrow(riskmat.phase2),
ncol(riskmat.phase2))
dNt.phase2[cbind(1:nrow(riskmat.phase2), observed.times.phase2)] <- 1
dNt.phase2 <- sweep(dNt.phase2, 1, status.phase2, "*")
}
}
# ----------------------------------------------------------------------------
# Quantities needed for the estimation ---------------------------------------
mod.detail <- coxph.detail(mod, riskmat = TRUE)
riskmat <- mod.detail$riskmat
number.times <- ncol(riskmat)
number.times.phase2 <- ncol(riskmat.phase2)
eventtimes.phase2 <- as.numeric(colnames(riskmat.phase2))
X <- model.matrix(mod)
p <- ncol(X)
n.phase3 <- nrow(X)
n.phase2 <- nrow(riskmat.phase2)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ]
beta.hat <- mod$coefficients
weights <- mod$weights # use the cases' actual failure times, known
# even for cases with missing covariate data
if (is.null(weights)) {
weights <- 1
} # when using the whole cohort, omega_i = 1
exp.X.weighted <- weights * exp(X %*% beta.hat)
observed.times <- apply(riskmat, 1, function(v) {which.max(cumsum(v))})
dNt <- matrix(0, n.phase3, number.times)
dNt[cbind(1:nrow(riskmat), observed.times)] <- 1
dNt <- dNt * matrix(mod$y[, ncol(mod$y)], nrow(riskmat),
number.times, byrow = FALSE)
dNt.weighted <- dNt * matrix(weights, nrow = n.phase3,
ncol = number.times, byrow = FALSE)
infomat.indiv <- mod.detail$imat
infomat <- apply(X = infomat.indiv, MARGIN = c(1,2), FUN = sum)
indiv.phase3 <- row.names(X)
riskmat.phase3 <- riskmat.phase2[indiv.phase3, ]
Y.exp.X.weighted <- sweep(riskmat, 1, exp.X.weighted, "*")
Y.exp.X.weighted.casestimes <- sweep(riskmat.phase3, 1, exp.X.weighted, "*")
S0t <- t(riskmat) %*% (exp.X.weighted)
S1t <- t(riskmat) %*% (sweep(X, 1, exp.X.weighted, "*"))
X.expect <- S1t / matrix(S0t, nrow = number.times, ncol = p, byrow = FALSE)
S0t.casestimes <- t(riskmat.phase3) %*% (exp.X.weighted)
S1t.casestimes <- t(riskmat.phase3) %*% (sweep(X, 1, exp.X.weighted, "*"))
score <- array(NA, dim = c(n.phase3, p, number.times))
for(i in 1:n.phase3){
score[i,,] <- - sweep(t(X.expect), 1, X[i,], "-")
}
for(t in 1:number.times) {
dMt <- dNt.weighted[, t] - riskmat[, t] * exp.X.weighted *
(colSums(dNt.weighted) / S0t)[t]
score[,, t] <- score[,, t] * matrix(dMt, nrow = n.phase3, ncol = p,
byrow = FALSE)
}
score.beta <- apply(X = score, MARGIN = c(1,2), FUN = sum)
if (is.null(Tau1)) {
Tau1 <- floor(min(mod.detail$time))
}
if (is.null(Tau2)) {
Tau2 <- floor(max(mod.detail$time))
}
Tau1Tau2.times <- which((Tau1 < eventtimes.phase2) &
(eventtimes.phase2 <= Tau2))
Tau1Times <- which((eventtimes.phase2 <= Tau1))
Tau2Times <- which((eventtimes.phase2 <= Tau2))
lambda0.t.hat <- t(colSums(dNt.phase2) / S0t.casestimes)
Lambda0.Tau1Tau2.hat <- sum(lambda0.t.hat[Tau1Tau2.times])
# ----------------------------------------------------------------------------
# If missingness weights estimated -------------------------------------------
if (estimated.weights == TRUE) {
if (is.null(B.phase2)) {
stop("Strata for the third phase of sampling (missingness) must be provided")
}
# --------------------------------------------------------------------------
# Computation of the influences for gamma ----------------------------------
J3 <- ncol(B.phase2)
B.phase3 <- B.phase2[indiv.phase3, ]
weights.p3.est <- estimation.weights.phase3(B.phase3 = B.phase3,
total.phase2 = colSums(B.phase2),
gamma0 = rep(0, J3), niter.max = 10^3,
epsilon.stop = 10^(-10))$estimated.weights
B.p3weights <- sweep(B.phase3, 1, weights.p3.est, "*")
BB.p3weights <- array(NA, dim = c(J3, J3, n.phase3))
for (i in 1:n.phase3) {
BB.p3weights[,, i] <- tcrossprod(B.phase3[i,], B.p3weights[i,])
}
sum.BB.p3weights <- apply(X = BB.p3weights, MARGIN = c(1,2), FUN = sum)
sum.BB.p3weights.inv <- solve(sum.BB.p3weights)
infl2.gamma <- B.phase2 %*% sum.BB.p3weights.inv
infl3.gamma <- 0 * B.phase2
infl3.gamma[indiv.phase3,] <- - B.p3weights %*% sum.BB.p3weights.inv
infl.gamma <- infl2.gamma + infl3.gamma
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
XB <- array(NA, dim = c(p, J3, n.phase3))
for (i in 1:n.phase3) {
XB[,, i] = tcrossprod(X[i,], B.phase3[i,])
}
drond.G1t.gamma <- array(NA, dim = c(p, J3, number.times))
drond.G0t.gamma <- array(NA, dim = c(J3, number.times))
drondS1.gamma <- array(NA, dim = c(p, J3, number.times))
drond.S0t.gamma <- array(NA, dim = c(J3, number.times))
for (t in 1:number.times) {
drond.G1t.gamma[,, t] <- apply(X = sweep(XB, 3, dNt.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.G0t.gamma[, t] <- colSums(B.phase3 * matrix(dNt.weighted[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
drondS1.gamma[,, t] <- apply(X = sweep(XB, 3,
Y.exp.X.weighted[, t], "*"),
MARGIN = c(1,2), FUN = sum)
drond.S0t.gamma[, t] <- colSums(B.phase3 * matrix(Y.exp.X.weighted[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
}
drond.Ut.gamma <- array(NA, dim = c(p, J3, number.times))
for (t in 1:number.times) {
drond.Ut.gamma[,, t] <- drond.G1t.gamma[,, t] -
tcrossprod(X.expect[t, ], drond.G0t.gamma[, t]) -
(colSums(dNt.weighted) / S0t)[t] * drondS1.gamma[,, t] +
(colSums(dNt.weighted) / S0t ^ 2)[t] * tcrossprod(S1t[t, ],
drond.S0t.gamma[, t])
}
drond.U.gamma <- apply(X = drond.Ut.gamma, MARGIN = c(1,2), FUN = sum)
infl2.beta <- infl2.gamma %*% t(drond.U.gamma) %*% solve(infomat)
infl3.beta <- infl3.gamma %*% t(drond.U.gamma) %*% solve(infomat)
infl3.beta[indiv.phase3, ] <- infl3.beta[indiv.phase3,] + score.beta %*%
solve(infomat)
infl.beta <- infl2.beta + infl3.beta
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
drondS1.gamma.casestimes <- array(NA, dim = c(p, J3, number.times.phase2))
drond.S0t.gamma.casestimes <- array(NA, dim = c(J3, number.times.phase2))
for (t in 1:number.times.phase2) {
drondS1.gamma.casestimes[,, t] <- apply(X = sweep(XB, 3,
Y.exp.X.weighted.casestimes[, t],
"*"),
MARGIN = c(1,2), FUN = sum)
drond.S0t.gamma.casestimes[, t] <- colSums(B.phase3 * matrix(Y.exp.X.weighted.casestimes[, t],
nrow = n.phase3,
ncol = J3,
byrow = FALSE))
}
infl2.lambda0.t <- (dNt.phase2 - (infl2.beta %*% t(S1t.casestimes) +
infl2.gamma %*% drond.S0t.gamma.casestimes) *
matrix(lambda0.t.hat, nrow = n.phase2,
ncol = number.times.phase2, byrow = TRUE)) /
matrix(S0t.casestimes, nrow = n.phase2,
ncol = number.times.phase2, byrow = TRUE)
infl3.lambda0.t <- 0 * infl2.lambda0.t
infl3.lambda0.t[indiv.phase3,] <- - (Y.exp.X.weighted.casestimes +
infl3.beta[indiv.phase3,] %*%
t(S1t.casestimes) +
infl3.gamma[indiv.phase3,] %*%
drond.S0t.gamma.casestimes) *
matrix(lambda0.t.hat,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE) /
matrix(S0t.casestimes,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE)
infl.lambda0.t <- infl2.lambda0.t + infl3.lambda0.t
infl2.Lambda0.Tau1Tau2 <- rowSums(infl2.lambda0.t[, Tau1Tau2.times])
infl3.Lambda0.Tau1Tau2 <- rowSums(infl3.lambda0.t[, Tau1Tau2.times])
infl.Lambda0.Tau1Tau2 <- infl2.Lambda0.Tau1Tau2 +
infl3.Lambda0.Tau1Tau2
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl2.beta = infl2.beta, infl2.lambda0.t = infl2.lambda0.t,
infl2.Lambda0.Tau1Tau2 = as.matrix(infl2.Lambda0.Tau1Tau2),
infl3.beta = infl3.beta, infl3.lambda0.t = infl3.lambda0.t,
infl3.Lambda0.Tau1Tau2 = as.matrix(infl3.Lambda0.Tau1Tau2),
beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat))
} else {
# --------------------------------------------------------------------------
# If missingness weights known ---------------------------------------------
# --------------------------------------------------------------------------
# Computation of the influences for log-relative hazard, beta --------------
infl2.beta <- matrix(0, nrow = nrow(riskmat.phase2), ncol = p)
infl3.beta <- infl2.beta
rownames(infl3.beta) <- rownames(riskmat.phase2)
infl3.beta[indiv.phase3,] <- score.beta %*% solve(infomat)
infl.beta <- infl2.beta + infl3.beta
# --------------------------------------------------------------------------
# Computation of the influences for the baseline hazards at each unique
# event time and for the cumulative baseline hazard in time interval
# [Tau1, Tau2] -------------------------------------------------------------
infl2.lambda0.t <- (dNt.phase2) / matrix(S0t.casestimes,
nrow = nrow(dNt.phase2),
ncol = number.times.phase2,
byrow = TRUE)
infl3.lambda0.t <- 0 * infl2.lambda0.t
infl3.lambda0.t[indiv.phase3,] <- - (Y.exp.X.weighted.casestimes +
infl3.beta[indiv.phase3,] %*%
t(S1t.casestimes)) *
matrix(lambda0.t.hat,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE) /
matrix(S0t.casestimes,
nrow = n.phase3,
ncol = number.times.phase2,
byrow = TRUE)
infl.lambda0.t <- infl2.lambda0.t + infl3.lambda0.t
infl2.Lambda0.Tau1Tau2 <- rowSums(infl2.lambda0.t[, Tau1Tau2.times])
infl3.Lambda0.Tau1Tau2 <- rowSums(infl3.lambda0.t[, Tau1Tau2.times])
infl.Lambda0.Tau1Tau2 <- infl2.Lambda0.Tau1Tau2 + infl3.Lambda0.Tau1Tau2
return(list(infl.beta = infl.beta, infl.lambda0.t = infl.lambda0.t,
infl.Lambda0.Tau1Tau2 = as.matrix(infl.Lambda0.Tau1Tau2),
infl2.beta = infl2.beta, infl2.lambda0.t = infl2.lambda0.t,
infl2.Lambda0.Tau1Tau2 = as.matrix(infl2.Lambda0.Tau1Tau2),
infl3.beta = infl3.beta, infl3.lambda0.t = infl3.lambda0.t,
infl3.Lambda0.Tau1Tau2 = as.matrix(infl3.Lambda0.Tau1Tau2),
beta.hat = beta.hat, lambda0.t.hat = lambda0.t.hat,
Lambda0.Tau1Tau2.hat = Lambda0.Tau1Tau2.hat))
}
}
## -----------------------------------------------------------------------------
## Function: influences.PR.missingdata()
## -----------------------------------------------------------------------------
## Description: This function estimates the influences of the pure risk in the
## time interval [Tau1, Tau2] and for covariate profile x, from
## that of the log-relative hazard and cumulative baseline hazard
## [Tau1, Tau2], when covariate data is missing for individuals in
## the case cohort. It also returns the pure risk estimate. It
## returns the overall influences, as well as the phase-two and
## phase-three influences
## -----------------------------------------------------------------------------
## Arguments:
##
## beta log-relative hazard
##
## Lambda0.Tau1Tau2 cumulative baseline hazard in the time interval that
## is considered for the pure risk
##
## x vector of length p, specifying the covariate profile
## considered for the pure risk. Default is (0,...,0)
##
## infl2.beta phase-two influences for the log-relative hazard
##
## infl2.Lambda0.Tau1Tau2 phase-two influences for the cumulative baseline
## hazard in the same time interval as that of the pure
## risk
##
## infl3.beta phase-three influences for the log-relative hazard
##
## infl3.Lambda0.Tau1Tau2 phase-three influences for the cumulative baseline
## hazard in the same time interval as that of the pure
## risk
## -----------------------------------------------------------------------------
influences.PR.missingdata <- function (beta, Lambda0.Tau1Tau2, x = NULL,
infl2.beta, infl2.Lambda0.Tau1Tau2,
infl3.beta, infl3.Lambda0.Tau1Tau2) {
p <- length(beta)
# ----------------------------------------------------------------------------
# Quantities needed for the influences ---------------------------------------
if ((length(x) != p) | (is.null(x))) {
x <- rep(0, p)
} # if no covariate profile provided, use 0 as reference level
exp.x <- c(exp(x %*% beta))
Pi.x <- 1 - exp(- exp.x * Lambda0.Tau1Tau2)
infl2.Pi.x <- c((1 - Pi.x) * exp.x * Lambda0.Tau1Tau2) *
infl2.beta %*% x + c((1 - Pi.x) * exp.x) *
infl2.Lambda0.Tau1Tau2
infl3.Pi.x <- c((1 - Pi.x) * exp.x * Lambda0.Tau1Tau2) *
infl3.beta %*% x + c((1 - Pi.x) * exp.x) *
infl3.Lambda0.Tau1Tau2
infl.Pi.x <- infl2.Pi.x + infl3.Pi.x
return(list(infl.Pi.x.Tau1Tau2 = infl.Pi.x, infl2.Pi.x.Tau1Tau2 = infl2.Pi.x,
infl3.Pi.x.Tau1Tau2 = infl3.Pi.x, Pi.x.Tau1Tau2.hat = Pi.x))
}
## -----------------------------------------------------------------------------
## Function: estimation.weights.phase3()
## -----------------------------------------------------------------------------
## Description: This function estimates the weights for the third phase of
## sampling (due to missingness)
## -----------------------------------------------------------------------------
## Arguments:
##
## B.phase3 matrix with as many columns as phase-three strata, with one
## 1 per row, to indicate the phase-three stratum position
##
## total.phase2 un-weighted totals in the stratified case-cohort, using
## all the individuals, even the ones with missing covariate
## data (phase-three data)
##
## gamma0 vector of initial/possible values for gamma, to be used as
## seed in the iterative procedure. Default is (0,...,0)
##
## niter.max maximum number of iterations for the Newton Raphson method.
## Default is 10^4 iterations
##
## epsilon.stop threshold for the difference between the estimated weighted
## total and the total in the whole cohort. If this difference
## is less than the value of epsilon.stop, no more iterations
## will be performed. Default is 10^(-10)
## -----------------------------------------------------------------------------
estimation.weights.phase3 <- function (B.phase3, total.phase2, gamma0 = NULL,
niter.max = NULL, epsilon.stop = NULL) {
if ((is.null(gamma0)) || (length(gamma0) != ncol(B.phase3))) {
gamma0 <- rep(0, ncol(B.phase3))
message("gamma0 has not been filled out. Default is 0. ")
}
if ((is.null(epsilon.stop)) || (epsilon.stop <= 0)) {
epsilon.stop <- 10^(-10)
message("epsilon.stop has not been filled out. Default is 10^(-10). ")
}
if ((is.null(niter.max)) || (niter.max <= 0)) {
niter.max <- 10^4
message("niter.max has not been filled out. Default is 10^4 iterations.")
}
for (niter in 1:niter.max) {
estimated.weights <- exp(B.phase3 %*% gamma0)
B.estweighted <- sweep(B.phase3, 1, estimated.weights, "*")
f <- colSums(B.estweighted) - total.phase2
epsilon <- max(abs(f))
if(epsilon < epsilon.stop){break}
BB.estweighted <- array(NA, dim = c(ncol(B.phase3), ncol(B.phase3),
nrow(B.phase3)))
for (i in 1:nrow(B.phase3)){
BB.estweighted[,, i] <- tcrossprod(B.phase3[i,], B.estweighted[i,])
}
drond.f.gamma0 <- apply(X = BB.estweighted, MARGIN = c(1,2), FUN = sum)
gamma0 <- gamma0 - c(f %*% solve(drond.f.gamma0))
}
estimated.weights <- as.numeric(exp(B.phase3 %*% gamma0))
B.estweighted <- sweep(B.phase3, 1, estimated.weights, "*")
return(list(gamma.hat = gamma0, estimated.weights = estimated.weights,
estimated.total = colSums(B.estweighted)))
}
## -----------------------------------------------------------------------------
## Function: scenarios()
## -----------------------------------------------------------------------------
## Description: This function creates a dataframe with the different scenarios
## to be investigated over simulations
## -----------------------------------------------------------------------------
## Arguments:
##
## n vector with the cohort sizes
##
## prob.y vector with the outcome probabilities (at the end of
## study duration)
##
## noncases.per.case vector with the desired numbers of non-cases to be
## sampled for each case
##
## part vector with the number of parts to break down the
## replications. By default there is no breaking down
## -----------------------------------------------------------------------------
scenarios <- function (n = c(5000, 10000), prob.y = c(0.02, 0.05, 0.1),
noncases.per.case = c(2, 4), part = NULL) {
parameters <- NULL
# ----------------------------------------------------------------------------
# Different combinations of parameters ---------------------------------------
if(is.null(part)){
for (l in 1:length(prob.y)) {
for (m in 1:length(n)) {
for (i in 1:length(noncases.per.case)) {
parameters <- rbind(parameters, c(prob.y[l], n[m],
noncases.per.case[i]))
}
}
}
colnames(parameters) = c("prob.y", "n", "noncases.per.case")
} else {
for (l in 1:length(prob.y)) {
for (m in 1:length(n)) {
for (i in 1:length(noncases.per.case)) {
for (j in 1:part) {
parameters <- rbind(parameters, c(prob.y[l], n[m],
noncases.per.case[i], j))
}
}
}
}
colnames(parameters) = c("prob.y", "n", "noncases.per.case", "part")
}
return(parameters)
}
## -----------------------------------------------------------------------------
## Function: robustvariance()
## -----------------------------------------------------------------------------
## Description: This function returns the robust variance estimate, i.e.,
## computes the sum of the squared influence functions, for
## parameters such as log-relative hazard, cumulative baseline
## hazard or covariate specific pure-risk. The overall influences
## should be provided. The function works with design weights or
## calibrated weights, or with missing covariates in phase two data
## (i.e. with influences obtained from the influences.missingdata R
## function)
## -----------------------------------------------------------------------------
## Argument:
##
## infl overall influences on parameters such as log-relative hazard,
## cumulative baseline hazard or covariate specific pure-risk.
## -----------------------------------------------------------------------------
robustvariance <- function (infl) {
return(robust.var = diag(crossprod(infl)))
}
## -----------------------------------------------------------------------------
## Function: variance()
## -----------------------------------------------------------------------------
## Description: This function returns the variance estimate that us influence
## -based and follows the complete variance decomposition, for
## parameters such as log-relative hazard, cumulative baseline
## hazard or covariate specific pure-risk. The function works with
## design weights or calibrated weights
## -----------------------------------------------------------------------------
## Arguments:
##
## n number of individuals in the whole cohort
##
## casecohort data frame with with status (case status) and weights
## (design or calibrated, if they are not provided in the
## argument below), for each individual in the case cohort
## data. If stratified = TRUE, it should also contain W
## (strata), strata.m (number of sampled individuals in the
## stratum) and strata.n (stratum size in the cohort). If
## stratified = FALSE, it should also contain m (number of
## sampled individuals) and n (cohort size).
##
## weights vector with design or calibrated weights for each individual
## in the case cohort data
##
## infl overall influences on a parameter such as log-relative
## hazard, cumulative baseline hazard or covariate specific
## pure-risk
##
## calibrated are calibrated weights used for the estimation of the
## parameter? If calibrated = TRUE, the arguments below need to
## be provided. Default is FALSE
##
## infl2 phase-two influences. Should be provided if calibrated
## weights are used
##
## cohort if stratified = TRUE, data frame with status (case status)
## and subcohort (subcohort sampling indicators) for each
## individual in the stratified case cohort data. If stratified
## = FALSE, data frame with status (case status) and
## unstrat.subcohort (subcohort unstratified sampling
## indicators). Should be provided if calibrated weights are
## used
##
## stratified was the sampling of the case-cohort stratified on W? Default
## is FALSE
##
## variance.phase2 should the phase-two variance component also be returned?
## Default is FALSE
## -----------------------------------------------------------------------------
variance <- function(n, casecohort, weights = NULL, infl, calibrated = NULL,
infl2 = NULL, cohort = NULL, stratified = NULL,
variance.phase2 = NULL) {
if (is.null(calibrated)) {
calibrated <- FALSE
}
if (is.null(stratified)) {
stratified <- FALSE
}
if (is.null(variance.phase2)) {
variance.phase2 <- FALSE
}
if (!is.null(weights)){
casecohort$weights = weights
} else {
if (is.null(casecohort$weights)) {
stop("the individuals weights must be provided")
}
}
prod.covar.weight <- product.covar.weight(casecohort, stratified)
if(calibrated == FALSE) {
superpop.var <- n / (n - 1) * crossprod((infl / sqrt(casecohort$weights)))
phase2.var <- with(casecohort, t(infl[status == 0,]) %*%
prod.covar.weight %*% infl[status == 0,])
var <- diag(superpop.var + phase2.var)
} else {
if (is.null(infl2) | is.null(cohort)) {
stop("infl2 and cohort must be provided if calibrated weights have been used")
} else {
omega <- rep(1, n)
names(omega) <- rownames(cohort)
omega[rownames(casecohort)] <- casecohort$weights
superpop.var <- n / (n - 1) * colSums((infl - infl2) ^ 2 + 2 *
(infl - infl2) * infl2 +
(infl2 / sqrt(omega)) ^ 2)
if (stratified == TRUE) {
phase2.var <- with(cohort,
t(infl2[(status == 0) & (subcohort == TRUE),]) %*%
prod.covar.weight %*%
infl2[(status == 0) & (subcohort == TRUE),])
} else {
phase2.var <- with(cohort,
t(infl2[(status == 0) &
(unstrat.subcohort == TRUE),]) %*%
prod.covar.weight %*%
infl2[(status == 0) &
(unstrat.subcohort == TRUE),])
}
var <- superpop.var + diag(phase2.var)
}
}
if (variance.phase2 == FALSE) {
return(variance = var)
} else {
return(list(variance = var, variance.phase2 = diag(phase2.var)))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.