Nothing
#****h* /00main
# NAME
# blupsurv --- package root
# FUNCTION
# The blupsurv package contains
# tools for fitting proportional hazards models to clustered
# recurrent events data. Nested frailties are modeled by their
# best linear unbiased predictors under an auxiliary Poisson model.
# Univariate and bivariate recurrent events processes are permitted.
#
# The most important function is bivrec.agdata, which does most of the
# model fitting. After initializing, fitting consists of iteratively
# calling fupdatefrailties4 to update frailty estimates,
# updatedisppears to update dispersion parameter estimtes,
# and updateregprof to update regression parameter estimates. The
# function fmkstderr computes standard errors at the end.
#
# Modules bivmethods and unimethods contain the R S3 methods for fitting
# bivariate and univariate models respectively.
#
# The call graph below is rather small and unhelpful. See a larger but
# equally unhelpful pdf version here:
# href:callgraph.pdf
#
# |exec dot -Tpdf -o callgraph.pdf ../../man/calls2.dot
# |dotfile ../../man/calls2.dot
# USAGE
# For usage instructions, see the R package documentation
# AUTHOR
# Emmanuel Sharef
#******
#****h* /bivrecFit
# NAME
# bivrec -- Fitter for bivariate recurrent events
# FUNCTION
# The bivrec module contains the workhorse functions to fit bivariate
# event process models. Since univariate models are just a special
# case, they can be fit by these functions as well.
#
# The main fitting routine is
# bivrec.agdata
# and an overview of the algorithm implementation is given in the
# documentation for that function.
#******
#****h* bivrecFit/utility
# NAME
# Utility functions
# FUNCTION
# Functions used to convert matrices and data formats
#******
#****h* bivrecFit/initialization
# NAME
# Initialization functions
# FUNCTION
# Functions used to initialize the algorithm
#******
#****h* bivrecFit/estimation
# NAME
# Estimation functions
# FUNCTION
# Functions used directly as part of the model-fitting procedure
#******
#****h* bivrecFit/fortranwrappers
# NAME
# Wrapper functions
# FUNCTION
# Functions that act purely as FORTRAN wrappers
#******
#****h* bivrecFit/ZZdebug
# NAME
# Debugging functions
# FUNCTION
# Functions used only for debugging
#******
#****f* utility/makeUijframe
# NAME
# makeUijFrame --- Make Uijmat and Vijmat matrices
# FUNCTION
# Convert vectors of frailties into matrices
# in order to make them addressable as U[i, j]
# INPUTS
# m number of clusters
# Ji cluster sizes
# Uij vector of length m * Ji containing frailties for event 1
# Vij vector of length m * Ji containing frailties for event 2
# OUTPUTS
# Uijmat matrix of dimension m x max(Ji) containing entries of Uij
# Vijmat matrix of dimension m x max(Ji) containing entries of Vij
# SYNOPSIS
makeUijframe <- function(m, Ji, Uij, Vij)
# SOURCE
#
{
# Allocate matrix storage
Uijmat <- matrix(0, m, max(Ji))
Vijmat <- matrix(0, m, max(Ji))
# Fill matrices row by row
for(i in 1:m){
Uijmat[i, 1:Ji[i]] <- Uij[(c(0, cumsum(Ji))[i] + 1):(c(0, cumsum(Ji))[i + 1])]
Vijmat[i, 1:Ji[i]] <- Vij[(c(0, cumsum(Ji))[i] + 1):(c(0, cumsum(Ji))[i + 1])]
}
return(list(Uijmat = Uijmat, Vijmat = Vijmat))
}
#************ makeUijframe
#****f* initialization/makedatamat
# NAME
# makedatamat --- make data matrix
# FUNCTION
# Convert an Anderson - Gill data frame for a single recurrent event process into
# the data matrix format used in the remainder of the algorithm.
# INPUTS
# agdata a data frame in Anderson-Gill format with columns
# i,j,k,r,start,stop,delta and covariates
# as a matrix of discretization breakpoints, for each stratum
# generated by makeas
# alternating boolean, indicating whether the at-risk function
# alternates between events (episodic process)
# OUTPUTS
# datamat a matrix with columns i,j,k,r,time,delta,smin,smax
# and covariates. The columns are:
# i cluster
# j subject
# k event counter
# r stratum
# time length of time for each interval
# delta event indicator
# smin discretization interval corresponding to
# start time in input
# smax discretization interval corresponding to
# stop time in input
# SYNOPSIS
makedatamat <- function(agdata, as, alternating)
# SOURCE
#
{
# Allocate space. Most data is copied directly from agdata.
datamat <- as.matrix(cbind(agdata[, 1:4], 0, agdata$delta, 1, 0,
agdata[, 8:dim(agdata)[2]]))
colnames(datamat)[5:8] <- c("time", "delta", "smin", "smax")
# Rows in which a new event has occurred
diffevent <- (c(TRUE, diff(agdata$i * 1000 + agdata$j) != 0) |
c(TRUE, agdata$delta[ - length(agdata$delta)] == 1))
badind <- NULL
# Loop to compute discretization intervals
for(ind in 1:dim(datamat)[1])
{
# Reset last event start time
if(diffevent[ind]) {
lasteventtime <- agdata$start[ind]
smax <- 0
}
# Interevent time is given by stop - start from agdata.
if(!alternating){
newtime <- agdata$stop[ind] - lasteventtime
}else{
newtime <- agdata$stop[ind] - agdata$start[ind]
}
r <- datamat[ind, "r"]
datamat[ind, "time"] <- newtime
# Find the discretization intervals that the times fall into
smin <- smax + 1
smax <- sum(as[r, ] < newtime)
# Most of the time, smin <= smax, however, if both start and stop times
# fall into the same interval, this needs to be fixed
if(smin > smax){
smin <- datamat[ind - 1, "smin"]
badind <- c(badind, ind - 1)
timediff0 <- as[r, datamat[ind - 1, "smax"] + 1] -
as[r, datamat[ind - 1, "smin"]]
timediff1 <- as[r, smax + 1] - as[r, smax]
datamat[ind, 9:dim(datamat)[2]] <- (timediff0 *
datamat[ind - 1, 9:dim(datamat)[2]] + timediff1 *
datamat[ind, 9:dim(datamat)[2]]) / (timediff0 + timediff1)
}
datamat[ind, "smin"] <- smin
datamat[ind, "smax"] <- smax
}
# Remove bad entries
if(!is.null(badind)) datamat <- datamat[ - badind, ]
return(datamat)
}
#************ makedatamat
#****f* ZZdebug/makealphars2
# NAME
# makealphars2 --- make baseline hazard
# FUNCTION
# Compute the MLEs for the baseline hazard parameters alphars,
# given estimates of regression parameters andf frailties, for
# a single recurrent event process.
#
# This is never called, and is for debugging purposes only. See
# fmkalphars2 and the Fortran implementation.
# SYNOPSIS
makealphars2 <- function(m, Ji, datamat, betahat, as, Uijmat)
# SOURCE
#
{
# Allocate storage space
alphars <- matrix(0, dim(as)[1], dim(as)[2])
# drs will contain the numerator, Srs the denominator
drs <- alphars
Srs <- alphars
for(ind in 1:dim(datamat)[1]){
i <- datamat[ind, "i"]
j <- datamat[ind, "j"]
k <- datamat[ind, "k"]
smin <- datamat[ind, "smin"]
smax <- datamat[ind, "smax"]
r <- datamat[ind, "r"]
time <- datamat[ind, "time"]
Z <- datamat[ind, -(1:8)]
# the numerator is just the sum of the event indicators
drs[r, smax] <- drs[r, smax] + datamat[ind, "delta"]
# Computing the denominator requires looping over all at - risk intervals
for(s in smin:smax)
Srs[r, s] <- Srs[r, s] + Uijmat[i, j] *
exp(as.matrix(Z)%*%as.matrix(betahat)) * A(time, as, r, s)
}
alphars <- drs / Srs
alphars[is.nan(alphars)] <- 100 # Not needed since bugs fixed!
return(alphars)
}
#************ makealphars2
#****f* estimation/fmakealphars2
# NAME
# fmakealphars2 --- Fortran wrapper for fmkalpha2
# FUNCTION
# Compute the MLEs for the baseline hazard parameters alphars,
# given estimates of regression parameters andf frailties, for
# a single recurrent event process.
#
# This works in the same way as makealphars2
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat
# betahat vector of regression parameter estimates
# as matrix of discretization breakpoints for each stratum
# Uijmat matrix of frailty estimates
# smooth boolean to indicate whether the output should be smoothed
# OUTPUTS
# alphars matrix of baseline hazard parameters of size p x K
# containing the hazard estimate for each stratum and
# discretization interval
# SYNOPSIS
fmakealphars2 <- function(m, Ji, datamat, betahat, as, Uijmat, smooth)
# SOURCE
#
{
# Allocate storage
alphars <- matrix(0, dim(as)[1], dim(as)[2])
# Split the data matrix into different components
covs <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
delta <- datamat[, "delta"]
times <- datamat[, "time"]
# Fortran call
out <- .Fortran("fmkalpha2",
betahat = as.double(betahat),
index = as.integer(index),
delta = as.double(delta),
times = as.double(times),
Z = as.double(covs),
alphars = as.double(alphars),
as = as.double(as),
Uijmat = as.double(Uijmat),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(as)[1]),
ns = as.integer(dim(as)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji))
)
# Extract output
alphars <- matrix(out$alphars, dim(alphars)[1], dim(alphars)[2])
if(smooth){
alphars <- smoothalpha(alphars, as)
}
return(alphars)
}
#************ fmakealphars2
#****f* estimation/smoothalpha
# NAME
# smoothalpha --- smooth hazard estimates
# FUNCTION
# Applies a spline smoother to the baseline hazard estimates. This was sometimes
# found to improve stability.
# INPUTS
# alphars matrix of baseline hazard parameters
# as matrix of discretization breakpoints for each stratum
# OUTPUTS
# alphars matrix of baseline hazard parameters
# SYNOPSIS
smoothalpha <- function(alphars, as)
# SOURCE
#
{
for(r in 1:dim(as)[1]){
idx <- which(alphars[r, ] != 100)
idx <- c(idx, idx[length(idx)] + 1)
xs <- (as[r, idx[ - 1]] + as[r, idx[ - length(idx)]]) / 2
if(length(xs) > 50) nk <- 50 else nk <- NULL
# Apply a spline smoother to the hazards
out <- smooth.spline(xs, alphars[r, idx[ - length(idx)]],
w = diff(as[r, idx]), nknots = nk)
alphars[r, idx] <- c(out$y, 100)
}
return(alphars)
}
#************ smoothalpha
#****f* initialization/makeas
# NAME
# makeas --- construct discretization
# FUNCTION
# Split the range of event times into intervals,
# during which the baseline hazard is assumed constant.
# INPUTS
# agdata a data frame in Anderson-Gill format
# K vector the number of breakpoints for each stratum
# alternating boolean indicating episodic data
# OUTPUTS
# as matrix of size max(r) x max(K)+1 containing
# discretization breakpoints for each stratum
# SYNOPSIS
makeas <- function(agdata, K, alternating = FALSE)
# SOURCE
#
{
# get number of strata
rmax <- max(agdata$r)
# extract the portion of agdata containing events
agdatatemp <- agdata[agdata$delta == 1 |
c(diff(agdata$i * 1000 + agdata$j) != 0, TRUE), ]
if(!alternating) {
agdatatemp$start[ - 1] <- agdatatemp$stop[ - length(agdatatemp$stop)]
agdatatemp$start[c(1, diff(agdatatemp$i * 1000 + agdatatemp$j)) != 0] <- 0
}
# Initialize matrices.
as <- matrix(0, rmax, max(K) + 1)
for(r in 1:rmax){
# Extract the set of event times and death times for each stratum
eventtimes <- c(0, sort(unique((agdatatemp$stop -
agdatatemp$start)[agdatatemp$r == r & agdatatemp$delta == 1])))
maxtime <- max((agdatatemp$stop - agdatatemp$start)[agdatatemp$r == r])+.001
# Compute the set of breakpoints as quantiles, and fill in the rest
# of the matrix with the maximum value.
as[r, 1:(K[r] + 1)] <- quantile(c(eventtimes, maxtime),
seq(from = 0, to = 1, length = K[r] + 1))
if(K[r] < max(K)) as[r, (K[r] + 2):max(K + 1)] <- maxtime
}
return(as)
}
#************ makeas
#****f* utility/A
# NAME
# A --- time in an interval
# FUNCTION
# Computes the amount of time in interval (a[r,s-1], a[r,s]) prior to time t
# INPUTS
# t time
# as matrix of breakpoints
# r stratum
# s interval
# SYNOPSIS
A <- function(t, as, r, s)
# SOURCE
#
{
s <- s + 1
if(s == 0){ return(0)}
if(t < as[r, s - 1]){ return(0)}
if(t >= as[r, s]){
return(as[r, s] - as[r, s - 1])
}
return(t - as[r, s - 1])
}
#************ A
#****f* ZZdebug/fupdatefrailties3
# NAME
# fupdatefrailties3 --- update frailty estimates
# FUNCTION
# Drop-in replacement for fupdatefrailties4, which updates the frailties
# using only Fortran calls to low-level functionality like fsmuij. See
# fupdatefrailties4 for details.
# SYNOPSIS
fupdatefrailties3 <- function(m, Ji, datamat, datamatd, alphars, alpharsd, as, asd, betahat, betadhat, dispparams)
# SOURCE
#
{
# cat("\tUpdating frailty estimates\n")
## Extract dispersion parameter estimates
sigma2hat <- dispparams$sigma2hat
sigma2dhat <- dispparams$sigma2dhat
nu2hat <- dispparams$nu2hat
nu2dhat <- dispparams$nu2dhat
thetahat <- dispparams$thetahat
## Initialize frailty vectors
Uihat <- rep(0, m);Vihat <- rep(0, m);
Uijhat <- rep(0, sum(Ji));Vijhat <- rep(0, sum(Ji));
## Initialize matrices for storage of pi, qi, pij, qij, etc.
jimax <- max(Ji)
pi <- rep(0, m);qi <- rep(0, m);ri <- rep(0, m);si <- rep(0, m)
piprime <- rep(0, m);qiprime <- rep(0, m);
riprime <- rep(0, m);siprime <- rep(0, m);
wi <- rep(0, m)
pij <- matrix(0, m, jimax);pijprime <- matrix(0, m, jimax);
qij <- matrix(0, m, jimax);qijprime <- matrix(0, m, jimax);
rij <- matrix(0, m, jimax);rijprime <- matrix(0, m, jimax);
sij <- matrix(0, m, jimax);sijprime <- matrix(0, m, jimax);
wij <- matrix(0, m, jimax);zij <- matrix(0, m, jimax);
Smu <- matrix(0, m, jimax);Sdelta <- matrix(0, m, jimax);
Seta <- matrix(0, m, jimax);SDelta <- matrix(0, m, jimax);
covs <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
delta <- datamat[, "delta"]
Sm <- try(.Fortran("fsmuij",
betahat = as.double(betahat),
index = as.integer(index),
delta = as.double(delta),
Z = as.double(covs),
alphars = as.double(alphars),
as = as.double(as),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(alphars)[1]),
ns = as.integer(dim(alphars)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
Smu = as.double(Smu),
Sdelta = as.double(Sdelta)))
Smu <- matrix(Sm$Smu, m, jimax)
Sdelta <- matrix(Sm$Sdelta, m, jimax)
covs <- matrix(datamatd[, -(1:8)], dim(datamatd)[1], dim(datamatd)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamatd[, c("i", "j", "k", "r", "smin", "smax")]
Delta <- datamatd[, "delta"]
Se <- try(.Fortran("fsmuij",
betahat = as.double(betadhat),
index = as.integer(index),
delta = as.double(Delta),
Z = as.double(covs),
alphars = as.double(alpharsd),
as = as.double(asd),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(alpharsd)[1]),
ns = as.integer(dim(alpharsd)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
Smu = as.double(Seta),
Sdelta = as.double(SDelta)))
Seta <- matrix(Se$Smu, m, jimax)
SDelta <- matrix(Se$Sdelta, m, jimax)
for(i in 1:m){
for(j in 1:Ji[i]){
Smuij <- Smu[i, j]
Setaij <- Seta[i, j]
Sdeltaij <- Sdelta[i, j]
SDeltaij <- SDelta[i, j]
wij[i, j] <- nu2hat - thetahat^2 * Setaij / (1 + nu2dhat * Setaij)
zij[i, j] <- nu2dhat - thetahat^2 * Smuij / (1 + nu2hat * Smuij)
pij[i, j] <- Smuij / (1 + wij[i, j] * Smuij)
qij[i, j] <- -thetahat * Smuij * Setaij / ((1 + nu2hat * Smuij) *
(1 + nu2dhat * Setaij) - thetahat^2 * Smuij * Setaij)
rij[i, j] <- qij[i, j]
sij[i, j] <- Setaij / (1 + zij[i, j] * Setaij)
pijprime[i, j] <- Sdeltaij / (1 + wij[i, j] * Smuij)
qijprime[i, j] <- -thetahat * Smuij * SDeltaij /
((1 + nu2hat * Smuij) * (1 + nu2dhat * Setaij) -
thetahat^2 * Smuij * Setaij)
rijprime[i, j] <- -thetahat * Sdeltaij * Setaij /
((1 + nu2hat * Smuij) * (1 + nu2dhat * Setaij) -
thetahat^2 * Smuij * Setaij)
sijprime[i, j] <- SDeltaij / (1 + zij[i, j] * Setaij)
}
## Compute pi, qi, etc, as well as wi.
piprime[i] <- sum(pijprime[i, ])
qiprime[i] <- sum(qijprime[i, ])
riprime[i] <- sum(rijprime[i, ])
siprime[i] <- sum(sijprime[i, ])
pi[i] <- sum(pij[i, ])
qi[i] <- sum(qij[i, ])
ri[i] <- sum(rij[i, ])
si[i] <- sum(sij[i, ])
wi[i] <- 1 / ((1 + sigma2hat * pi[i]) *
(1 + sigma2dhat * si[i]) - sigma2hat * sigma2dhat * ri[i]^2)
## Compute cluster frailty estimates
Uihat[i] <- 1 + sigma2hat * wi[i] * ((1 + sigma2dhat * si[i]) *
(piprime[i] - pi[i] + qiprime[i] - qi[i]) - sigma2dhat * ri[i] *
(riprime[i] - ri[i] + siprime[i] - si[i]))
Vihat[i] <- 1 + sigma2dhat * wi[i] * ((1 + sigma2hat * pi[i]) *
(riprime[i] - ri[i] + siprime[i] - si[i]) - sigma2hat * ri[i] *
(piprime[i] - pi[i] + qiprime[i] - qi[i]))
## Compute individual frailty estimates
Jicum <- c(0, cumsum(Ji))
for(j in 1:jimax){
Uijhat[Jicum[i] + j] <- Uihat[i] - (nu2hat * pij[i, j] + thetahat *
rij[i, j]) * Uihat[i] - (nu2hat * qij[i, j] + thetahat * sij[i, j]) *
Vihat[i] + nu2hat * (pijprime[i, j] + qijprime[i, j]) + thetahat *
(rijprime[i, j] + sijprime[i, j])
Vijhat[Jicum[i] + j] <- Vihat[i] - (thetahat * qij[i, j] + nu2dhat *
sij[i, j]) * Vihat[i] - (thetahat * pij[i, j] + nu2dhat * rij[i, j]) *
Uihat[i] + thetahat * (pijprime[i, j] + qijprime[i, j]) + nu2dhat *
(rijprime[i, j] + sijprime[i, j])
}
}
Uijhat[Uijhat<.01] <- 0.01; Vijhat[Vijhat < 0.01] <- 0.01
Uijframe <- makeUijframe(m, Ji, Uijhat, Vijhat)
cat("mean(Uijhat): ", mean(Uijhat), " mean(Vijhat): ", mean(Vijhat), "\n")
if(mean(Uijhat)<.5 | mean(Vijhat)<.5) browser()
return(list(Uihat = Uihat,
Vihat = Vihat,
Uijhat = Uijhat,
Vijhat = Vijhat,
pqrs = list(pij = pij, qij = qij, rij = rij, sij = sij,
pijprime = pijprime, qijprime = qijprime,
rijprime = rijprime, sijprime = sijprime,
pi = pi, qi = qi, ri = ri, si = si,
piprime = piprime, qiprime = qiprime,
riprime = riprime, siprime = siprime,
wi = wi, zij = zij, wij = wij),
Uijframe = Uijframe))
}
#************ fupdatefrailties3
#****f* estimation/fupdatefrailties4
# NAME
# fupdatefrailties4 --- update frailty estimates
# FUNCTION
# Compute estimates of the cluster- and subject-level frailties. This function
# acts as a wrapper for the FORTRAN routine fmkfrail. See the fmkfrail
# documentation for details, or see fupdatefrailties3 for an R implementation.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# betahat regression coefficient estimates for event 1
# betadhat regression coefficient estimates for event 2
# dispparams list of dispersion parameter estimates, see updatedisppears
# OUTPUTS
# Uihat vector of cluster-level frailties for event 1
# Vihat vector of cluster-level frailties for event 2
# Uijhat vector of subject-level frailties for event 1
# Vijhat vector of subject-level frailties for event 2
# pqrs list of intermediate values which are useful for computing
# dispersion parameter estimates
# Uijframe list containing matrix versions of Uijhat and Vijhat
# SYNOPSIS
fupdatefrailties4 <- function(m, Ji, datamat, datamatd, alphars, alpharsd,
as, asd, betahat, betadhat, dispparams)
# SOURCE
#
{
# Extract dispersion parameters
sigma2hat <- dispparams$sigma2hat
sigma2dhat <- dispparams$sigma2dhat
nu2hat <- dispparams$nu2hat
nu2dhat <- dispparams$nu2dhat
thetahat <- dispparams$thetahat
## Initialize frailty vectors
Uihat <- rep(0, m);Vihat <- rep(0, m);
Uijhat <- rep(0, sum(Ji));Vijhat <- rep(0, sum(Ji));
# Initialize matrices for storage of pi, qi, pij, qij, etc.
jimax <- max(Ji)
pi <- rep(0, m);qi <- rep(0, m);ri <- rep(0, m);si <- rep(0, m)
piprime <- rep(0, m);qiprime <- rep(0, m);
riprime <- rep(0, m);siprime <- rep(0, m);
wi <- rep(0, m)
pij <- matrix(0, m, jimax);pijprime <- matrix(0, m, jimax);
qij <- matrix(0, m, jimax);qijprime <- matrix(0, m, jimax);
rij <- matrix(0, m, jimax);rijprime <- matrix(0, m, jimax);
sij <- matrix(0, m, jimax);sijprime <- matrix(0, m, jimax);
wij <- matrix(0, m, jimax);zij <- matrix(0, m, jimax);
# Prepare data for submission to Fortran function fmkfrail
Z <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs1 <- dim(Z)[2]
d1 <- dim(Z)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
delta <- datamat[, "delta"]
Zd <- matrix(datamatd[, -(1:8)], dim(datamatd)[1], dim(datamatd)[2] - 8)
ncovs2 <- dim(Zd)[2]
d2 <- dim(Zd)[1]
indexd <- datamatd[, c("i", "j", "k", "r", "smin", "smax")]
deltad <- datamatd[, "delta"]
nr = dim(alphars)[1]
ns = dim(alphars)[2]
nsd = dim(alpharsd)[2]
# Submit to Fortran
out <- try(.Fortran("fmkfrail",
index = as.integer(index), # indices i,j,k,r,smin,smax
indexd = as.integer(indexd),
delta = as.double(delta), # event indicators
deltad = as.double(deltad),
Z = as.double(Z), # covariates
Zd = as.double(Zd),
alphars = as.double(alphars), # hazards
alpharsd = as.double(alpharsd),
as = as.double(as), # breakpoints
asd = as.double(asd),
betahat = as.double(betahat), # regression coefficients
betadhat = as.double(betadhat),
m = as.integer(m), # clusters and cluster sizes
Ji = as.integer(Ji),
jimax = as.integer(max(Ji)),
Jicum = as.integer(c(0, cumsum(Ji))),
jisum = as.integer(sum(Ji)),
d1 = as.integer(d1), # dimensions of covariate vectors
d2 = as.integer(d2),
ncovs1 = as.integer(ncovs1),
ncovs2 = as.integer(ncovs2),
nr = as.integer(nr), # number of strata
ns = as.integer(ns), # number of breakpoints
nsd = as.integer(nsd),
sigma2 = as.double(sigma2hat), # current dispersion parameters
sigma2d = as.double(sigma2dhat),
nu2 = as.double(nu2hat),
nu2d = as.double(nu2dhat),
theta = as.double(thetahat),
Uihat = as.double(Uihat), # emtpy matrices to store frailty estimates
Vihat = as.double(Vihat),
Uijhat = as.double(Uijhat),
Vijhat = as.double(Vijhat),
pi = as.double(pi), # matrices to store intermediate values
qi = as.double(qi),
ri = as.double(ri),
si = as.double(si),
piprime = as.double(piprime),
qiprime = as.double(qiprime),
riprime = as.double(riprime),
siprime = as.double(siprime),
pij = as.double(pij),
qij = as.double(qij),
rij = as.double(rij),
sij = as.double(sij),
pijprime = as.double(pijprime),
qijprime = as.double(qijprime),
rijprime = as.double(rijprime),
sijprime = as.double(sijprime),
wi = as.double(wi),
wij = as.double(wij),
zij = as.double(zij)))
# Too small frailty estimates are not allowed
out$Uijhat[out$Uijhat<.01] <- 0.01; out$Vijhat[out$Vijhat < 0.01] <- 0.01
# Extract output
pij = matrix(out$pij, m, jimax)
qij = matrix(out$qij, m, jimax)
rij = matrix(out$rij, m, jimax)
sij = matrix(out$sij, m, jimax)
pijprime = matrix(out$pijprime, m, jimax)
qijprime = matrix(out$qijprime, m, jimax)
rijprime = matrix(out$rijprime, m, jimax)
sijprime = matrix(out$sijprime, m, jimax)
zij = matrix(out$zij, m, jimax)
wij = matrix(out$wij, m, jimax)
# Format frailtyoutput list
return(list(Uihat = out$Uihat,
Vihat = out$Vihat,
Uijhat = out$Uijhat,
Vijhat = out$Vijhat,
pqrs = list(pij = pij, qij = qij, rij = rij, sij = sij,
pijprime = pijprime, qijprime = qijprime,
rijprime = rijprime, sijprime = sijprime,
pi = out$pi, qi = out$qi, ri = out$ri, si = out$si,
piprime = out$piprime, qiprime = out$qiprime,
riprime = out$riprime, siprime = out$siprime,
wi = out$wi, zij = zij, wij = wij),
Uijframe = makeUijframe(m, Ji, out$Uijhat, out$Vijhat)))
}
#************ fupdatefrailties4
#****f* estimation/updatedisppears
# NAME
# updatedisppears --- Pearson dispersion parameter estimators
# FUNCTION
# Compute dispersion parameter estimates as Pearson-type estimators with a
# bias correction for the BLUP shrinkage effect.
# INPUTS
# m number of clusters
# Ji cluster sizes
# frailtyoutput all output of fupdatefrailties4
# dispparams a list of current estimates of dispersion parameters,
# with components:
# sigma2hat, sigma2dhat, nu2hat, nu2dhat, thetahat
# corrval a ``correction factor'' that can be used to implement
# Ma's degree-of-freedom adjustment
# OUTPUTS
# sigma2hat cluster-level dispersion for process 1
# sigma2dhat cluster-level dispersion for process 2
# nu2hat subject-level dispersion for process 1
# nu2dhat subject-level dispersion for process 2
# thetahat subject-level covariance
# SYNOPSIS
updatedisppears <- function(m, Ji, frailtyoutput, dispparams, corrval)
# SOURCE
#
{
Jicum <- c(0, cumsum(Ji))
jimax <- max(Ji)
## Extract variables from the parameters passed into the function
pij <- frailtyoutput$pqrs$pij; qij <- frailtyoutput$pqrs$qij;
rij <- frailtyoutput$pqrs$rij; sij <- frailtyoutput$pqrs$sij;
pijprime <- frailtyoutput$pqrs$pijprime; qijprime <- frailtyoutput$pqrs$qijprime;
rijprime <- frailtyoutput$pqrs$rijprime; sijprime <- frailtyoutput$pqrs$sijprime;
pi <- frailtyoutput$pqrs$pi; qi <- frailtyoutput$pqrs$qi;
ri <- frailtyoutput$pqrs$ri; si <- frailtyoutput$pqrs$si;
piprime <- frailtyoutput$pqrs$piprime; qiprime <- frailtyoutput$pqrs$qiprime;
riprime <- frailtyoutput$pqrs$riprime; siprime <- frailtyoutput$pqrs$siprime;
wij <- frailtyoutput$pqrs$wij;zij <- frailtyoutput$pqrs$zij;
wi <- frailtyoutput$pqrs$wi
Uihat <- frailtyoutput$Uihat;Vihat <- frailtyoutput$Vihat;
Uijhat <- frailtyoutput$Uijhat;Vijhat <- frailtyoutput$Vijhat;
sigma2hat <- dispparams$sigma2hat; sigma2dhat <- dispparams$sigma2dhat
nu2hat <- dispparams$nu2hat; nu2dhat <- dispparams$nu2dhat
thetahat <- dispparams$thetahat
## Initialize mean squared distance vectors
ci <- rep(0, m);cid <- rep(0, m);
cij <- matrix(0, m, jimax);cijd <- matrix(0, m, jimax);
cijprime <- matrix(0, m, jimax)
kUU <- matrix(0, m, jimax);kUV <- matrix(0, m, jimax);
kVU <- matrix(0, m, jimax);kVV <- matrix(0, m, jimax)
for(i in 1:m){
## Compute mean squared distances of cluster frailty estimators
ci[i] <- sigma2hat * wi[i] * (1 + sigma2dhat * si[i])
cid[i] <- sigma2dhat * wi[i] * (1 + sigma2hat * pi[i])
for(j in 1:Ji[i]){
## Compute useful covariance terms for the bias correction
kUU[i, j] <- sigma2hat * wi[i] * ((1 + sigma2dhat * si[i]) *
(sigma2hat * pi[i] + nu2hat * pij[i, j] + thetahat * qij[i, j]) -
sigma2dhat * qi[i] * (sigma2hat * qi[i] + nu2hat * qij[i, j] +
thetahat * sij[i, j]))
kUV[i, j] <- sigma2hat * wi[i] * ((1 + sigma2dhat * si[i]) *
(thetahat * pij[i, j] + nu2dhat * qij[i, j]) - sigma2dhat *
qi[i] * (thetahat * qij[i, j] + nu2dhat * sij[i, j] - 1))
kVU[i, j] <- sigma2dhat * wi[i] * ((1 + sigma2hat * pi[i]) *
(thetahat * sij[i, j] + nu2hat * qij[i, j]) - sigma2hat *
qi[i] * (thetahat * qij[i, j] + nu2hat * pij[i, j] - 1))
kVV[i, j] <- sigma2dhat * wi[i] * ((1 + sigma2hat * pi[i]) *
(sigma2dhat * si[i] + nu2dhat * sij[i, j] + thetahat * qij[i, j]) -
sigma2hat * qi[i] * (sigma2dhat * qi[i] + nu2dhat * qij[i, j] +
thetahat * pij[i, j]))
## Compute mean squared distances of individual frailty estimators
cij[i, j] <- (nu2hat * pij[i, j] + thetahat * qij[i, j] - 1) *
(kUU[i, j] - sigma2hat - nu2hat) + (nu2hat * qij[i, j] + thetahat *
sij[i, j]) * (kVU[i, j] - thetahat)
cijd[i, j] <- (nu2dhat * sij[i, j] + thetahat * qij[i, j] - 1) *
(kVV[i, j] - sigma2dhat - nu2dhat) + (nu2dhat * qij[i, j] + thetahat *
pij[i, j]) * (kUV[i, j] - thetahat)
cijprime[i, j] <- thetahat - (1 - nu2hat * pij[i, j] - thetahat *
qij[i, j]) * kUV[i, j] + (nu2hat * qij[i, j] + thetahat *
sij[i, j]) * kVV[i, j] - (sigma2dhat + nu2dhat) *
(nu2hat * qij[i, j] + thetahat * sij[i, j]) - thetahat *
(nu2hat * pij[i, j] + thetahat * qij[i, j])
}
}
## Compute estimators of dispersion parameters
sigma2hatnew <- corrval * 1 / m * sum((Uihat - 1)^2 + ci)
sigma2dhatnew <- corrval * 1 / m * sum((Vihat - 1)^2 + cid)
nu2hatnew <- 0;nu2dhatnew <- 0; thetahatnew <- 0
for(i in 1:m){
nu2hattemp <- 0;nu2dhattemp <- 0;thetahattemp <- 0;
for(j in 1:Ji[i]){
nu2hattemp <- nu2hattemp + (Uijhat[Jicum[i] + j] - Uihat[i])^2 +
ci[i] + cij[i, j] - 2 * (sigma2hat - kUU[i, j])
nu2dhattemp <- nu2dhattemp + (Vijhat[Jicum[i] + j] - Vihat[i])^2 +
cid[i] + cijd[i, j] - 2 * (sigma2dhat - kVV[i, j])
thetahattemp <- thetahattemp + (Uijhat[Jicum[i] + j] - Uihat[i]) *
(Vijhat[Jicum[i] + j] - Vihat[i]) + cijprime[i, j] + kUV[i, j] +
kVU[i, j] - sigma2hat * sigma2dhat * wi[i] * qi[i]
}
nu2hatnew <- nu2hatnew + nu2hattemp / Ji[i];
nu2dhatnew <- nu2dhatnew + nu2dhattemp / Ji[i];
thetahatnew <- thetahatnew + thetahattemp / Ji[i];
}
# Ad - hoc correction adjustment
nu2hatnew <- corrval * nu2hatnew / m
nu2dhatnew <- corrval * nu2dhatnew / m
thetahatnew <- corrval * thetahatnew / m
# Format for output
return(list(sigma2hat = sigma2hatnew,
sigma2dhat = sigma2dhatnew,
nu2hat = nu2hatnew,
nu2dhat = nu2dhatnew,
thetahat = thetahatnew))
}
#************ updatedisppears
#****f* estimation/updatedispohls
# NAME
# updatedispohls --- Ohlsson-type dispersion parameter estimators
# FUNCTION
# Compute dispersion parameter estimators using the method proposed by Ohlsson
# et al. See the paper for the reference.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# betahat regression coefficient estimates for event 1
# betadhat regression coefficient estimates for event 2
# fixzero list of estimators that should be fixed at zero (testing only)
# OUTPUTS
# sigma2hat cluster-level dispersion for process 1
# sigma2dhat cluster-level dispersion for process 2
# nu2hat subject-level dispersion for process 1
# nu2dhat subject-level dispersion for process 2
# thetahat subject-level covariance
# SYNOPSIS
updatedispohls <- function(m, Ji, datamat, datamatd, alphars, alpharsd,
as, asd, betahat, betadhat, fixzero)
# SOURCE
#
{
jimax <- max(Ji)
Smu <- matrix(0, m, jimax);Sdelta <- matrix(0, m, jimax);
Seta <- matrix(0, m, jimax);SDelta <- matrix(0, m, jimax);
# Prepare data for Fortran function fsmuij to compute sum for event 1
covs <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
delta <- datamat[, "delta"]
# The FORTRAN function fsmuij computes a matrix of size m x max(Ji), whose
# (i, j) entry is sum(mu_rijks) over all r,k,s
Sm <- try(.Fortran("fsmuij",
betahat = as.double(betahat),
index = as.integer(index),
delta = as.double(delta),
Z = as.double(covs),
alphars = as.double(alphars),
as = as.double(as),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(alphars)[1]),
ns = as.integer(dim(alphars)[2]),
m = as.integer(m), maxj = as.integer(max(Ji)),
Smu = as.double(Smu),
Sdelta = as.double(Sdelta)))
# extract sums from FORTRAN output
Smu <- matrix(Sm$Smu, m, jimax)
Sdelta <- matrix(Sm$Sdelta, m, jimax)
# Repeat FORTRAN call for event 2
covs <- matrix(datamatd[, -(1:8)], dim(datamatd)[1], dim(datamatd)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamatd[, c("i", "j", "k", "r", "smin", "smax")]
Delta <- datamatd[, "delta"]
# Compute sum of eta_ij analogously
Se <- try(.Fortran("fsmuij",
betahat = as.double(betadhat),
index = as.integer(index),
delta = as.double(Delta),
Z = as.double(covs),
alphars = as.double(alpharsd),
as = as.double(asd),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(alpharsd)[1]),
ns = as.integer(dim(alpharsd)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
Smu = as.double(Seta),
Sdelta = as.double(SDelta)))
Seta <- matrix(Se$Smu, m, jimax)
SDelta <- matrix(Se$Sdelta, m, jimax)
# Compute sum(mu_i*) for both processes
Smui <- apply(Smu, 1, sum)
Setai <- apply(Seta, 1, sum)
# Begin computing the Ohlsson-type estimates
Xtild <- Sdelta / Smu
Ztild <- SDelta / Seta
Xtildi <- apply(Sdelta, 1, sum) / Smui
Ztildi <- apply(SDelta, 1, sum) / Setai
Ztildii <- rep(Ztildi, Ji)
# This is a simplistic correction to attempt to resolve some occasional
# computational problems caused by too large or too small estimated means.
# This line replaces subject estimates which are in the most extreme 5\% by
# the cluster-level estimate for the corresponding cluster.
Ztild[Ztild > quantile(Ztild, .975) | Ztild < quantile(Ztild, .025)] <-
Ztildii[Ztild > quantile(Ztild, .975) | Ztild < quantile(Ztild, .025)]
# Subject-level Ohlsson estimates
nu2hat <- 0;nu2dhat <- 0;thetahat <- 0;thetadenom <- 0;sigma2hat <- 0;sigma2dhat <- 0;
for(i in 1:m){
for(j in 1:Ji[i]){
nu2hat <- nu2hat + Smu[i, j] * (Xtild[i, j] - Xtildi[i])^2
nu2dhat <- nu2dhat + Seta[i, j] * (Ztild[i, j] - Ztildi[i])^2
thetahat <- thetahat + (Xtild[i, j] - Xtildi[i]) *
(Ztild[i, j] - Ztildi[i])
thetadenom <- thetadenom + (1 + 1 / Smui[i] / Setai[i] *
sum(Smu[i, ] * Seta[i, ])) - (Smu[i, j] / Smui[i] +
Seta[i, j] / Setai[i])
}
}
# Complete the estimates of the dispersion parameters.
nu2hat <- (nu2hat - sum(Ji - 1)) /
(sum(Smui) - sum(apply(Smu^2, 1, sum) / Smui))
nu2dhat <- (nu2dhat - sum(Ji - 1)) /
(sum(Setai) - sum(apply(Seta^2, 1, sum) / Setai))
thetahat <- thetahat / thetadenom
# Fix parameters at 0 if desired
if(!is.null(fixzero)){
if("nu2hat"%in%fixzero) nu2hat <- 0
if("nu2dhat"%in%fixzero) nu2dhat <- 0
if("thetahat"%in%fixzero) thetahat <- 0
}
# The Ohlsson - type estimators do not guarantee that the esimated covariance
# matrix is valid. This code truncates the estimate of thetahat if needed
if(nu2hat<.001) nu2hat <- .001
if(nu2dhat<.001) nu2dhat <- .001
badtheta <- try(thetahat / sqrt(nu2hat * nu2dhat) > 1 |
thetahat / sqrt(nu2hat * nu2dhat)< -1)
if(inherits(badtheta, "try - error") | is.na(badtheta)) {browser(); return(0)}
if(badtheta) thetahat <- try(sign(thetahat) * sqrt(nu2hat * nu2dhat))
if(inherits(thetahat, "try - error")) return(0)
#! Estimate the cluster - level dispersion parameters
zij <- Smu / (Smu + 1 / nu2hat)
zi <- apply(zij, 1, sum)
zijd <- Seta / (Seta + 1 / nu2dhat)
zid <- apply(zijd, 1, sum)
Xztildi <- apply(zij * Xtild, 1, sum) / zi
Zztildi <- apply(zijd * Ztild, 1, sum) / zid
Xztild <- sum(zi * Xztildi) / sum(zi)
Zztild <- sum(zid * Zztildi) / sum(zid)
sigma2hat <- (sum(zi * (Xztildi - Xztild)^2) - nu2hat * (m - 1)) /
(sum(zi) - sum(zi^2) / sum(zi))
sigma2dhat <- (sum(zid * (Zztildi - Zztild)^2) - nu2dhat * (m - 1)) /
(sum(zid) - sum(zid^2) / sum(zid))
# Fix parameters at 0 if desired
if(!is.null(fixzero)){
if("sigma2hat"%in%fixzero) sigma2hat <- 0
if("sigma2dhat"%in%fixzero) sigma2dhat <- 0
}
# Format for output
return(list(sigma2hat = sigma2hat,
sigma2dhat = sigma2dhat,
nu2hat = nu2hat,
nu2dhat = nu2dhat,
thetahat = thetahat))
}
#************ updatedispohls
#****f* estimation/updatedispmarg2
# NAME
# updatedispmarg2 --- Marginal estimators for the dispersion parameters
# FUNCTION
# Compute dispersion parameter estimates using a marginal method, based on
# the moments of the event indicators delta under the auxiliary Poisson model.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# betahat regression coefficient estimates for event 1
# betadhat regression coefficient estimates for event 2
# fixzero list of estimators that should be fixed at zero (testing only)
# univariate boolean indicator whether a univariate model is being fit
# OUTPUTS
# sigma2hat cluster-level dispersion for process 1
# sigma2dhat cluster-level dispersion for process 2
# nu2hat subject-level dispersion for process 1
# nu2dhat subject-level dispersion for process 2
# thetahat subject-level covariance
# SYNOPSIS
updatedispmarg2 <- function(m, Ji, datamat, datamatd, alphars, alpharsd, as, asd,
betahat, betadhat, fixzero, univariate = FALSE)
# SOURCE
#
{
# The function getdispmarg3 makes the FORTRAN calls that do most of the work
# out1 contains only terms involving event 1
# out2 only the event 2, and out3 contains the cross-terms
out1 <- getdispmarg3(m, Ji, datamat, datamat, alphars, alphars,
as, as, betahat, betahat, TRUE)
out2 <- getdispmarg3(m, Ji, datamatd, datamatd, alpharsd, alpharsd,
asd, asd, betadhat, betadhat, TRUE)
out3 <- getdispmarg3(m, Ji, datamat, datamatd, alphars, alpharsd,
as, asd, betahat, betadhat, FALSE)
# Extract the estimated dispersion parameters, and truncate from below
sigma2hat <- max(out1$sig2, 1e-4)
sigma2dhat <- max(out2$sig2, 1e-4)
# Fix parameters at 0 if desired
if(!is.null(fixzero)){
if("sigma2hat"%in%fixzero) sigma2hat <- 0
if("sigma2dhat"%in%fixzero) sigma2dhat <- 0
}
nu2hat <- max(out1$sig2nu2 - sigma2hat, 1e-4)
nu2dhat <- max(out2$sig2nu2 - sigma2dhat, 1e-4)
thetahat <- out3$sig2nu2
# Fix parameters at 0 if desired
if(!is.null(fixzero)){
if("nu2hat"%in%fixzero) nu2hat <- 0
if("nu2dhat"%in%fixzero) nu2dhat <- 0
if("thetahat"%in%fixzero) thetahat <- 0
}
# The marginal estimators do not guarantee that the esimated covariance matrix
# is valid. This code truncates the estimate of thetahat if needed
if(!univariate)
badtheta <- try(thetahat / sqrt(nu2hat * nu2dhat) > 1 | thetahat /
sqrt(nu2hat * nu2dhat)< -1)
else badtheta <- FALSE
if(inherits(badtheta, "try - error") | is.na(badtheta)) {browser(); return(0)}
if(badtheta) thetahat <- try(sign(thetahat) * sqrt(nu2hat * nu2dhat))
if(inherits(thetahat, "try - error")) return(0)
# Prepare for output
return(list(sigma2hat = sigma2hat,
sigma2dhat = sigma2dhat,
nu2hat = nu2hat,
nu2dhat = nu2dhat,
thetahat = thetahat))
}
#************ updatedispmarg2
#****f* estimation/getdispmarg3
# NAME
# getdispmarg3 --- workhorse function for computing marginal estimators
# FUNCTION
# This function is used by updatedispmarg to compute the sums of cross
# products of event indicators and means. Inputs are data for any two processes,
# which may be event processes 1 and 2, or two copies of event 1, or two copies
# of event 2. The sample variances and covariances of the two processes are
# computed under the auxiliary Poisson model.
# INPUTS
# datamat1 data matrix generated by makedatamat for process 1
# datamat2 data matrix generated by makedatamat for process 2
# alphars1 matrix of baseline hazard parameters for process 1
# alphars2 matrix of baseline hazard parameters for process 2
# as1 matrix of discretization breakpoints for process 1
# as2 matrix of discretization breakpoints for process 2
# betahat1 regression coefficient estimates for process 1
# betadha2 regression coefficient estimates for process 2
# same boolean indicator of whether process 1 and 2 are the same
# OUTPUTS
# sig2nu2 marginal estimate of sigma^2 + nu^2 (or theta, if the processes
# are different)
# sig2 marginal estimate of sigma^2
# SYNOPSIS
getdispmarg3 <- function(m, Ji, datamat1, datamat2, alphars1, alphars2,
as1, as2, betahat1, betahat2, same)
# SOURCE
#
{
#Initialze vectors and matrices
jimax <- max(Ji)
Smu1 <- matrix(0, m, jimax);Sdelta1 <- matrix(0, m, jimax);
Smu2 <- matrix(0, m, jimax);Sdelta2 <- matrix(0, m, jimax);
# Prepare for FORTRAN
covs1 <- matrix(datamat1[, -(1:8)], dim(datamat1)[1], dim(datamat1)[2] - 8)
ncovs1 <- dim(covs1)[2]
d1 <- dim(covs1)[1]
index1 <- datamat1[, c("i", "j", "k", "r", "smin", "smax")]
delta1 <- datamat1[, "delta"]
# First, compute the sums for the first data set.
Sm1 <- try(.Fortran("fsmuij",
betahat = as.double(betahat1),
index = as.integer(index1),
delta = as.double(delta1),
Z = as.double(covs1),
alphars = as.double(alphars1),
as = as.double(as1),
d = as.integer(d1),
ncovs = as.integer(ncovs1),
nr = as.integer(dim(alphars1)[1]),
ns = as.integer(dim(alphars1)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
Smu = as.double(Smu1),
Sdelta = as.double(Sdelta1)))
Smu1 <- matrix(Sm1$Smu, m, jimax)
Sdelta1 <- matrix(Sm1$Sdelta, m, jimax)
# Compute sums for the second data set
covs2 <- matrix(datamat2[, -(1:8)], dim(datamat2)[1], dim(datamat2)[2] - 8)
ncovs2 <- dim(covs2)[2]
d2 <- dim(covs2)[1]
index2 <- datamat2[, c("i", "j", "k", "r", "smin", "smax")]
delta2 <- datamat2[, "delta"]
Sm2 <- try(.Fortran("fsmuij",
betahat = as.double(betahat2),
index = as.integer(index2),
delta = as.double(delta2),
Z = as.double(covs2),
alphars = as.double(alphars2),
as = as.double(as2),
d = as.integer(d2),
ncovs = as.integer(ncovs2),
nr = as.integer(dim(alphars2)[1]),
ns = as.integer(dim(alphars2)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
Smu = as.double(Smu2),
Sdelta = as.double(Sdelta2)))
Smu2 <- matrix(Sm2$Smu, m, jimax)
Sdelta2 <- matrix(Sm2$Sdelta, m, jimax)
# This computation relies on the fact that for vectors x_1, x_2,
# a^Tx_1x_2^Te = (a^Tx_1)(a^Tx_2). Therefore,
# the numerator and denominator of sig2nu2= can be computed as
sig2nu2num <- sum((Sdelta1 - Smu1) * (Sdelta2 - Smu2))
sig2nu2den <- sum(Smu1 * Smu2)
#! If they come from the same data set, we must subtract the diagonal elements
if(same){
Smud2 <- try(.Fortran("fsmud2",
betahat = as.double(betahat1),
index = as.integer(index1),
delta = as.double(delta1),
Z = as.double(covs1),
alphars = as.double(alphars1),
as = as.double(as1),
d = as.integer(d1),
ncovs = as.integer(ncovs1),
nr = as.integer(dim(alphars1)[1]),
ns = as.integer(dim(alphars1)[2]),
Smud = as.double(0),
Smu2 = as.double(0)))
Smud <- Smud2$Smud
Smusq <- Smud2$Smu2
# Correct the estimates by removing the diagonal terms
sig2nu2num <- sig2nu2num - Smud
sig2nu2den <- sig2nu2den - Smusq
}
sig2nu2 <- sig2nu2num / sig2nu2den
# Computing the value of sig2 is analogous.
# First, sum over all the subjects in each cluster, then compute the
# numerator and denominator separately, by adding all the cross terms
# and subtracting those for which b = j.
Sdeltai1 <- apply(Sdelta1, 1, sum)
Sdeltai2 <- apply(Sdelta2, 1, sum)
Smui1 <- apply(Smu1, 1, sum)
Smui2 <- apply(Smu2, 1, sum)
sig2 <- (sum((Sdeltai1 - Smui1) * (Sdeltai2 - Smui2)) - sum((Sdelta1 - Smu1) *
(Sdelta2 - Smu2))) / (sum(Smui1 * Smui2) - sum(Smu1 * Smu2))
# Format for output
return(list(sig2nu2 = sig2nu2, sig2 = sig2))
}
#************ getdispmarg3
#****f* ZZdebug/Smuij
# NAME
# Smuij --- compute expected and observed event sums
# FUNCTION
# Compute the sums of mu_rijks and delta_rijks for every (i,j).
# This is an R implementation
# of the corresponding FORTRAN function used for debugging only.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat
# alphars matrix of baseline hazard parameters
# as matrix of discretization breakpoints
# betahat regression coefficient estimates
# OUTPUTS
# Smu matrix containing sum of mu_rijks for every (i,j)
# Sdelta matrix containing sum of delta_rijks for every (i,j)
# SYNOPSIS
Smuij <- function(m, Ji, datamat, alphars, as, betahat)
# SOURCE
#
{
jimax <- max(Ji)
covs <- as.matrix(datamat[, -c(1:8)], dim(datamat)[1], length(betahat))
Smu <- matrix(0, m, jimax);Sdelta <- matrix(0, m, jimax);
for(ind in 1:dim(datamat)[1])
{
i <- datamat[ind, "i"]
j <- datamat[ind, "j"]
r <- datamat[ind, "r"]
smax <- datamat[ind, "smax"]
smin <- datamat[ind, "smin"]
delta <- datamat[ind, "delta"]
for(s in smin:smax){
mu <- 1 - exp(-exp(as.matrix(betahat)%*%covs[ind, ]) * alphars[r, s] * (as[r, s + 1] - as[r, s]))
Smu[i, j] <- Smu[i, j] + mu
}
Sdelta[i, j] <- Sdelta[i, j] + delta
}
return(list(Smu = Smu, Sdelta = Sdelta))
}
#************ Smuij
#****f* ZZdebug/Smud2
# NAME
# Smud2 --- compute cross and squared event counts
# FUNCTION
# Compute the sums of mu_rijks * delta_rijks and
# mu_rijks^2 for every (i,j).
# This is an R implementation
# of the corresponding FORTRAN function used for debugging only.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat
# alphars matrix of baseline hazard parameters
# as matrix of discretization breakpoints
# betahat regression coefficient estimates
# OUTPUTS
# Smud matrix containing sum of mu_rijks * delta_rijks for every (i,j)
# Smu2 matrix containing sum of mu_rijks^2 for every (i,j)
# SYNOPSIS
Smud2 <- function(m, Ji, datamat, alphars, as, betahat)
# SOURCE
#
{
jimax <- max(Ji)
covs <- as.matrix(datamat[, -c(1:8)], dim(datamat)[1], length(betahat))
Smud <- 0;Smu2 <- 0
for(ind in 1:dim(datamat)[1])
{
i <- datamat[ind, "i"]
j <- datamat[ind, "j"]
r <- datamat[ind, "r"]
smax <- datamat[ind, "smax"]
smin <- datamat[ind, "smin"]
delta <- datamat[ind, "delta"]
for(s in smin:smax){
mu <- 1 - exp(-exp(as.matrix(betahat)%*%covs[ind, ]) *
alphars[r, s] * (as[r, s + 1] - as[r, s]))
if(s == smax) deltat <- delta else deltat <- 0
Smud <- Smud + (mu - deltat)^2
Smu2 <- Smu2 + mu^2
}
}
return(list(Smud = Smud, Smu2 = Smu2))
}
#************ Smud2
#****f* utility/ab2g
# NAME
# ab2g --- regression parameter conversion
# FUNCTION
# Convert alphas and betas into a single vector named gamma
# INPUTS
# alphars matrix of baseline hazard parameters
# betahat regression coefficient estimates
# K number of discretization intervals
# OUTPUTS
# SYNOPSIS
ab2g <- function(alphars, betahat, K)
# SOURCE
#
{
gamma <- NULL
for(r in 1:length(K)) gamma <- c(gamma, alphars[r, 1:K[r]])
gamma <- c(gamma, betahat)
return(gamma)
}
#************ ab2g
#****f* utility/g2ab
# NAME
# g2ab --- regression parameter conversion
# FUNCTION
# Extract alphars and betahat from a single vector named gamma
# INPUTS
# gamma vector containing all hazard and regression parameters
# K number of discretization intervals
# OUTPUTS
# alphars matrix of baseline hazard parameters
# betahat regression coefficient estimates
# SYNOPSIS
g2ab <- function(gamma, K)
# SOURCE
#
{
ind <- 1
alphars <- matrix(0, length(K), max(K) + 1)
for(r in 1:length(K)){
alphars[r, 1:K[r]] <- gamma[ind:(ind + K[r] - 1)]
alphars[r, K[r] + 1] <- 100
ind <- ind + K[r]
}
betahat <- gamma[ind:length(gamma)]
return(list(alphars = alphars, betahat = betahat))
}
#************ g2ab
#****f* ZZdebug/makemrs
# NAME
# makemrs --- terms for profile likelihood
# FUNCTION
# Compute terms needed in the profile likelihood and gradient.
# This is an R implementation of the FORTRAN routine fmkmrs, used for
# debugging only.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat
# as matrix of discretization breakpoints
# betahat regression coefficient estimates
# Uijmat matrix of frailty estimates
# OUTPUTS
# mrs matrix of terms needed by the profile likelihood
# mrsgr array of terms needed by the profile gradient
# SYNOPSIS
makemrs <- function(m, Ji, datamat, as, betahat, Uijmat)
# SOURCE
#
{
# Initialize matrices
mrs <- matrix(0, dim(as)[1], dim(as)[2] - 1)
mrsgr <- array(0, c(dim(as)[1], dim(as)[2] - 1, length(betahat)))
covs <- as.matrix(datamat[, -c(1:8)], dim(datamat)[1], length(betahat))
# Loop through the data matrix
for(ind in 1:dim(datamat)[1])
{
i <- datamat[ind, "i"]
j <- datamat[ind, "j"]
k <- datamat[ind, "k"]
smax <- datamat[ind, "smax"]
smin <- datamat[ind, "smin"]
r <- datamat[ind, "r"]
time <- datamat[ind, "time"]
for(s in smin:smax){
# At each entry in the data matrix, add the term to the
# appropriate component of the output matrices
pred <- Uijmat[i, j] * exp(as.matrix(betahat)%*%covs[ind, ]) *
A(time, as, r, s)
mrs[r, s] <- mrs[r, s] + pred
mrsgr[r, s, ] <- mrsgr[r, s, ] + covs[ind, ] * pred
}
}
# Format for output
return(list(mrs = mrs, mrsgr = mrsgr))
}
#************ makemrs
#****f* ZZdebug/fmakemrs
# NAME
# fmakemrs --- terms for profile likelihood and gradient
# FUNCTION
# Wrapper for FORTRAN function fmkmrs, drop-in replacement for makemrs.
# Never called because fmkproflik2 and fmkprofgr2 call the FORTRAN function
# directly, but kept in the codebase for debugging.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat
# as matrix of discretization breakpoints
# betahat regression coefficient estimates
# Uijmat matrix of frailty estimates
# OUTPUTS
# mrs matrix of terms needed by the profile likelihood
# mrsgr array of terms needed by the profile gradient
# SYNOPSIS
fmakemrs <- function(m, Ji, datamat, as, betahat, Uijmat)
# SOURCE
#
{
# Format data for Fortran
mrs <- matrix(0, dim(as)[1], dim(as)[2])
mrsgr <- array(0, c(dim(as)[1], dim(as)[2], length(betahat)))
covs <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
times <- datamat[, "time"]
# Submit to Fortran
out <- .Fortran("fmkmrs",
betahat = as.double(betahat),
index = as.integer(index),
times = as.double(times),
Z = as.double(covs),
as = as.double(as),
Uijmat = as.double(Uijmat),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(as)[1]),
ns = as.integer(dim(as)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
mrs = as.double(mrs),
mrsgr = as.double(mrsgr),
mkgr = as.integer(1))
# Extract output
mrs = matrix(out$mrs, dim(as)[1], dim(as)[2])
mrsgr = array(out$mrsgr, c(dim(as)[1], dim(as)[2], length(betahat)))
return(list(mrs = mrs, mrsgr = mrsgr))
}
#************ fmakemrs
#****f* ZZdebug/mkproflik
# NAME
# mkproflik --- compute profile likelihood
# FUNCTION
# Compute profile likelihood of regression parameters for a single process.
# This is an R implementation of the Fortran routine fproflik and is for
# debugging only.
# INPUTS
# m number of clusters
# Ji cluster sizes
# betahat regression coefficient estimates
# datamat data matrix generated by makedatamat
# as matrix of discretization breakpoints
# Uijmat matrix of frailty estimates
# OUTPUTS
# loglik profile loglikelihood of betahat conditional on the other parameters
# SYNOPSIS
mkproflik <- function(m, Ji, betahat, datamat, as, Uijmat)
# SOURCE
#
{
# Compute mrs using makemrs function
mrss <- makemrs(m, Ji, datamat, as, betahat, Uijmat)
mrs <- mrss$mrs
loglik <- 0
covs <- as.matrix(datamat[, -c(1:8)], dim(datamat)[1], length(betahat))
# Loop over the entries in the data matrix, but do not loop over the s
# values (delta can only be 1 on smax)
for(ind in 1:dim(datamat)[1]){
i <- datamat[ind, "i"]
j <- datamat[ind, "j"]
k <- datamat[ind, "k"]
smax <- datamat[ind, "smax"]
smin <- datamat[ind, "smin"]
r <- datamat[ind, "r"]
delta <- datamat[ind, "delta"]
# Add the term corresponding to each row in the data matrix
# to the loglikelihood
loglik <- loglik + delta * (log(Uijmat[i, j]) - log(mrs[r, smax]) +
as.matrix(betahat)%*%covs[ind, ])
}
return(loglik)
}
#************ mkproflik
#****f* ZZdebug/fmkproflik
# NAME
# fmkproflik --- compute profile likelihood
# FUNCTION
# Compute profile likelihood of regression parameters for a single process,
# conditional on the frailties and other parameters.
# This is superseded by the faster wrapper fmkproflik2, but is still in the
# code for debugging.
# INPUTS
# m number of clusters
# Ji cluster sizes
# betahat regression coefficient estimates
# datamat data matrix generated by makedatamat
# as matrix of discretization breakpoints
# Uijmat matrix of frailty estimates
# OUTPUTS
# loglik profile loglikelihood of betahat conditional on the other parameters
# SYNOPSIS
fmkproflik <- function(m, Ji, betahat, datamat, as, Uijmat)
# SOURCE
#
{
loglik <- 0
covs <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
delta <- datamat[, "delta"]
times <- datamat[, "time"]
out <- .Fortran("fproflik",
betahat = as.double(betahat),
index = as.integer(index),
delta = as.double(delta),
time = as.double(times),
Z = as.double(covs),
as = as.double(as),
Uijmat = as.double(Uijmat),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(as)[1]),
ns = as.integer(dim(as)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
lik = as.double(loglik))
return(out$lik)
}
#************ fmkproflik
#****f* fortranwrappers/fmkproflik2
# NAME
# fmkproflik2 --- compute profile likelihood
# FUNCTION
# Compute profile likelihood of regression parameters for a single process,
# conditional on the frailties and other parameters.
# This is just a Fortran wrapper for fproflik that avoids unnecessary type
# conversions for speed. Most of the inputs need to have been converted into
# the row vectors required by FORTRAN.
# INPUTS
# m number of clusters
# Ji cluster sizes
# betahat regression parameters at which the profile likelihood should be
# computed
# index matrix of indices i,j,k,r,smin,smax for each row (integer)
# delta vector of event indicators (double)
# times vector of actual time span for each row (double)
# Z matrix of covariates (as a double row vector)
# as matrix of discretization breakpoints (double row vector)
# Uijmat matrix of frailty estimates (double row vector)
# d number of rows of Z
# ncovs number of covariates
# nr number of strata
# ns number of breakpoints
# OUTPUTS
# loglik profile loglikelihood of betahat conditional on the other parameters
# SYNOPSIS
fmkproflik2 <- function(m, Ji, betahat, index, delta, times, Z, as, Uijmat,
d, ncovs, nr, ns)
# SOURCE
#
{
loglik <- as.double(0)
out <- .Fortran("fproflik",
betahat = as.double(betahat), index = index, delta = delta,
times = times, Z = Z, as = as, Uijmat = Uijmat,
d = d, ncovs = ncovs, nr = nr, ns = ns, m = as.integer(m),
maxj = as.integer(max(Ji)), lik = loglik)
return(out$lik)
}
#************ fmkproflik2
#****f* ZZdebug/mkprofgr
# NAME
# mkprofgr --- profile likelihood gradient
# FUNCTION
# Compute the gradient of the profile likelihood.
# This is an R implementation for debugging only.
# INPUTS
# m number of clusters
# Ji cluster sizes
# betahat regression coefficient estimates
# datamat data matrix generated by makedatamat
# as matrix of discretization breakpoints
# Uijmat matrix of frailty estimates
# OUTPUTS
# likgr gradient of profile loglikelihood of betahat conditional
# on the other parameters
# SYNOPSIS
mkprofgr <- function(m, Ji, betahat, datamat, as, Uijmat)
# SOURCE
#
{
# Compute all m_rs and gradients
mrss <- makemrs(m, Ji, datamat, as, betahat, Uijmat)
mrs <- mrss$mrs
mrsgr <- mrss$mrsgr
# Initialize vectors
likgr <- rep(0, length(betahat))
covs <- as.matrix(datamat[, -c(1:8)], dim(datamat)[1], length(betahat))
for(ind in 1:dim(datamat)[1]){
smax <- datamat[ind, "smax"]
r <- datamat[ind, "r"]
delta <- datamat[ind, "delta"]
# For each row in the data matrix, add the corresponding term to the gradient
likgr <- likgr + delta * (covs[ind, ] - mrsgr[r, smax, ] / mrs[r, smax])
}
return(likgr)
}
#************ mkprofgr
#****f* ZZdebug/fmkprofgr
# NAME
# fmkprofgr --- profile likelihood gradient
# FUNCTION
# Compute the gradient of the profile likelihood, conditional on the frailties
# and other parameters.
# This is superseded by the faster wrapper fmkproflik2, but is still in the
# code for debugging.
# INPUTS
# m number of clusters
# Ji cluster sizes
# betahat regression coefficient estimates
# datamat data matrix generated by makedatamat
# as matrix of discretization breakpoints
# Uijmat matrix of frailty estimates
# OUTPUTS
# likgr gradient of profile loglikelihood of betahat conditional
# on the other parameters
# SYNOPSIS
fmkprofgr <- function(m, Ji, betahat, datamat, as, Uijmat)
# SOURCE
#
{
# Prepare data for Fortran computation
gr <- rep(0, length(betahat))
covs <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
ncovs <- dim(covs)[2]
d <- dim(covs)[1]
index <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
delta <- datamat[, "delta"]
times <- datamat[, "time"]
# Fortran call
out <- .Fortran("fprofgr",
betahat = as.double(betahat),
index = as.integer(index),
delta = as.double(delta),
times = as.double(times),
Z = as.double(covs),
as = as.double(as),
Uijmat = as.double(Uijmat),
d = as.integer(d),
ncovs = as.integer(ncovs),
nr = as.integer(dim(as)[1]),
ns = as.integer(dim(as)[2]),
m = as.integer(m),
maxj = as.integer(max(Ji)),
gr = as.double(gr))
return(out$gr)
}
#************ fmkprofgr
#****f* fortranwrappers/fmkprofgr2
# NAME
# fmkprofgr2 --- compute profile likelihood gradient
# FUNCTION
# Compute the gradient of the profile likelihood, conditional on the frailties
# and other parameters.
# This is just a Fortran wrapper for fprofgr that avoids unnecessary type
# conversions for speed. Most of the inputs need to have been converted into
# the row vectors required by FORTRAN.
# INPUTS
# m number of clusters
# Ji cluster sizes
# betahat regression parameters at which the profile likelihood should be
# computed
# index matrix of indices i,j,k,r,smin,smax for each row (integer)
# delta vector of event indicators (double)
# times vector of actual time span for each row (double)
# Z matrix of covariates (as a double row vector)
# as matrix of discretization breakpoints (double row vector)
# Uijmat matrix of frailty estimates (double row vector)
# d number of rows of Z
# ncovs number of covariates
# nr number of strata
# ns number of breakpoints
# OUTPUTS
# likgr gradient of profile loglikelihood of betahat conditional
# on the other parameters
# SYNOPSIS
fmkprofgr2 <- function(m, Ji, betahat, index, delta, times, Z, as, Uijmat, d, ncovs, nr, ns)
# SOURCE
#
{
gr <- rep(0, length(betahat))
# Direct call to Fortran
out <- .Fortran("fprofgr", betahat = as.double(betahat), index = index,
delta = delta, times = times, Z = Z, as = as, Uijmat = Uijmat,
d = d, ncovs = ncovs, nr = nr, ns = ns, m = as.integer(m),
maxj = as.integer(max(Ji)), gr = gr)
return(out$gr)
}
#************ fmkprofgr2
#!Update regression coefficients $\hat\beta, \hat\beta^D$ using the \emph{profile} likelihood.
#****f* estimation/updateregprof
# NAME
# updateregprof
# FUNCTION
# Update regression coefficients beta1 and beta1 by maximizing the condition
# profile loglikelihood given all the other parameters. Give the regression
# coefficients, the baseline hazard coefficients are then updated.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# betahat regression coefficient estimates for event
# betadhat regression coefficient estimates for event 2
# K number of discretization intervals for event 1
# Kd number of discretization intervals for event 2
# frailtyoutput output from fupdatefrailties4
# dispersionoutput output from one of the dispersion estimation routines
# smooth boolean, indicating whether hazard estimates should be smoothed
# OUTPUTS
# betahat regression coefficient estimates for event
# betadhat regression coefficient estimates for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# loglik loglikelihood ( = loglik1 + loglik2 )
# loglik1 profile loglikelihood of betahat
# loglik2 profile loglikelihood of betadhat
# SYNOPSIS
updateregprof <- function(m, Ji, datamat, datamatd, alphars, alpharsd, as, asd,
betahat, betadhat, K, Kd, frailtyoutput, dispersionoutput, smooth)
# SOURCE
#
{
# Prepare vectors and matrices for direct submission into Fortran,
# without requiring type conversions
Uijhatframe <- frailtyoutput$Uijframe
fUijmat <- as.double(Uijhatframe$Uijmat) # Frailty matrices
fVijmat <- as.double(Uijhatframe$Vijmat)
fas = as.double(as) # Baseline hazard parameters
fasd = as.double(asd)
# Covariates for event 1
Z <- as.double(matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8))
ncovs <- as.integer(length(betahat)) # number of covariates
d1 <- as.integer(dim(datamat)[1]) # data matrix dimension (event 1)
index <- as.integer(datamat[, c("i", "j", "k", "r", "smin", "smax")])
delta <- as.double(datamat[, "delta"]) # recurrent event indicators
times <- as.double(datamat[, "time"]) # recurrent event times
# Covariates for event 2
Zd <- as.double(matrix(datamatd[, -(1:8)], dim(datamatd)[1], dim(datamatd)[2] - 8))
d2 <- as.integer(dim(datamatd)[1]) # data matrix dimension (event 2)
indexd <- as.integer(datamatd[, c("i", "j", "k", "r", "smin", "smax")])
deltad <- as.double(datamatd[, "delta"]) # terminal event indicators
timesd <- as.double(datamatd[, "time"]) # terminal event times
nr = as.integer(dim(as)[1]) # Maximum value of r (number of strata)
ns = as.integer(dim(as)[2]) # Maximum value of s (recurrent) + 1
nsd = as.integer(dim(asd)[2]) # Maximum value of s (terminal) + 1
# Update regression parameters betahat, betadhat using the profile likelihood.
# Use the built-in optim method with the BFGS algorithm.
# The loglikelihood is computed via fmkproflik2and the gradient
# via fmkprofgr2. Conditionally on the frailties, the processes are
# independent and can be fit separately
opt <- try(optim(betahat, fn = fmkproflik2, gr = fmkprofgr2,
control = list(fnscale=-1), method = "BFGS",
m = m, Ji = Ji, index = index, Z = Z, delta = delta, times = times, as = fas,
Uijmat = fUijmat, ncovs = ncovs, d = d1, nr = nr, ns = ns))
optd <- try(optim(betadhat, fn = fmkproflik2, gr = fmkprofgr2,
control = list(fnscale=-1), method = "BFGS",
m = m, Ji = Ji, index = indexd, Z = Zd, delta = deltad, times = timesd,
as = fasd, Uijmat = fVijmat, ncovs = ncovs, d = d2, nr = nr, ns = nsd))
# Check for errors and quit if it didn't converge
if(inherits(opt, "try - error") | inherits(optd, "try - error")){
warning("Inner loop did not converge")
browser()
return(0)
}
if(opt$convergence != 0 | optd$convergence != 0){
warning("Inner loop did not converge")
browser()
return(0)
}
betahat <- opt$par
betadhat <- optd$par
loglik1 <- opt$value
loglik2 <- optd$value
loglik <- loglik1 + loglik2
# Given the updated regression parameters, compute the
# corresponding baseline hazard parameters
alphars <- fmakealphars2(m, Ji, datamat, betahat, as, fUijmat, smooth)
alpharsd <- fmakealphars2(m, Ji, datamatd, betadhat, asd, fVijmat, smooth)
# Format for output
return(list(betahat = betahat, betadhat = betadhat,
alphars = alphars, alpharsd = alpharsd,
as = as, asd = asd,
loglik = loglik, loglik1 = loglik1, loglik2 = loglik2))
}
#************ updateregprof
#########################################################################
#****f* ZZdebug/makeSens2
# NAME
# makeSens2 -- construct sensitivity matrix
# FUNCTION
# Construct the sensitivity matrix at the end of the procedure. This is an R
# implementation of the fmksens2 FORTRAN routine used for debugging.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# betahat regression coefficient estimates for event
# betadhat regression coefficient estimates for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# frailtyoutput output from fupdatefrailties4
# dispersionoutput output from one of the dispersion estimation routines
# K number of discretization intervals for event 1
# Kd number of discretization intervals for event 2
# full boolean indicating whether full matrix should be computed.
# If FALSE, only the sensitivity for betahat is returned.
# OUTPUTS
# S Sensitivity matrix
# SYNOPSIS
makeSens2 <- function(m, Ji, datamat, datamatd, alphars, alpharsd, betahat, betadhat,
as, asd, frailtyoutput, dispersionoutput, K, Kd, full = FALSE)
# SOURCE
#
{
# Extract intermediate values from output
pi <- frailtyoutput$pqrs$pi; qi <- frailtyoutput$pqrs$qi;
ri <- frailtyoutput$pqrs$ri; si <- frailtyoutput$pqrs$si;
wij <- frailtyoutput$pqrs$wij;zij <- frailtyoutput$pqrs$zij;
wi <- frailtyoutput$pqrs$wi
sigma2hat <- dispersionoutput$sigma2hat;
sigma2dhat <- dispersionoutput$sigma2dhat
nu2hat <- dispersionoutput$nu2hat; nu2dhat <- dispersionoutput$nu2dhat
thetahat <- dispersionoutput$thetahat
ncovs <- length(betahat)
Kcum <- cumsum(c(1, K))
Kdcum <- cumsum(c(1, Kd))
if(full){
betaind <- sum(K) + 1:ncovs
betadind <- sum(Kd) + 1:ncovs
}else{
betaind <- 1:ncovs
betadind <- 1:ncovs
}
# Allocate storage
if(full){
S <- matrix(0, sum(K) + sum(Kd) + 2 * ncovs, sum(K) + sum(Kd) + 2 * ncovs)
}else{
S <- matrix(0, 2 * ncovs, 2 * ncovs)
}
# The sensitivity matrix is expressed as the sum of matrices corresponding
# to each cluster
for(i in 1:m)
{
# Allocate storage for intermediate matrices
Smuij <- rep(0, Ji[i]); Setaij <- rep(0, Ji[i])
if(full){
Smuxij <- matrix(0, Ji[i], sum(K) + ncovs);
Setaxijd <- matrix(0, Ji[i], sum(Kd) + ncovs)
S11 <- matrix(0, sum(K) + ncovs, sum(K) + ncovs);
S12 <- matrix(0, sum(K) + ncovs, sum(Kd) + ncovs);
S13 <- matrix(0, sum(Kd) + ncovs, sum(Kd) + ncovs);
S21 <- rep(0, sum(K) + ncovs); S22 <- rep(0, sum(Kd) + ncovs);
S31 <- rep(0, sum(K) + ncovs); S32 <- rep(0, sum(Kd) + ncovs);
}else{
Smuxij <- matrix(0, Ji[i], ncovs);
Setaxijd <- matrix(0, Ji[i], ncovs)
S11 <- matrix(0, ncovs, ncovs);
S12 <- matrix(0, ncovs, ncovs);
S13 <- matrix(0, ncovs, ncovs);
S21 <- rep(0, ncovs); S22 <- rep(0, ncovs);
S31 <- rep(0, ncovs); S32 <- rep(0, ncovs);
}
# Find the index of the data matrices where cluster i begins
iind0 <- match(i, datamat[, "i"])
if(i < m) iind1 <- match(i + 1, datamat[, "i"]) else
iind1 <- dim(datamat)[1] + 1
iind0d <- match(i, datamatd[, "i"])
if(i < m) iind1d <- match(i + 1, datamatd[, "i"]) else
iind1d <- dim(datamatd)[1] + 1
# Compute Smuij, Setaij, Smuxij, Setaxijd (first pass)
for(ind in iind0:(iind1 - 1))
{
Z <- matrix(datamat[ind, -(1:8)], ncovs, 1)
bZ <- t(Z)%*%as.matrix(betahat)
r <- datamat[ind, "r"]
smax <- datamat[ind, "smax"]
k <- datamat[ind, "k"]
j <- datamat[ind, "j"]
time <- datamat[ind, "time"]
if(smax > 0){
for(s in 1:smax){
mu <- exp(bZ) * alphars[r, s] * A(time, as, r, s)
Smuij[j] <- Smuij[j] + mu
Smuxij[j, betaind] <- Smuxij[j, betaind] + mu * as.vector(Z)
if(full) Smuxij[j, Kcum[r] + s - 1] <-
Smuxij[j, Kcum[r] + s - 1] + mu
}
}
}
for(ind in iind0d:(iind1d - 1))
{
Z <- matrix(datamatd[ind, -(1:8)], ncovs, 1)
bdZ <- t(Z)%*%as.matrix(betadhat)
r <- datamatd[ind, "r"]
smaxd <- datamatd[ind, "smax"]
k <- datamatd[ind, "k"]
j <- datamatd[ind, "j"]
time <- datamatd[ind, "time"]
if(smaxd > 0){
for(s in 1:smaxd){
eta <- exp(bdZ) * alpharsd[r, s] * A(time, asd, r, s)
Setaij[j] <- Setaij[j] + eta
Setaxijd[j, betadind] <- Setaxijd[j, betadind] + eta * as.vector(Z)
if(full) Setaxijd[j, Kdcum[r] + s - 1] <-
Setaxijd[j, Kdcum[r] + s - 1] + eta
}
}
}
phi <- rep(0, Ji[i])
for(j in 1:Ji[i]){
phi[j] <- (1 + nu2hat * Smuij[j]) * (1 + nu2dhat * Setaij[j]) -
thetahat^2 * Smuij[j] * Setaij[j]
}
## Compute the various components of the matrix (second pass)
for(ind in iind0:(iind1 - 1))
{
Z <- matrix(datamat[ind, -(1:8)], ncovs, 1)
bZ <- t(Z)%*%as.matrix(betahat)
Z2 <- Z%*%t(Z)
Z <- as.vector(Z)
r <- datamat[ind, "r"]
smax <- datamat[ind, "smax"]
k <- datamat[ind, "k"]
j <- datamat[ind, "j"]
time <- datamat[ind, "time"]
if(smax > 0){
for(s in 1:smax){
mu <- exp(bZ) * alphars[r, s] * A(time, as, r, s)
S11[betaind, betaind] <- S11[betaind, betaind] +
as.vector(mu) * Z2
if(full){
S11[Kcum[r] + s - 1, Kcum[r] + s - 1] <-
S11[Kcum[r] + s - 1, Kcum[r] + s - 1] + mu
S11[Kcum[r] + s - 1, betaind] <-
S11[Kcum[r] + s - 1, betaind] + mu * Z
S11[betaind, Kcum[r] + s - 1] <-
S11[betaind, Kcum[r] + s - 1] + mu * Z
}
w <- 1 / (1 + wij[i, j] * Smuij[j])
S21[betaind] <- S21[betaind] + w * mu * Z
if(full) S21[Kcum[r] + s - 1] <- S21[Kcum[r] + s - 1] + w * mu
w<- -thetahat * Setaij[j] / phi[j]
S31[betaind] <- S31[betaind] + w * mu * Z
if(full) S31[Kcum[r] + s - 1] <- S31[Kcum[r] + s - 1] + w * mu
}
}
}
for(ind in iind0d:(iind1d - 1))
{
Z <- matrix(datamatd[ind, -(1:8)], ncovs, 1)
bdZ <- t(Z)%*%as.matrix(betadhat)
Z2 <- Z%*%t(Z)
Z <- as.vector(Z)
r <- datamatd[ind, "r"]
smaxd <- datamatd[ind, "smax"]
k <- datamatd[ind, "k"]
j <- datamatd[ind, "j"]
time <- datamatd[ind, "time"]
if(smaxd > 0){
for(s in 1:smaxd){
eta <- exp(bdZ) * alpharsd[r, s] * A(time, asd, r, s)
S13[betadind, betadind] <- S13[betadind, betadind] +
as.vector(eta) * Z2
if(full){
S13[Kdcum[r] + s - 1, Kdcum[r] + s - 1] <-
S13[Kdcum[r] + s - 1, Kdcum[r] + s - 1] + eta
S13[Kdcum[r] + s - 1, betadind] <-
S13[Kdcum[r] + s - 1, betadind] + eta * Z
S13[betadind, Kdcum[r] + s - 1] <-
S13[betadind, Kdcum[r] + s - 1] + eta * Z
}
w<- -thetahat * Smuij[j] / phi[j]
S22[betadind] <- S22[betadind] + w * eta * Z
if(full) S22[Kdcum[r] + s - 1] <- S22[Kdcum[r] + s - 1] + w * eta
w <- 1 / (1 + zij[i, j] * Setaij[j])
S32[betadind] <- S32[betadind] + w * eta * Z
if(full) S32[Kdcum[r] + s - 1] <- S32[Kdcum[r] + s - 1] + w * eta
}
}
}
for(j in 1:Ji[i]){
S11 <- S11 - wij[i, j] / (1 + wij[i, j] * Smuij[j]) *
Smuxij[j, ]%*%t(Smuxij[j, ])
S12 <- S12 - thetahat / phi[j] * Smuxij[j, ]%*%t(Setaxijd[j, ])
S13 <- S13 - zij[i, j] / (1 + zij[i, j] * Setaij[j]) *
Setaxijd[j, ]%*%t(Setaxijd[j, ])
}
S2 <- c(S21, S22)
S3 <- c(S31, S32)
S <- S - rbind(cbind(S11, S12), cbind(t(S12), S13)) +
wi[i] * (sigma2hat * (1 + sigma2dhat * si[i]) * S2%*%t(S2) -
sigma2hat * sigma2dhat * qi[i] * (S3%*%t(S2) + S2%*%t(S3)) +
sigma2dhat * (1 + sigma2hat * pi[i]) * S3%*%t(S3))
}
return(S)
}
#************ makeSens2
#****f* fortranwrappers/fmakeSens2
# NAME
# fmakeSens2 --- construct sensitivity matrix
# FUNCTION
# Construct the sensitivity matrix at the end of the procedure.
# This is a wrapper for the FORTRAN routine fmksens2 or fmksens2full.
# It only gest called with full=FALSE, because for full=TRUE the function
# fmkstderr uses a faster method.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# betahat regression coefficient estimates for event
# betadhat regression coefficient estimates for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# frailtyoutput output from fupdatefrailties4
# dispersionoutput output from one of the dispersion estimation routines
# K number of discretization intervals for event 1
# Kd number of discretization intervals for event 2
# full boolean indicating whether full matrix should be computed.
# If FALSE, only the sensitivity for betahat is returned.
# OUTPUTS
# S Sensitivity matrix
# SYNOPSIS
fmakeSens2 <- function(m, Ji, datamat, datamatd, alphars, alpharsd, betahat, betadhat,
as, asd, frailtyoutput, dispersionoutput, K, Kd, full = FALSE)
# SOURCE
#
{
# Extract intermediate values
pi <- frailtyoutput$pqrs$pi; qi <- frailtyoutput$pqrs$qi;
ri <- frailtyoutput$pqrs$ri; si <- frailtyoutput$pqrs$si;
wij <- frailtyoutput$pqrs$wij;zij <- frailtyoutput$pqrs$zij;
wi <- frailtyoutput$pqrs$wi
sigma2hat <- dispersionoutput$sigma2hat;
sigma2dhat <- dispersionoutput$sigma2dhat
nu2hat <- dispersionoutput$nu2hat; nu2dhat <- dispersionoutput$nu2dhat
thetahat <- dispersionoutput$thetahat
ncovs1 <- length(betahat)
ncovs2 <- length(betadhat)
# Prepare data for submission to Fortran function fmksens2 or fmksens2full
Z <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
d1 <- dim(Z)[1]
index1 <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
times1 <- datamat[, "time"]
Zd <- matrix(datamatd[, -(1:8)], dim(datamatd)[1], dim(datamatd)[2] - 8)
d2 <- dim(Zd)[1]
index2 <- datamatd[, c("i", "j", "k", "r", "smin", "smax")]
times2 <- datamatd[, "time"]
nr = dim(alphars)[1]
ns = dim(alphars)[2]
nsd = dim(alpharsd)[2]
if(full){
Kcum <- cumsum(c(1, K))
Kdcum <- cumsum(c(1, Kd))
np <- sum(K) + ncovs1
npd <- sum(Kd) + ncovs2
S <- matrix(0, np + npd, np + npd)
# Fortran call
out <- try(.Fortran("fmksens2full",
Smat = as.double(S),
index1 = as.integer(index1), index2 = as.integer(index2),
Z = as.double(Z), Zd = as.double(Zd),
alphars = as.double(alphars), alpharsd = as.double(alpharsd),
as = as.double(as), asd = as.double(asd),
betahat = as.double(betahat), betadhat = as.double(betadhat),
times1 = as.double(times1), times2 = as.double(times2),
pi = as.double(pi), qi = as.double(qi),
ri = as.double(ri), si = as.double(si),
wi = as.double(wi), wij = as.double(wij), zij = as.double(zij),
sig2 = as.double(sigma2hat), sig2d = as.double(sigma2dhat),
nu2 = as.double(nu2hat), nu2d = as.double(nu2dhat),
theta = as.double(thetahat),
ncovs1 = as.integer(ncovs1), ncovs2 = as.integer(ncovs2),
nr = as.integer(nr), ns = as.integer(ns), nsd = as.integer(nsd),
np = as.integer(np), npd = as.integer(npd),
d1 = as.integer(d1), d2 = as.integer(d2),
m = as.integer(m), Ji = as.integer(Ji), maxj = as.integer(max(Ji)),
Kcum = as.integer(Kcum), Kdcum = as.integer(Kdcum), DUP = FALSE))
S <- matrix(out$Smat, np + npd, np + npd)
}else{
S <- matrix(0, ncovs1 + ncovs2, ncovs1 + ncovs2)
# Submit to Fortran
out <- try(.Fortran("fmksens2",
Smat = as.double(S),
index1 = as.integer(index1), index2 = as.integer(index2),
Z = as.double(Z), Zd = as.double(Zd),
alphars = as.double(alphars), alpharsd = as.double(alpharsd),
as = as.double(as), asd = as.double(asd),
betahat = as.double(betahat), betadhat = as.double(betadhat),
times1 = as.double(times1), times2 = as.double(times2),
pi = as.double(pi), qi = as.double(qi),
ri = as.double(ri), si = as.double(si),
wi = as.double(wi), wij = as.double(wij), zij = as.double(zij),
sig2 = as.double(sigma2hat), sig2d = as.double(sigma2dhat),
nu2 = as.double(nu2hat), nu2d = as.double(nu2dhat),
theta = as.double(thetahat),
ncovs1 = as.integer(ncovs1), ncovs2 = as.integer(ncovs2),
nr = as.integer(nr), ns = as.integer(ns), nsd = as.integer(nsd),
d1 = as.integer(d1), d2 = as.integer(d2),
m = as.integer(m), Ji = as.integer(Ji), maxj = as.integer(max(Ji))))
S <- matrix(out$Smat, ncovs1 + ncovs2, ncovs1 + ncovs2)
}
return(S)
}
#************ fmakeSens2
#****f* fortranwrappers/fmkstderr
# NAME
# fmkstderr --- compute standard error
# FUNCTION
# Compute the standard error entirely within FORTRAN. This is a wrapper for
# the FORTRAN routine fmksterr.
# INPUTS
# m number of clusters
# Ji cluster sizes
# datamat data matrix generated by makedatamat for event 1
# datamatd data matrix generated by makedatamat for event 2
# alphars matrix of baseline hazard parameters for event 1
# alpharsd matrix of baseline hazard parameters for event 2
# betahat regression coefficient estimates for event
# betadhat regression coefficient estimates for event 2
# as matrix of discretization breakpoints for event 1
# asd matrix of discretization breakpoints for event 2
# frailtyoutput output from fupdatefrailties4
# dispersionoutput output from one of the dispersion estimation routines
# K number of discretization intervals for event 1
# Kd number of discretization intervals for event 2
# univariate boolean indicating whether the process is univariate
# OUTPUT
# stderr a vector of standard errors for regression and hazard params
# SYNOPSIS
fmkstderr <- function(m, Ji, b, datamat, datamatd, alphars, alpharsd,
betahat, betadhat, as, asd, frailtyoutput, dispersionoutput,
K, Kd, univariate = FALSE)
# SOURCE
#
{
pi <- frailtyoutput$pqrs$pi; qi <- frailtyoutput$pqrs$qi;
ri <- frailtyoutput$pqrs$ri; si <- frailtyoutput$pqrs$si;
wij <- frailtyoutput$pqrs$wij;zij <- frailtyoutput$pqrs$zij;
wi <- frailtyoutput$pqrs$wi
sigma2hat <- dispersionoutput$sigma2hat;
sigma2dhat <- dispersionoutput$sigma2dhat
nu2hat <- dispersionoutput$nu2hat; nu2dhat <- dispersionoutput$nu2dhat
thetahat <- dispersionoutput$thetahat
ncovs1 <- length(betahat)
ncovs2 <- length(betadhat)
# Prepare data for submission to Fortran function fmkstderr
Z <- matrix(datamat[, -(1:8)], dim(datamat)[1], dim(datamat)[2] - 8)
d1 <- dim(Z)[1]
index1 <- datamat[, c("i", "j", "k", "r", "smin", "smax")]
times1 <- datamat[, "time"]
Zd <- matrix(datamatd[, -(1:8)], dim(datamatd)[1], dim(datamatd)[2] - 8)
d2 <- dim(Zd)[1]
index2 <- datamatd[, c("i", "j", "k", "r", "smin", "smax")]
times2 <- datamatd[, "time"]
nr = dim(alphars)[1]
ns = dim(alphars)[2]
nsd = dim(alpharsd)[2]
Kcum <- cumsum(c(1, K))
Kdcum <- cumsum(c(1, Kd))
np <- sum(K) + ncovs1
npd <- sum(Kd) + ncovs2
S <- matrix(0, np + npd, np + npd)
# FORTRAN call
out <- try(.Fortran("fmkstderr", Smat = as.double(S), B = as.double(b),
index1 = as.integer(index1), index2 = as.integer(index2),
Z = as.double(Z), Zd = as.double(Zd),
alphars = as.double(alphars), alpharsd = as.double(alpharsd),
as = as.double(as), asd = as.double(asd),
betahat = as.double(betahat), betadhat = as.double(betadhat),
times1 = as.double(times1), times2 = as.double(times2),
pi = as.double(pi), qi = as.double(qi),
ri = as.double(ri), si = as.double(si),
wi = as.double(wi), wij = as.double(wij), zij = as.double(zij),
sig2 = as.double(sigma2hat), sig2d = as.double(sigma2dhat),
nu2 = as.double(nu2hat), nu2d = as.double(nu2dhat), theta = as.double(thetahat),
ncovs1 = as.integer(ncovs1), ncovs2 = as.integer(ncovs2),
nr = as.integer(nr), ns = as.integer(ns), nsd = as.integer(nsd),
np = as.integer(np), npd = as.integer(npd),
d1 = as.integer(d1), d2 = as.integer(d2),
m = as.integer(m), Ji = as.integer(Ji), maxj = as.integer(max(Ji)),
Kcum = as.integer(Kcum), Kdcum = as.integer(Kdcum), DUP = FALSE))
if(!univariate){
# extract standard error from the output
x <- matrix(out$B, np + npd, ncovs1 + ncovs2)
stderr <- sqrt(x[b == 1])
}else{
# For univariates, the matrix has to be solved. This could be sped
# up in the same way, but I didn't have time.
S <- matrix(out$Smat, np + npd, np + npd)[1:np, 1:np]
stderr <- sqrt(diag(solve(S))[np - (ncovs1 - 1):0])
}
return(stderr)
}
#************ fmkstderr
##################################################
### Main Fitting Routine
##################################################
#****f* 00main/bivrec.agdata
# NAME
# bivrec.agdata --- fitter for bivariate models
# FUNCTION
# The main fitting function. Fits the model, given an Anderson-Gill data set
# and other parameters, iterating between updating frailty, dispersion and
# regression parameters until convergence. After initializing, fitting
# consists of iteratively calling fupdatefrailties4 to update frailty
# estimates, updatedisppears to update dispersion parameter estimtes, and
# updateregprof to update regression parameter estimates. The function
# fmkstderr computes standard errors at the end.
#
# See also the call graph linked from 00main.
#
# This function should not be called directly, rather, the interface
# provided by the bivrec.formula and unirec.formula methods should be used.
# INPUTS
# x a data frame with the following columns:
# i cluster index
# j subject index
# k event counter
# r stratum index
# start interval start time
# stop interval stop time
# delta indicator for event 1
# Delta indicator for event 2
# ... columns of all covariates used in the model
# K1 if > 1, number of discretization breakpoints for event 1
# if < 1, proportion relative to maximum
# can be of length > 1 for different strata
# K2 analogous for event 2
# excludevars1 vector of strings giving covariate names that should
# not be included for process 1
# excludevars2 analogous for process 2
# verbose integer from 1-5 specifying the amount of output printed
# alternating boolean indicating whether the data are episodic
# dispest dispersion parameter estimators to use. Options are
# "pearson", "marginal" and "ohlsson"
# correction determines whether to use Ma's correction. Options are
# "none", "single" or "double"
# computesd boolean, determines whether to compute standard errors
# fullS boolean, determines whether to compute the full
# sensitivity matrix
# fixzero list of parameters that should be fixed at 0. For
# univariate models, this should contain "sigma2dhat",
# "nu2dhat" and "thetahat"
# smooth boolean, whether to smooth the baseline hazards
# maxiter maximum number of iterations permitted before failing
# convergence convergence criterion, absolute change in all params
# initial optionally a list containing initial values for
# betahat, betadhat, Uihat, Vihat, Uijhat, Vijhat and dispparams
# OUTPUTS
# an object of class bivrec, see the package documentation for details
# SYNOPSIS
bivrec.agdata <- function(x, K1 = 10, K2 = 10, excludevars1 = NULL,
excludevars2 = NULL, verbose = 1, alternating = FALSE,
dispest = "pearson", correction = "none", computesd = TRUE,
fullS = TRUE, fixzero = NULL, smooth = FALSE, maxiter = 200,
convergence = 1e-3, initial = NULL)
# SOURCE
#
{
# Read in the input and make basic adjustments
agdata <- x
K <- K1;Kd <- K2;
if(dim(agdata)[2] > 9) if(sum(agdata[, 10] != 0) == 0) agdata <- agdata[, -10]
if(is.null(excludevars1)) vars1 <- NULL else
vars1 <- which(!colnames(agdata)[ - (1:8)]%in%excludevars1)
if(is.null(excludevars2)) vars2 <- NULL else
vars2 <- which(!colnames(agdata)[ - (1:8)]%in%excludevars2)
maxiter.outer <- maxiter; thresh.outer <- convergence;
fixzero <- fixzero[fixzero%in%c("clust1", "clust2", "subj1", "subj2", "cov")]
fixzero[fixzero == "clust1"] <- "sigma2hat"
fixzero[fixzero == "clust2"] <- "sigma2dhat"
fixzero[fixzero == "subj1"] <- "nu2hat"
fixzero[fixzero == "subj2"] <- "nu2dhat"
fixzero[fixzero == "cov"] <- "thetahat"
if(length(fixzero) == 0) fixzero <- NULL
# Check whether a univariate model is being fit
if(all(c("sigma2dhat", "nu2dhat", "thetahat")%in%fixzero))
univariate <- TRUE else univariate <- FALSE
call <- match.call()
# Get the values of m and Ji from the agdata matrix, and set
# global variables with those values.
m <- max(agdata$i)
Jilocal <- rep(0, m)
for(i in 1:m){
Jilocal[i] <- max(agdata$j[agdata$i == i])
}
Ji <- Jilocal
# If the provided K, Kd are of length 1, then assume all strata should
# have the same number of breakpoints
nr <- length(unique(agdata$r))
###########################
#### Begin find initial values for frailties and coefficients
# Initial values are computed using coxph.
# For recurrent events, the response time is the interevent time stop - start,
# and event indicator delta.
if(verbose >= 2) cat("Get initial values\n")
# create separate data frames for each process
agdata1 <- agdata[, -8]
agdata2 <- agdata[, -7];colnames(agdata2)[7] <- "delta"
# remove duplicate rows in each of the two data frames, and adjust the
# times accordingly
if(alternating){
atrisk1 <- c(TRUE,!duplicated(agdata[, 1:2])[ - 1]) |
c(TRUE, agdata$Delta[ - length(agdata$Delta)] == 1)
agdata1 <- agdata[atrisk1, ]
agdata2 <- agdata[!atrisk1, ]
agdata1 <- agdata1[, -8]
agdata2 <- agdata2[, -7];colnames(agdata2)[7] <- "delta"
}else{
# new method
dup <- duplicated(agdata1[, -c(3, 5, 6, 7)])
dup <- c(dup[ - 1], FALSE) & agdata1$delta != 1
agdata1 <- agdata1[!dup, ]
agdata1$start[ - 1] <- agdata1$stop[ - length(agdata1$stop)]
agdata1$start[!duplicated(agdata1[, 1:2])] <- 0
dup <- duplicated(agdata2[, -c(3, 5, 6, 7)])
dup <- c(dup[ - 1], FALSE) & agdata2$delta != 1
agdata2 <- agdata2[!dup, ]
agdata2$start[ - 1] <- agdata2$stop[ - length(agdata2$stop)]
agdata2$start[!duplicated(agdata2[, 1:2])] <- 0
}
# Remove excluded covariates
if(!is.null(vars1)) agdata1 <- agdata1[, c(1:7, 7 + vars1)]
if(!is.null(vars2)) agdata2 <- agdata2[, c(1:7, 7 + vars2)]
# Compute the number of discretization breakpoints, if K or Kd were specified
# as a proportion of the maximum
if(length(K) == 1 & K <= 1) {
Kout <- rep(0, nr)
for(r in 1:nr) Kout[r] <- round(length(unique((agdata1$stop -
agdata1$start)[agdata1$delta == 1 & agdata1$r == r])) * K)
K <- Kout
}
if(length(Kd) == 1 & Kd <= 1) {
Kout <- rep(0, nr)
for(r in 1:nr) Kout[r] <- round(length(unique((agdata2$stop -
agdata2$start)[agdata2$delta == 1 & agdata2$r == r])) * Kd)
Kd <- Kout
}
if(length(K) == 1) K = rep(K, nr)
if(length(Kd) == 1) Kd = rep(Kd, nr)
if(length(Kd) != nr | length(K) != nr) stop("K needs to be as long as the number of strata")
if(univariate) Kd <- 1
if(verbose >= 1) cat("K = ", K, "\t Kd = ", Kd, "\n")
# Covariate names are needed for Cox model fits and for output
covariatenames1 <- paste(colnames(agdata1)[ - (1:7)], collapse = " + ")
covariatenames2 <- paste(colnames(agdata2)[ - (1:7)], collapse = " + ")
if(is.null(initial)){
# gamma frailties
ftype <- "gamma"
# Models need to be fit with both subject - level frailties, and
# cluster - level frailties, resulting in the four models fit below.
# Compute Cox fits with cluster frailties
if(m > 1){
cox.clust1 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~",
covariatenames1, "+ frailty(i, distribution='", ftype, "')",
sep = "")), agdata1))
if(!univariate)
cox.clust2 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~",
covariatenames2, "+ frailty(i, distribution='", ftype, "')",
sep = "")), agdata2))
else
cox.clust2 <- list(frail = rep(0, m), fvar = 0)
}else{
cox.clust1 <- list(frail = 0, fvar = 0)
cox.clust2 <- list(frail = 0, fvar = 0)
}
# Compute Cox fits with subject - level frailties
cox.ind1 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~",
covariatenames1, "+ frailty(i * 1000 + j, distribution='", ftype, "')",
sep = "")), agdata1))
if(!univariate)
cox.ind2 <- try(coxph(as.formula(paste("Surv(stop - start, delta) ~",
covariatenames2, "+ frailty(i * 1000 + j, distribution='", ftype, "')",
sep = "")), agdata2))
else
cox.ind2 <- list(frail = rep(0, sum(Ji)), fvar = 0,
coef = rep(0, dim(agdata2)[2] - 7))
if(inherits(cox.clust1, "try - error") | inherits(cox.clust2, "try - error") |
inherits(cox.ind1, "try - error") | inherits(cox.ind2, "try - error") )
browser()
## Set initial values for frailties based on estimated values
Uihat <- exp(cox.clust1$frail)
Vihat <- exp(cox.clust2$frail)
if(!alternating){
Uijhat <- exp(cox.ind1$frail)
Vijhat <- exp(cox.ind2$frail)
}else{
Uijhat <- exp(cox.ind1$frail)
Vijhat <- rep(1, length(Uijhat))
Vijhat[unique(agdata1$i * 1000 + agdata1$j)%in%unique(agdata2$i * 1000 +
agdata2$j)] <- exp(cox.ind2$frail)
}
## Set initial values for dispersion parameters
sigma2hat <- mean(cox.clust1$fvar)
sigma2dhat <- mean(cox.clust2$fvar)
nu2hat <- max(mean(cox.ind1$fvar) - sigma2hat, .1)
nu2dhat <- max(mean(cox.ind2$fvar) - sigma2hat, .1)
thetahat <- cov(Uijhat, Vijhat)
## Check that dispersion parameters are valid
if(abs(thetahat)<.001) thetahat <- sign(thetahat)*.01
if(abs(sigma2hat)<.001) sigma2hat <- sign(sigma2hat)*.01
if(abs(sigma2dhat)<.001) sigma2dhat <- sign(sigma2dhat)*.01
if(abs(nu2hat)<.001) nu2hat <- sign(nu2hat)*.01
if(abs(nu2dhat)<.001) nu2dhat <- sign(nu2dhat)*.01
## Extract initial regression parameters
betahat <- cox.ind1$coef
betadhat <- cox.ind2$coef
# Save initial dispersion parameters
dispparams <- list(sigma2hat = sigma2hat, sigma2dhat = sigma2dhat,
nu2hat = nu2hat, nu2dhat = nu2dhat, thetahat = thetahat)
# Fix something at zero if desired
dispparams[fixzero] <- 0
if(sum(is.na(betahat)) + sum(is.na(betadhat)) > 0){
warning("Unable to find good starting values")
return(0)
}
## Save all initial values for output
initial <- list(betahat = betahat, betadhat = betadhat,
dispparams = dispparams)
if(verbose >= 1) cat("\t betahat =", betahat, ", betadhat = ",
betadhat, "\n")
if(verbose >= 1) cat("\t dispparams = ", dispparams$sigma2hat,
dispparams$sigma2dhat, dispparams$nu2hat, dispparams$nu2dhat,
dispparams$thetahat, "\n")
}else{ # if initial values are provided, use those
dispparams <- initial$dispparams
Uihat <- initial$Uihat
Uijhat <- initial$Uijhat
Vihat <- initial$Vihat
Vijhat <- initial$Vijhat
betahat <- initial$betahat
betadhat <- initial$betadhat
}
##########################
### Convert Anderson - Gill data into matrix form and initialize matrices
if(verbose >= 2) cat("Initialize matrices for estimation\n")
## Compute breakpoints
as <- makeas(agdata1, K, alternating)
asd <- makeas(agdata2, Kd, alternating)
## Write frailty estimates in the form of a matrix
Uijhatframe <- makeUijframe(m, Ji, Uijhat, Vijhat)
## Create data matrix
datamat <- makedatamat(agdata1, as, alternating)
datamatd <- makedatamat(agdata2, asd, alternating)
## Compute initial estimates of baseline hazard parameters
alphars <- fmakealphars2(m, Ji, datamat, betahat, as, Uijhatframe$Uijmat, smooth)
alpharsd <- fmakealphars2(m, Ji, datamatd, betadhat, asd,
Uijhatframe$Vijmat, smooth)
## Cleanup: Remove Anderson - Gill data
varnames1 <- colnames(agdata1)[ - (1:7)]
varnames2 <- colnames(agdata2)[ - (1:7)]
rm(agdata)
##########################
### Begin iterative estimation procedure
if(verbose >= 2) cat("Iterative estimation procedure\n")
## Initialize convergence criterion
convergence <- 1000
iter <- 0
# Compute ad - hoc correction value
if(is.character(correction)){
if(correction == "none") corrval <- 1
if(correction == "single") corrval <- max(1, m / (m - length(varnames1)))
if(correction == "double") corrval <- max(1, m / (m - 2 * length(varnames1)))
} else corrval <- correction
## Loop until convergence
while((convergence > thresh.outer) & (iter <= maxiter.outer)){
# Starting convergence criterion and iteration counter
conv.inner <- 1000
iter.inner <- 0
######## Step 1: Update Frailty estimates
if(verbose >= 2) cat("\tUpdating frailty estimates\n")
frailtyoutput <- fupdatefrailties4(m, Ji, datamat, datamatd,
alphars, alpharsd, as, asd, betahat, betadhat, dispparams)
######## Step 2: Update Dispersion coefficient estimates
if(dispest == "ohlsson"){
if(verbose >= 2) cat("\tUpdating dispersion parameters (Ohlsson)\n")
# Use Ohlsson estimators
dispersionoutput <- updatedispohls(m, Ji, datamat, datamatd,
alphars, alpharsd, as, asd, betahat, betadhat, fixzero)
if(is.null(names(dispersionoutput))){return(0)}
}
if(dispest == "pearson"){
if(verbose >= 2) cat("\tUpdating dispersion parameters (Pearson)\n")
# Use Pearson estimation code
dispersionoutput <- updatedisppears(m, Ji, frailtyoutput,
dispparams, corrval)
dispersionoutput[fixzero] <- 0
}
if(dispest == "marginal"){
if(verbose >= 2) cat("\tUpdating dispersion parameters (Marginal)\n")
# Use marginal estimation code
dispersionoutput <- updatedispmarg2(m, Ji, datamat, datamatd,
alphars, alpharsd, as, asd, betahat, betadhat, fixzero, univariate)
}
if(length(dispest) == 5){
# it's possible to compute some parameters via Pearson and others by
# marginal estimators. This was for testing only, there's no reason
# to ever do this!
dispersionoutput.marg <- unlist(updatedispmarg2(m, Ji, datamat, datamatd,
alphars, alpharsd, as, asd, betahat, betadhat, fixzero))
dispersionoutput.pears <- unlist(updatedisppears(m, Ji, frailtyoutput,
dispparams, corrval))
dispersionoutput <- dispersionoutput.marg
dispersionoutput[dispest == "p"] <- dispersionoutput.pears[dispest == "p"]
dispersionoutput <- as.list(dispersionoutput)
}
iter.inner <- iter.inner + 1
######## Step 3: Update Regression parameter estimates
if(verbose >= 2) cat("\tUpdating regression parameter estimates ...\n")
regressionoutput <- updateregprof(m, Ji, datamat, datamatd,
alphars, alpharsd, as, asd, betahat, betadhat, K, Kd,
frailtyoutput, dispersionoutput, smooth)
if(is.null(names(regressionoutput))){
return(0)
}
alpharsnew <- regressionoutput$alphars;
alpharsdnew <- regressionoutput$alpharsd;
betahatnew <- regressionoutput$betahat;
betadhatnew <- regressionoutput$betadhat;
# Compute new value of convergence criterion
newconvergence <- sum(abs(betahatnew - betahat)) +
sum(abs(betadhatnew - betadhat)) +
abs(dispparams$sigma2hat - dispersionoutput$sigma2hat) +
abs(dispparams$sigma2dhat - dispersionoutput$sigma2dhat) +
abs(dispparams$nu2hat - dispersionoutput$nu2hat) +
abs(dispparams$nu2dhat - dispersionoutput$nu2dhat) +
abs(dispparams$thetahat - dispersionoutput$thetahat)
if(verbose >= 1) cat("\tConvergence criterion: ", newconvergence, "\n")
# DEBUG: Output state at this iteration
if(verbose >= 2) cat("\t betahat =", betahatnew, ", betadhat = ",
betadhatnew, "\n")
if(verbose >= 2) cat("\t dispparams = ", format(unlist(dispersionoutput),
digits = 4), "\n")
if(is.nan(newconvergence) | iter == maxiter.outer |
(newconvergence - convergence) > 10) {
warning("Outer loop did not converge")
return(0)
}
# Replace parameter values with new parameters
alphars <- alpharsnew
alpharsd <- alpharsdnew
betahat <- betahatnew
betadhat <- betadhatnew
convergence <- newconvergence
dispparams <- dispersionoutput
iter <- iter + 1
}
## Format for output, compute standard errors, etc
betahat <- regressionoutput$betahat
betadhat <- regressionoutput$betadhat
alphars <- as.vector(regressionoutput$alphars)
alpharsd <- as.vector(regressionoutput$alpharsd)
if(computesd){
# Compute standard errors either using full sensitivity matrix, or only
# the sensitivity of the regression coefficients
if(fullS){
np <- length(betahat) + sum(K);npd <- length(betadhat) + sum(Kd)
b <- matrix(0, np + npd, length(betahat) + length(betadhat))
b[c(sum(K) + 1:length(betahat), sum(K) + sum(Kd) + length(betahat) +
1:length(betadhat)), 1:dim(b)[2]] <- diag(1, dim(b)[2])
stderr <- fmkstderr(m, Ji, b, datamat, datamatd, regressionoutput$alphars,
regressionoutput$alpharsd, betahat, betadhat, as, asd,
frailtyoutput, dispersionoutput, K, Kd, univariate)
}else{
S <- fmakeSens2(m, Ji, datamat, datamatd, regressionoutput$alphars,
regressionoutput$alpharsd, betahat, betadhat, as, asd,
frailtyoutput, dispersionoutput, K, Kd, fullS)
if(univariate) S <- S[1:length(betahat), 1:length(betahat)]
stderr <- sqrt(diag(-solve(S)))
}
std_betahat <- stderr[1:length(betahat)]
std_betadhat <- stderr[length(betahat) + 1:length(betadhat)]
}else{
stderr <- 0
std_betahat <- 0
std_betadhat <- 0
}
# Compute p-values and summaries
pval = pmin(pnorm(betahat / std_betahat), 1 - pnorm(betahat / std_betahat))
pval.d = pmin(pnorm(betadhat / std_betadhat), 1 - pnorm(betadhat / std_betadhat))
varnames <- unique(c(varnames1, varnames2))
summary.reg <- as.data.frame(matrix(NA, length(varnames), 6), row.names = varnames)
colnames(summary.reg) <- c("coeff.1", "se.1", "pval.1", "coeff.2", "se.2", "pval.2")
summary.reg[match(varnames1, varnames), 1:3] <- cbind(betahat, std_betahat, pval)
summary.reg[match(varnames2, varnames), 3 + 1:3] <-
cbind(betadhat, std_betadhat, pval.d)
rhohat <- dispparams$thetahat / sqrt((dispparams$sigma2hat + dispparams$nu2hat) *
(dispparams$sigma2dhat + dispparams$nu2dhat))
summary.disp <- c(dispparams$sigma2hat, dispparams$sigma2dhat, dispparams$nu2hat,
dispparams$nu2dhat, dispparams$thetahat, rhohat)
names(summary.disp) <- c("sigma2hat", "sigma2dhat", "nu2hat",
"nu2dhat", "thetahat", "rhohat")
return(list(call = call,
summary.reg = summary.reg,
summary.disp = summary.disp,
regressionoutput = regressionoutput,
frailtyoutput = frailtyoutput,
dispparams = dispparams,
initial = initial,
stderr = stderr))
}
#************ bivrec.agdata
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.