Nothing
#' Parametric Proportional Hazards Regression
#'
#' Proportional hazards model with parametric baseline hazard(s). Allows for
#' stratification with different scale and shape in each stratum, and left
#' truncated and right censored data.
#'
#' The parameterization is the same as in \code{\link{coxreg}} and
#' \code{\link[survival]{coxph}}, but different from the one used by
#' \code{\link[survival]{survreg}} (which is not a proportional hazards
#' modelling function). The model is \deqn{S(t; a, b, \beta, z) =
#' S_0((t/b)^a)^{\exp((z-mean(z))\beta)}}{% S(t; a, b, beta, z) =
#' S0((t/b)^a)^exp((z - mean(z)) beta)} where S0 is some standardized survivor
#' function.
#'
#' @param formula a formula object, with the response on the left of a ~
#' operator, and the terms on the right. The response must be a survival
#' object as returned by the Surv function.
#' @param data a data.frame in which to interpret the variables named in the
#' formula.
#' @param na.action a missing-data filter function, applied to the model.frame,
#' after any subset argument has been used. Default is
#' \code{options()$na.action}.
#' @param dist Which distribution? Default is "weibull", with the alternatives
#' "ev" (Extreme value), "gompertz", "pch" (piecewise constant hazards
#' function), "loglogistic" and "lognormal". A special case like the
#' \code{exponential} can be obtained by choosing "weibull" in combination with
#' \code{shape = 1}, or "pch" without \code{cuts}.
#' @param cuts Only used with \code{dist = "pch"}. Specifies the points in time
#' where the hazard function jumps. If omitted, an exponential model is fitted.
#' @param init vector of initial values of the iteration. Default initial
#' value is zero for all variables.
#' @param shape If positive, the shape parameter is fixed at that value (in
#' each stratum). If zero or negative, the shape parameter is estimated. If
#' more than one stratum is present in data, each stratum gets its own
#' estimate. Only relevant for the Weibull and Extreme Value distributions.
#' @param param Applies only to the Gompertz distribution: "canonical" is
#' defined in the description of the \code{\link{Gompertz}} distribution;
#' "rate" transforms \code{scale} to 1/log(scale), giving the same
#' parametrization as in Stata and SAS. The latter thus allows for a negative
#' rate, or a "cure" (Gompertz) model. The default is "canonical"; if this
#' results in extremely large scale and/or shape estimates, consider trying
#' "rate".
#' @param control a list with components \code{eps} (convergence criterion),
#' \code{maxiter} (maximum number of iterations), and \code{silent} (logical,
#' controlling amount of output). You can change any component without mention
#' the other(s).
#' @param singular.ok Not used.
#' @param model Not used.
#' @param x Return the design matrix in the model object?
#' @param y Return the response in the model object?
#' @return A list of class \code{c("phreg", "coxreg")} with components
#' \item{coefficients}{Fitted parameter estimates.}
#' \item{cuts}{Cut points for
#' the "pch" distribution. \code{NULL} otherwise.}
#' \item{hazards}{The estimated
#' constant levels in the case of the "pch" distribution. \code{NULL}
#' otherwise.}
#' \item{var}{Covariance matrix of the estimates.}
#' \item{loglik}{Vector of length two; first component is the value at the
#' initial parameter values, the second componet is the maximized value.}
#' \item{score}{The score test statistic (at the initial value).}
#' \item{linear.predictors}{The estimated linear predictors.}
#' \item{means}{Means of the columns of the design matrix, except those columns
#' corresponding to a factor level. Otherwise all
#' zero.}
#' \item{w.means}{Weighted (against exposure time) means of covariates;
#' weighted relative frequencies of levels of factors.}
#' \item{n}{Number of
#' spells in indata (possibly after removal of cases with NA's).}
#' \item{n.events}{Number of events in data.}
#' \item{terms}{Used by extractor functions.}
#' \item{assign}{Used by extractor functions.} %
#' \item{wald.test}{The Wald test statistic (at the initial value).}
#' \item{y}{The Surv vector.}
#' \item{isF}{Logical vector indicating the
#' covariates that are factors.}
#' \item{covars}{The covariates.}
#' \item{ttr}{Total Time at Risk.}
#' \item{levels}{List of levels of factors.}
#' \item{formula}{The calling formula.}
#' \item{call}{The call.}
#' \item{method}{The method.}
#' \item{convergence}{Did the optimization
#' converge?}
#' \item{fail}{Did the optimization fail? (Is \code{NULL} if not).}
#' \item{pfixed}{TRUE if shape was fixed in the estimation.}
#' @note The lognormal and loglogistic baseline distributions are extended to a
#' three-parameter family by adding a "proportionality" parameter (multiplying
#' the baseline hazard function). The log of the estimated parameter turns up
#' as '(Intercept)' in the printed output. The reason for this extension is
#' that the standard lognormal and loglogistic distributions are not closed
#' under proportional hazards.
#' @section Warning: The lognormal and loglogistic distributions are included
#' on an experimental basis for the moment. Use with care, results may be
#' unreliable!
#'
#' The gompertz distribution has an exponentially increasing hazard function
#' under the canonical parametrization. This may cause instability in the
#' convergence of the fitting algorithm in the case of near-exponential data.
#' It may be resolved by using \code{param = "rate"}.
#' @author Göran Broström
#' @seealso \code{\link{coxreg}}, \code{\link{check.dist}},
#' \code{link{aftreg}}.
#' @keywords survival regression
#' @examples
#'
#' data(mort)
#' fit <- phreg(Surv(enter, exit, event) ~ ses, data = mort)
#' fit
#' plot(fit)
#' fit.cr <- coxreg(Surv(enter, exit, event) ~ ses, data = mort)
#' check.dist(fit.cr, fit)
#'
#' @export phreg
phreg <- function (formula = formula(data),
data = parent.frame(),
na.action = getOption("na.action"),
dist = "weibull",
cuts = NULL,
init,
shape = 0,
## Means shape is estimated, ie true Weibull; > 0 fixed!
param = c("canonical", "rate"),
control = list(eps = 1e-8, maxiter = 20, trace = FALSE),
singular.ok = TRUE,
model = FALSE,
x = FALSE,
y = TRUE)
{
if (dist == "pch") warning("Use the function pchreg instead. This will soon be deprecated.")
param <- param[1]
pfixed <- any(shape > 0) & dist %in% c("weibull", "ev") # Fixed 2020-07-26!
call <- match.call()
m <- match.call(expand.dots = FALSE)
temp <- c("", "formula", "data", "na.action")
m <- m[match(temp, names(m), nomatch = 0)] # m is a call
special <- "strata"
Terms <- if (missing(data))
terms(formula, special)
else terms(formula, special, data = data) # Terms is a 'terms' 'formula'
m$formula <- Terms
m[[1]] <- as.name("model.frame")
m <- eval(m, parent.frame())
##return(m)
Y <- model.extract(m, "response")
if (!inherits(Y, "Surv"))
stop("Response must be a survival object")
weights <- model.extract(m, "weights")
offset <- attr(Terms, "offset")
tt <- length(offset)
offset <- if (tt == 0)
rep(0, nrow(Y))
else if (tt == 1)
m[[offset]]
else {
ff <- m[[offset[1]]]
for (i in 2:tt) ff <- ff + m[[offset[i]]]
ff
}
attr(Terms, "intercept") <- 1
strats <- attr(Terms, "specials")$strata
dropx <- NULL
if (length(strats)) {
##if (dist == "pch") # Changed 2.4-0
## stop("No strata allowed in the pch model (yet)")
temp <- survival::untangle.specials(Terms, "strata", 1)
dropx <- c(dropx, temp$terms)
if (length(temp$vars) == 1)
strata.keep <- m[[temp$vars]]
else strata.keep <- survival::strata(m[, temp$vars], shortlabel = TRUE)
strats <- as.numeric(strata.keep)
}
if (length(dropx))
newTerms <- Terms[-dropx]
else newTerms <- Terms
X <- model.matrix(newTerms, m)
##return(X)
if (!(dist %in% c("lognormal", "loglogistic"))){
assign <- lapply(survival::attrassign(X, newTerms)[-1], function(x) x - 1)
}else{
assign <- lapply(survival::attrassign(X, newTerms), function(x) x - 1)
}
## Lognormal & loglogistic need an intercept:
if (!(dist %in% c("lognormal", "loglogistic"))){
X <- X[, -1, drop = FALSE]
intercept <- FALSE
ncov <- NCOL(X)
}else{
intercept <- TRUE
ncov <- NCOL(X) - 1
}
nullModel <- ncov == 0
#########################################
if (ncov){
if (length(dropx)){
covars <- names(m)[-c(1, (dropx + 1))]
}else{
covars <- names(m)[-1]
}
isF <- logical(length(covars))
for (i in 1:length(covars)){
if (length(dropx)){
if (is.logical(m[, -(dropx + 1)][, (i + 1)])){
m[, -(dropx + 1)][, (i + 1)] <-
as.factor(m[, -(dropx + 1)][, (i + 1)])
}
isF[i] <- is.factor(m[, -(dropx + 1)][, (i + 1)])## ||
##is.logical(m[, -(dropx + 1)][, (i + 1)]) )
}else{
if (is.logical(m[, (i + 1)])){
m[, (i + 1)] <- as.factor(m[, (i + 1)])
}
isF[i] <- is.factor(m[, (i + 1)])## ||
##is.logical(m[, (i + 1)]) )
}
}
ant.fak <- sum(isF)
}else{ #!ncov
isF <- logical(0)
ant.fak <- 0
}
##cat("ant.fak = ", ant.fak, "\n")
if (ant.fak){
levels <- list()
index <- 0
for ( i in 1:length(covars) ){
if (isF[i]){
index <- index + 1
if (length(dropx)){
levels[[i]] <- levels(m[, -(dropx + 1)][, (i + 1)])
}else{
levels[[i]] <- levels(m[, (i + 1)])
}
}else{
levels[[i]] <- NULL
}
}
}else{
levels <- NULL
}
isI <- logical(NCOL(X))
if (ant.fak){
indx <- 0
for (i in seq_len(length(covars))){
indx <- indx + 1
if (isF[i]){
isI[indx] <- TRUE
isI[indx] <- TRUE
if (length(levels[[i]]) >= 3){
for (j in 3:length(levels[[i]])){
indx <- indx + 1
isI[indx] <- TRUE
}
}
}
}
}
##########################################
type <- attr(Y, "type")
if (type != "right" && type != "counting")
stop(paste("This model doesn't support \"", type, "\" survival data",
sep = ""))
if (NCOL(Y) == 2){
Y <- cbind(numeric(NROW(Y)), Y)
}
n.events <- sum(Y[, 3] != 0)
if (n.events == 0) stop("No events; no sense in continuing!")
if (missing(init)){ # Is this wise? No!
if (ncov){
##init <- coxreg(formula, data = data)$coefficients
init <- rep(0, ncov)
}else{
init <- numeric(0)
}
}
if (is.list(control)){
if (is.null(control$eps)) control$eps <- 1e-8
if (is.null(control$maxiter)) control$maxiter <- 10
if (is.null(control$trace)) control$trace <- FALSE
}else{
stop("control must be a list")
}
if (dist == "gompertz"){
if (param == "canonical"){
fit <- gompreg(X,
Y,
strats,
offset,
init,
control)
}else if (param == "rate"){
fit <- gompregRate(X,
Y,
strats,
offset,
init,
control)
}else{
stop(paste(param, " is not a known parametrization."))
}
}else if(dist == "pch"){
if (missing(cuts)){
##stop("'dist = pch' needs 'cuts' to be set")
cuts <- numeric(0) # Exponential distribution(s)
}
fit <- pchreg.fit(X,
Y,
cuts,
offset,
strats,
init,
control)
}else{
fit <- phreg.fit(X,
Y,
dist,
strats,
offset,
init,
shape,
control)
}
if (fit$fail){
warning(paste("Failed with error code ", fit$fail))
return(1)
}
if (ncov){
##cat("ncov = ", ncov, "\n")
fit$linear.predictors <- offset + X %*%
fit$coefficients[1:(ncov + intercept)]
}else{
fit$linear.predictors <- numeric(0)
##fit$means <- numeric(0)
}
##score <- exp(lp)
##cat("fit$means == ", fit$means, "\n")
if (!fit$fail){
fit$fail <- NULL
}else{
out <- paste("Singular hessian; suspicious variable No. ",
as.character(fit$fail), ":\n",
names(coefficients)[fit$fail], " = ",
as.character(fit$value),
"\nTry running with fixed shape", sep = "")
stop(out)
}
fit$convergence <- as.logical(fit$conver)
fit$conver <- NULL ## Ugly!
###########################################################################
## Crap dealt with ......
if (is.character(fit)) {
fit <- list(fail = fit)
class(fit) <- "mlreg"
}
else if (is.null(fit$fail)){
if (!is.null(fit$coef) && any(is.na(fit$coef))) {
vars <- (1:length(fit$coef))[is.na(fit$coef)]
msg <- paste("X matrix deemed to be singular; variable",
paste(vars, collapse = " "))
if (singular.ok)
warning(msg)
else stop(msg)
}
fit$n <- nrow(Y)
fit$terms <- Terms
fit$assign <- assign
if (FALSE){ ## Out-commented...(why?)
if (length(fit$coef) && is.null(fit$wald.test)) {
nabeta <- !is.na(fit$coef)
if (is.null(init))
temp <- fit$coef[nabeta]
else temp <- (fit$coef - init)[nabeta]
##fit$wald.test <-
##survival:::coxph.wtest(fit$var[nabeta, nabeta],
## temp, control$toler.chol)$test
}
}
na.action <- attr(m, "na.action")
if (length(na.action))
fit$na.action <- na.action
if (model)
fit$model <- m
if (x) {
fit$x <- X
if (length(strats))
fit$strata <- strata.keep
}
if (y)
fit$y <- Y
}
##if (!is.null(weights) && any(weights != 1))
## fit$weights <- weights
##########################################
s.wght <- (Y[, 2] - Y[, 1])## * weights
fit$ttr <- sum(s.wght)
if (ncov){
fit$isF <- isF
fit$covars <- covars
fit$w.means <- list()
for (i in 1:length(fit$covars)){
nam <- fit$covars[i]
col.m <- which(nam == names(m))
if (isF[i]){
n.lev <- length(levels[[i]])
fit$w.means[[i]] <- numeric(n.lev)
for (j in 1:n.lev){
who <- m[, col.m] == levels[[i]][j]
fit$w.means[[i]][j] <-
sum( s.wght[who] ) / fit$ttr ## * 100, if in per cent
}
}else{
##fit$w.means[[i]] <- sum(s.wght * m[, col.m]) / fit$ttr
fit$w.means[[i]] <- weighted.mean(m[, col.m], s.wght)
}
}
}
##########################################
fit$ttr <- sum(s.wght)
##names(fit$coefficients) <- coef.names
fit$levels <- levels
fit$formula <- formula(Terms)
fit$call <- call
fit$dist <- dist
fit$n.events <- n.events
fit$nullModel <- nullModel # Added 2020-07-26
##class(fit) <- c("phreg", "weibreg", "coxreg", "coxph")
if (fit$dist == "pch"){
class(fit) <- c("pchreg", "phreg")
}else{
class(fit) <- "phreg"
}
if (dist %in% c("weibull", "cv")){
fit$pfixed <- pfixed
}else{
fit$pfixed <- FALSE
}
if (length(strats))
fit$strata <- names(strats)
if (length(strats)){
fit$strata <- levels(as.factor(strata.keep))
}
fit
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.