Nothing
#################################
# Estimation of regression models
#' Discrete Survival Basic Regression Model Fitting
#'
#' Wrapper for estimation of discrete survival general linear model. Several preprocessing
#' options such as time-dependent covariates are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLong") and with
#' time dependent covariates ("dataLongTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumn Column name of the event indicator (class "character").
#' It is required that this is a binary variable with 1=="event" and 0=="censored".
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param timeAsFactor Should the time intervals be coded as factor (class "logical")?
#' Default is FALSE. In the default settings the column is treated as quantitative variable (class "numeric").
#' @param family Specifies the assumption about the response distribution and the link function.
#' Default value is the discrete survival continuation ratio model with logit-link
#' (for comparison see \code{\link[stats]{family}}).
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[stats]{glm}}.
#' @return Returns an object of class c("glm", "lm").
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @note Model is rescaled to exclude intercepts.
#' @seealso \code{\link{dataLong}}, \code{\link{dataLongTimeDep}}, \code{\link[stats]{glm}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur [1:100, ]
#'
#' # Estimate discrete survival continuation ratio model
#' estRegModel <- estReg(dataShort = SubUnempDur, dataTransform = "dataLong",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumn = "censor1", timeColumn = "spell")
#' summary(estRegModel)
#'
#' # Predictions
#' SubUnempDurLong <- dataLong(dataShort = SubUnempDur,
#' eventColumn = "censor1", timeColumn = "spell", timeAsFactor=TRUE)
#' preds <- predict(estRegModel, newdata = SubUnempDurLong)
#' head(preds)
#'
#' @export estReg
estReg <- function(dataShort, dataTransform = "dataLong",
formulaVariable =~ timeInt, timeColumn, eventColumn,
idColumn=NULL, timeAsFactor=TRUE, family=stats::binomial,
storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLong"){
datLong <- dataLong(dataShort = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
timeAsFactor=timeAsFactor)
}
if(dataTransform == "dataLongTimeDep"){
datLong <- dataLongTimeDep(dataSemiLong = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
idColumn=idColumn, timeAsFactor=timeAsFactor)
}
# Construct formula
constructFormula <- formula(paste("y ~ 0 +",
paste(attr(terms(formulaVariable),"term.labels"),
collapse=" + "), sep = " "))
# Fit glm
RES <- glm(formula = constructFormula, family = family, data = datLong, ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Additive Regression Model Fitting
#'
#' Wrapper for estimation of discrete survival generalized additive models. Several preprocessing
#' options such as time-dependent covariates are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLong") and with
#' time dependent covariates ("dataLongTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumn Column name of the event indicator (class "character").
#' It is required that this is a binary variable with 1=="event" and 0=="censored".
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param timeAsFactor Should the time intervals be coded as factor (class "logical")?
#' Default is FALSE. In the default settings the column is treated as quantitative variable (class "numeric").
#' @param family Specifies the assumption about the response distribution and the link function.
#' Default value is the discrete survival continuation ratio model with logit-link
#' (for comparison see \code{\link[stats]{family}}).
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[mgcv]{gam}}.
#' @return Returns an object of class "gam".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included as smooth functions
#' the formula would be "~ s(timeInt) + s(X1)". The variable \emph{timeInt} is constructed before estimation of the model.
#' @note Splines can also be used to include heterogeneity of mixed models with
#' penalized fixed effects. An advantage of this approach is that
#' one does not have to assume a specific distribution for the random effects.
#'
#' Model is rescaled to exclude the intercept.
#' @seealso \code{\link{dataLong}}, \code{\link{dataLongTimeDep}},
#' \code{\link[mgcv]{gam}}, \code{\link{estRegFrailty}}
#' @references
#' \insertRef{bergerTutorial}{discSurv} \cr\cr
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate discrete survival continuation ratio model
#' estRegSmoothModel <- estRegSmooth(dataShort = SubUnempDur, dataTransform = "dataLong",
#' formulaVariable =~ s(timeInt) + s(age) + ui + logwage * ui,
#' eventColumn = "censor1", timeColumn = "spell")
#' summary(estRegSmoothModel)
#'
#' # Estimated smooth functions
#' plot(estRegSmoothModel)
#'
#' @export estRegSmooth
estRegSmooth <- function(dataShort, dataTransform = "dataLong",
formulaVariable =~ s(timeInt), timeColumn, eventColumn,
idColumn=NULL, timeAsFactor=FALSE,
family=stats::binomial, storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLong"){
datLong <- dataLong(dataShort = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
timeAsFactor=timeAsFactor)
}
if(dataTransform == "dataLongTimeDep"){
datLong <- dataLongTimeDep(dataSemiLong = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
idColumn=idColumn, timeAsFactor=timeAsFactor)
}
# Construct formula
constructFormula <- formula(paste("y ~ 0 +",
paste(attr(terms(formulaVariable),"term.labels"),
collapse=" + "), sep = " "))
# Fit glm
RES <- mgcv::gam(formula = constructFormula, family = family, data = datLong,
...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Tree Fitting
#'
#' Wrapper for estimation of discrete survival classification and regression tree.
#' Several preprocessing options such as time-dependent covariates are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLong") and with
#' time dependent covariates ("dataLongTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumn Column name of the event indicator (class "character").
#' It is required that this is a binary variable with 1=="event" and 0=="censored".
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param timeAsFactor Should the time intervals be coded as factor (class "logical")?
#' Default is FALSE. In the default settings the column is treated as quantitative variable (class "numeric").
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param responseAsFactor Should the response be converted to factor? Default is TRUE.
#' Representation as factor is technically important for classification split rules such as
#' Gini index. For details see \code{\link[rpart]{rpart}}.
#' @param \dots Specification of additional arguments in function \code{\link[rpart]{rpart}}.
#' @return Returns an object of class "rpart".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example, if the two variables \emph{timeInt} and \emph{X1} should be included,
#' the formula would be "~ timeInt + X1". The variable \emph{timeInt} is constructed before
#' estimation of the model.
#' @seealso \code{\link{dataLong}}, \code{\link{dataLongTimeDep}}, \code{\link[rpart]{rpart}}
#' @references
#' \insertRef{schmidSurvivalTreeEpi}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate discrete survival tree
#' estTreeModel <- estTree(dataShort = SubUnempDur, dataTransform = "dataLong",
#' formulaVariable =~ timeInt + age + ui + logwage + ui,
#' eventColumn = "censor1", timeColumn = "spell")
#'
#' # Graphical illustration of fitted tree
#' rpart.plot::rpart.plot(estTreeModel)
#'
#' @export estTree
estTree <- function(dataShort, dataTransform = "dataLong",
formulaVariable =~ timeInt, timeColumn,
eventColumn, idColumn=NULL,
timeAsFactor=FALSE, storeAugData=TRUE,
responseAsFactor=TRUE,
... ){
# Data transformation
if(dataTransform=="dataLong"){
datLong <- dataLong(dataShort = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
timeAsFactor=timeAsFactor)
}
if(dataTransform == "dataLongTimeDep"){
datLong <- dataLongTimeDep(dataSemiLong = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
idColumn=idColumn, timeAsFactor = timeAsFactor)
}
if(responseAsFactor){
datLong$y <- factor(datLong$y, levels=c(0, 1))
}
# Construct formula
constructFormula <- formula(paste("y ~",
paste(attr(terms(formulaVariable),"term.labels"),
collapse=" + "), sep = " "))
# Fit glm
RES <- rpart::rpart(formula = constructFormula, data = datLong, method="class", ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Random Forest Fitting
#'
#' Wrapper for estimation of discrete survival random forest.
#' Several preprocessing options such as time-dependent covariates are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLong") and with
#' time dependent covariates ("dataLongTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumn Column name of the event indicator (class "character").
#' It is required that this is a binary variable with 1=="event" and 0=="censored".
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param timeAsFactor Should the time intervals be coded as factor (class "logical")?
#' Default is FALSE. In the default settings the column is treated as quantitative variable (class "numeric").
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param responseAsFactor Should the response be converted to factor? Default is TRUE.
#' Representation as factor is technically important for classification split rules such as
#' Gini index. For details see \code{\link[ranger]{ranger}}.
#' @param \dots Specification of additional arguments in function \code{\link[ranger]{ranger}}.
#' @return Returns an object of class "ranger".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example, if the two variables \emph{timeInt} and \emph{X1} should be included,
#' the formula would be "~ timeInt + X1". The variable \emph{timeInt} is constructed before
#' estimation of the model.
#' @seealso \code{\link{dataLong}}, \code{\link{dataLongTimeDep}}, \code{\link[ranger]{ranger}}
#' @references
#' \insertRef{schmidHellinger}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate discrete survival random forest
#' estRFModel <- estForest(dataShort = SubUnempDur, dataTransform = "dataLong",
#' formulaVariable =~ timeInt + age + ui + logwage + ui,
#' eventColumn = "censor1", timeColumn = "spell")
#' estRFModel
#'
#' @export estForest
estForest <- function(dataShort, dataTransform = "dataLong",
formulaVariable =~ timeInt, timeColumn,
eventColumn, idColumn=NULL,
timeAsFactor=FALSE, storeAugData=TRUE,
responseAsFactor=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLong"){
datLong <- dataLong(dataShort = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
timeAsFactor = timeAsFactor)
}
if(dataTransform == "dataLongTimeDep"){
datLong <- dataLongTimeDep(dataSemiLong = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
idColumn=idColumn, timeAsFactor = timeAsFactor)
}
# Convert response to factor
if(responseAsFactor){
datLong$y <- factor(datLong$y, levels=c(0, 1))
}
# Construct formula
constructFormula <- formula(paste("y ~",
paste(attr(terms(formulaVariable),"term.labels"),
collapse=" + "), sep = " "))
# Fit
RES <- ranger::ranger(formula = constructFormula, data = datLong,
probability=TRUE, ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Frailty Regression Model Fitting
#'
#' Wrapper for estimation of discrete survival frailty regression model.
#' Smooth functions of covariates are supported. Several preprocessing options,
#' such as time-dependent covariates, are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLong") and with
#' time dependent covariates ("dataLongTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumn Column name of the event indicator (class "character").
#' It is required that this is a binary variable with 1=="event" and 0=="censored".
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param timeAsFactor Should the time intervals be coded as factor (class "logical")?
#' Default is FALSE. In the default settings the column is treated as quantitative variable (class "numeric").
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[ranger]{ranger}}.
#' @return Returns an object of class "merMod".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example, if the two variables \emph{timeInt} and \emph{X1} should be included,
#' the formula would be "~ timeInt + X1". The variable \emph{timeInt} is constructed before
#' estimation of the model.
#' @note Model is rescaled to exclude the intercept.
#' @seealso \code{\link{dataLong}}, \code{\link{dataLongTimeDep}},
#' \code{\link[lme4]{glmer}}, \code{\link{estRegSmooth}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate discrete survival frailty regression model
#' estFrailtyModel <- estRegFrailty(dataShort = SubUnempDur,
#' dataTransform = "dataLong", formulaVariable =~timeInt + (1 | obj),
#' eventColumn = "censor1", timeColumn = "spell")
#' estFrailtyModel
#'
#' @export estRegFrailty
estRegFrailty <- function(dataShort, dataTransform = "dataLong",
formulaVariable =~ timeInt + (1|obj), timeColumn,
eventColumn, idColumn=NULL,
timeAsFactor=FALSE, storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLong"){
datLong <- dataLong(dataShort = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
timeAsFactor = timeAsFactor)
}
if(dataTransform == "dataLongTimeDep"){
datLong <- dataLongTimeDep(dataSemiLong = dataShort,
timeColumn=timeColumn, eventColumn=eventColumn,
idColumn=idColumn, timeAsFactor = timeAsFactor)
}
# Construct formula
constructFormula <- formula(paste("y ~ 0 +",
paste(attr(terms(formulaVariable),"term.labels"),
collapse=" + "), sep = " "))
# Fit
RES <- lme4::glmer(formula = constructFormula, data = datLong,
family=binomial, ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
##########################
# gumbel
# Description:
# Specifies the link function of gumbel distribution function in the contex of general, linear models
# Can be used in function glm
# Output
# Should be used in context with glm in the base package
#' Gumbel Link Function
#'
#' Constructs the link function with gumbel distribution in approriate format
#' for use in generalized, linear models.
#'
#' Insert this function into a binary regression model
#'
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#'
#' Matthias Schmid \email{matthias.schmid@@imbie.uni-bonn.de}
#' @seealso \code{\link{glm}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with copenhagen stroke study
#' library(pec)
#' data(cost)
#' head(cost)
#'
#' # Take subsample and convert time to months
#' costSub <- cost [1:50, ]
#' costSub$time <- ceiling(costSub$time/30)
#' costLong <- dataLong(dataShort = costSub, timeColumn = "time", eventColumn = "status",
#' timeAsFactor=TRUE)
#' gumbelModel <- glm(formula = y ~ timeInt + diabetes, data = costLong,
#' family = binomial(link = gumbel()))
#'
#' # Estimate hazard given prevStroke and no prevStroke
#' hazPrevStroke <- predict(gumbelModel, newdata=data.frame(timeInt = factor(1:143),
#' diabetes = factor(rep("yes", 143), levels = c("no", "yes"))), type = "response")
#' hazWoPrevStroke <- predict(gumbelModel, newdata = data.frame(timeInt = factor(1:143),
#' diabetes=factor(rep("no", 143), levels = c("no", "yes"))), type = "response")
#'
#' # Estimate survival function
#' SurvPrevStroke <- cumprod(1 - hazPrevStroke)
#' SurvWoPrevStroke <- cumprod(1 - hazWoPrevStroke)
#'
#' # Example graphics of survival curves with and without diabetes
#' plot(x = 1:143, y = SurvWoPrevStroke, type = "l", xlab = "Months",
#' ylab = "S (t|x)", las = 1, lwd = 2, ylim = c(0,1))
#' lines(x = 1:143, y = SurvPrevStroke, col = "red", lwd = 2)
#' legend("topright", legend = c("Without diabetes", "Diabetes"),
#' lty = 1, lwd =2, col = c("black", "red"))
#'
#' @export gumbel
gumbel <- function()
{
linkfun <- function(mu) - log(- log(mu) )
linkinv <- function(eta) exp(- exp(-eta))
mu.eta <- function(eta) exp(- exp(-eta)) * exp(-eta)
valideta <- function(eta) TRUE
link <- paste("gumbel")
structure(list(linkfun = linkfun, linkinv = linkinv,
mu.eta = mu.eta, valideta = valideta,
name = link), class = "link-glm")
}
#########################
# estCumHaz
#' Cumulative Hazard Function
#'
#' Estimates the cumulative hazard function \eqn{\Gamma}(T = t|x) based on estimated hazard rates.
#' The hazard rates may or may not depend on covariates. The covariates have to
#' be equal across all estimated hazard rates. Therefore the given hazard rates
#' should only vary over time.
#'
#' The argument \emph{hazards} must be given for all intervals \eqn{\left[ a_0, a_1 \right), \left[ a_1,
#' a_2 \right), ..., \left[ a_{q-1}, a_q \right), \left[ a_q, \infty \right)}.
#'
#' @param hazards Estimated hazard rates (class "numeric")
#' @return Estimated probabilities of survival (class "numeric")
#' @note It is assumed that all time points up to the last
#' theoretical interval \eqn{\left[ a_q, \infty \right)} are available. If not already present,
#' these can be added manually.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur[1:100, ]
#'
#' # Convert to long format
#' UnempLong <- dataLong(dataShort = subUnempDur, timeColumn = "spell", eventColumn = "censor1")
#'
#' # Estimate binomial model with logit link
#' Fit <- glm(formula = y ~ timeInt + age + logwage, data=UnempLong, family = binomial())
#'
#' # Estimate discrete survival function given age, logwage of first person
#' hazard <- predict(Fit, newdata = subset(UnempLong, obj == 1), type = "response")
#' cumHazCondX <- estCumHaz(c(hazard, 1))
#' cumHazCondX
#'
#' @export estCumHaz
estCumHaz <- function(hazards) {
# Input checks
if(!is.vector(hazards)) {stop(
"Argument *hazards* is not a vector! Please specify a numeric vector of estimated hazard rates.")}
if(!all(-sqrt(.Machine$double.eps) <= hazards &
hazards - 1 <= sqrt(.Machine$double.eps))) {
stop("Argument *hazards* is not a vector of probabilities! Please specify a numeric vector of estimated hazard rates.")}
erg <- cumsum(hazards)
names(erg) <- paste("CumHaz(T=", 1:length(hazards), ")", sep="")
class(erg) <- "discSurvEstCumHaz"
return(erg)
}
#########################
# estSurv
# Description
# Estimates the discrete survival function given discrete hazard rates
# The hazard rates may or not depend on covariates, but the covariates have to be equal for all estimates of the hazard rate!
# Therefore the hazard rates only depend on time.
# Input
# haz: Estimated hazard rates, which only depend on a time interval (class "numeric).
# It is assumed that the estimates contain the intervals [0, a_1), [a_1, a_2), ..., [a_{q-1}, a_{q}) and the last interval is computed
# Output
# Survival function over all intervals [0, a_1), [a_1, a_2), ..., [a_{q-1}, a_{q}), [a_q, a_{\infty}
#' Survival Function
#'
#' Estimates the survival function S(T = t|x) based on estimated hazard rates.
#' The hazard rates may or may not depend on covariates. The covariates have to
#' be equal across all estimated hazard rates. Therefore the given hazard rates
#' should only vary over time.
#'
#' The argument \emph{hazards} must be given for all intervals \eqn{\left[ a_0, a_1 \right), \left[ a_1,
#' a_2 \right), ..., \left[ a_{q-1}, a_q \right), \left[ a_q, \infty \right)}.
#'
#' @param hazards Estimated hazard rates (class "numeric")
#' @return Estimated probabilities of survival (class "numeric")
#' @note It is assumed that all time points up to the last
#' theoretical interval \eqn{\left[ a_q, \infty \right)} are available. If not already present,
#' these can be added manually.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @seealso \code{\link{estMargProb}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#'
#' # Example unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur [1:100, ]
#'
#' # Convert to long format
#' UnempLong <- dataLong(dataShort = subUnempDur, timeColumn = "spell", eventColumn = "censor1")
#' head(UnempLong)
#'
#' # Estimate binomial model with logit link
#' Fit <- glm(formula = y ~ timeInt + age + logwage, data=UnempLong, family = binomial())
#'
#' # Estimate discrete survival function given age, logwage of first person
#' hazard <- predict(Fit, newdata = subset(UnempLong, obj == 1), type = "response")
#' SurvivalFuncCondX <- estSurv(c(hazard, 1))
#' plot(SurvivalFuncCondX)
#'
#' @export estSurv
estSurv <- function(hazards) {
# Input checks
if(!is.vector(hazards)) {stop(
"Argument *hazards* is not a vector! Please specify a numeric vector of estimated hazard rates.")}
if(!all(-sqrt(.Machine$double.eps) <= hazards &
hazards - 1 <= sqrt(.Machine$double.eps))) {
stop("Argument *hazards* is not a vector of probabilities! Please specify a numeric vector of estimated hazard rates.")}
erg <- cumprod(1 - hazards)
names(erg) <- paste("S(T=", 1:length(hazards), ")", sep="")
class(erg) <- "discSurvEstSurv"
return(erg)
}
#########################
# estSurvCens
# Description
# Estimates the censoring survival function using life table estimator
#' Survival Function Of Censoring Process
#'
#' Estimates the marginal survival function G(T=t) of the censoring process based on
#' a life table estimator. Compatible with single event and competing risks data.
#'
#'
#' @param dataShort Data in original short format (class "data.frame").
#' @param timeColumn Name of column with discrete time intervals (class "character").
#' @param eventColumns Names of the event columns of \code{dataShort} (class "character"). In the
#' competing risks case the event columns have to be in dummy encoding format
#' (class "numeric").
#' @param censInterval Assumption about when censoring takes places within an interval
#' on the continuous time scale (class "character").
#' Possible values are "start", "middle", "end". Default is "middle".
#' @return Named vector of estimated survival function of the censoring process
#' for all time points except the last theoretical interval.
#' @note In the censoring survival function the last time interval \eqn{\left[ a_q, \infty \right)}
#' has the probability of zero.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @seealso \code{\link{estSurv}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Load unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur [1:100, ]
#'
#' ######################
#' # Single event example
#'
#' # Estimate censoring survival function G(t)
#' estG <- estSurvCens(dataShort = subUnempDur, timeColumn = "spell",
#' eventColumns = "censor1")
#' estG
#'
#' #########################
#' # Competing risks example
#'
#' # Estimate censoring survival function G(t)
#' estG <- estSurvCens(dataShort = subUnempDur, timeColumn = "spell",
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"))
#' estG
#'
#'
#' @export estSurvCens
estSurvCens <- function(dataShort, timeColumn, eventColumns,
censInterval="middle") {
# # Expand short data to long competing risks format
# dataLongFormat <- dataLongCompRisks (dataShort = dataShort, timeColumn = timeColumn, eventColumns = eventColumns,
# timeAsFactor = timeAsFactor)
#
# # Construct censoring data
# dataShortLongCens <- dataCensoring (dataShortLong = dataLongFormat, respColumn = "y",
# timeColumn = "timeInt")
# Convert to censoring format
dataShortCens <- dataCensoring (dataShort = dataShort, eventColumns = eventColumns,
timeColumn = timeColumn)
# Estimate nonparametric survival function of censoring variable
tempLifeTab <- lifeTable(dataShort = dataShortCens, timeColumn = "timeCens",
eventColumn = "yCens", censInterval=censInterval)
preG <- tempLifeTab [[1]] [, "S"]
GT <- c(1, preG)
names(GT) <- paste("t=", 0:length(preG), sep="")
return(GT)
}
#########################
# estMeasures
#' Fit Discrete Survival Measures Based On Hazards
#'
#' Wrapper to estimate survival functions, cumulative hazards and
#' marginal probabilities of all individuals of a given data set.
#'
#' @param hazards Estimated hazards of the event (class "numeric").
#' @param obj Integer identification number of each individual.
#' Usually this information is computed during data augmentation (class "numeric").
#' @return List of estimated measures (class "list"). There are three list elements with
#' following contents: \itemize{
#' \item {surv} List of estimated survival curves for each individual.
#' \item {cumHaz} List of estimated cumulative hazards for each individual.
#' \item {margProb} List of estimated marginal probabilities for each individual.
#' }
#' @note It is assumed that the data set was preprocessed with functions such as
#' \code{\link{dataLong}} or \code{\link{dataLongTimeDep}}.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @seealso \code{\link{estSurv}}, \code{\link{estCumHaz}},
#' \code{\link{estMargProb}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Load unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur[1:100, ]
#'
#' #########################
#' # Estimate hazard rates
#'
#' estRegModel <- estReg(dataShort = subUnempDur, dataTransform = "dataLong",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumn = "censor1", timeColumn = "spell")
#' estHaz <- predict(estRegModel, type="response")
#'
#' ######################
#' # Single event example
#'
#' # Estimate all survival functions of a given data set
#' measures1 <- estMeasures(hazards=estHaz,
#' obj=attr(estRegModel, "augData")$obj)
#'
#' # Survival function of first individual
#' measures1$surv[[1]]
#'
#' # Cumulative hazard of first individual
#' measures1$cumHaz[[1]]
#'
#' # Marginal probabilities of first individual
#' measures1$margProb[[1]]
#'
#' @export estMeasures
estMeasures <- function(hazards, obj) {
# Prepare data
splitData <- split(hazards, f=obj)
RES <- list(surv=NA, cumHaz=NA, margProb=NA)
# Estimate survival functions
RES$surv <- lapply(splitData, function(x){
estSurv(c(x, 1))
})
# Estimate cumulative hazards
RES$cumHaz <- lapply(splitData, function(x){
estCumHaz(c(x, 1))
})
# Estimate marginal probabilities
RES$margProb <- lapply(splitData, function(x){
estMargProb(c(x, 1))
})
return(RES)
}
#########################
# estMargProb
# Description
# Estimates the discrete marginal probability P(T = t) given discrete hazard rates
# The hazard rates may or not depend on covariates, but the covariates have to be equal for all estimates of the hazard rate!
# Therefore the hazard rates only depend on time.
# Input
# haz: Estimated hazard rates, which only depend on a time interval (class "numeric").
# It is assumed that the estimates contain the intervals [0, a_1), [a_1, a_2), ..., [a_{q-1}, a_{q}) and the last interval is computed
# Output
# Survival function over all intervals [0, a_1), [a_1, a_2), ..., [a_{q-1}, a_{q}), [a_q, a_{\infty}
#' Marginal Probabilities
#'
#' Estimates the marginal probability P(T=t|x) based on estimated discrete hazards.
#' The discrete hazards may or may not depend on covariates. The covariates have to
#' be equal across all estimated hazard rates. Therefore the given discrete hazards
#' should only vary over time.
#'
#' The argument \emph{hazards} must be given for all intervals \eqn{\left[ a_0, a_1 \right), \left[ a_1,
#' a_2 \right), ..., \left[ a_{q-1}, a_q \right), \left[ a_q, \infty \right)}.
#'
#' @param hazards Estimated discrete hazards (class "numeric")
#' @return Estimated marginal probabilities (class "numeric")
#' @note It is assumed that all time points up to the last interval \eqn{\left[ a_q, \infty \right)}
#' are available. If not already present, these can be added manually.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @seealso \code{\link{estSurv}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur [1:100, ]
#'
#' # Convert to long format
#' UnempLong <- dataLong(dataShort = subUnempDur, timeColumn = "spell", eventColumn = "censor1")
#' head(UnempLong)
#'
#' # Estimate binomial model with logit link
#' Fit <- glm(formula = y ~ timeInt + age + logwage, data = UnempLong, family = binomial())
#'
#' # Estimate discrete survival function given age, logwage of first person
#' hazard <- predict(Fit, newdata = subset(UnempLong, obj == 1), type = "response")
#'
#' # Estimate marginal probabilities given age, logwage of first person
#' MarginalProbCondX <- estMargProb (c(hazard, 1))
#' MarginalProbCondX
#' sum(MarginalProbCondX)==1 # TRUE: Marginal probabilities must sum to 1!
#'
#' @export estMargProb
estMargProb <- function(hazards) {
# Input checks
if(!is.vector(hazards)) {stop("Argument *hazards* is not a vector! Please specify a numeric vector of estimated hazard rates.")}
if(!all(-sqrt(.Machine$double.eps) <= hazards &
hazards - 1 <= sqrt(.Machine$double.eps))) {
stop("Argument *hazards* is not between 0 and 1! Please specify a numeric vector of probabilities.")
}
if(length(hazards) == 0) {stop("Argument *hazards* is empty! Please specify a numeric vector of probabilities.")}
if(length(hazards) > 1) {
EstSurv <- estSurv(hazards)
EstProb <- c(hazards[1], sapply(2:length(hazards), function (x) hazards [x] * EstSurv [x - 1]))
names(EstProb) <- paste("P(T=", 1:length(hazards), ")", sep = "")
}
else {
EstProb <- 1
names(EstProb) <- paste("P(T=", 1, ")", sep = "")
}
class(EstProb) <- "discSurvEstMargProb"
return(EstProb)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.