Nothing
#########################
# estMeasuresCompRisks
#' Fit Discrete Survival Measures Based On Cause-Specific Hazards
#'
#' Wrapper to estimate competing risks all cause survival functions,
#' cumulative hazards and marginal probabilities of all individuals of a given data set.
#'
#' @param hazards Estimated discrete hazards of the events (numeric class c("matrix", "array")).
#' Discrete hazards of each time interval are stored in the rows and the number
#' of columns equal to the number of events.
#' @param obj Integer identification number of each individual.
#' Usually this information is computed during data augmentation (class "numeric").
#' @return List of estimated measures (class "list"). There are three list elements with
#' following contents: \itemize{
#' \item {surv_a} List of estimated all cause survival curves for each individual.
#' \item {cumHaz_a} List of estimated all cause cumulative hazards for each individual.
#' \item {margProb_a} List of estimated all cause marginal probabilities for each individual.
#' }
#' @note It is assumed that the data set was preprocessed with functions such as
#' \code{\link{dataLong}} or \code{\link{dataLongTimeDep}}.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @seealso \code{\link{estSurv}}, \code{\link{estCumHaz}},
#' \code{\link{estMargProb}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Load unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur[1:100, ]
#'
#' ######################################
#' # Estimate cause-specific hazard rates
#'
#' estModel <- estRegSmoothCompRisks(dataShort = subUnempDur, dataTransform = "dataLongCompRisks",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#'
#' # Estimate cause-specific hazards (without censoring "e0")
#' estHaz <- predict(estModel, type="response")[, -1]
#'
#' ######################
#' # Single event example
#'
#' # Estimate all survival functions of a given data set
#' measures1CompRisks <- estMeasuresCompRisks(hazards=estHaz,
#' obj=attr(estModel, "augData")$obj)
#'
#' # All cause survival function of first individual
#' measures1CompRisks$surv_a[[1]]
#'
#' # All cause cumulative hazard of first individual
#' measures1CompRisks$cumHaz_a[[1]]
#'
#' # All cause marginal probabilities of first individual
#' measures1CompRisks$margProb_a[[1]]
#'
#' @export estMeasuresCompRisks
estMeasuresCompRisks <- function(hazards, obj) {
# Estimate all cause hazards
hazards_a <- rowSums(hazards)
# Prepare data
splitData <- split(hazards_a, f=obj)
RES <- list(surv_a=NA, cumHaz_a=NA, margProb_a=NA)
# Estimate survival functions
RES$surv_a <- lapply(splitData, function(x){
estSurv(c(x, 1))
})
# Estimate cumulative hazards
RES$cumHaz_a <- lapply(splitData, function(x){
estCumHaz(c(x, 1))
})
# Estimate marginal probabilities
RES$margProb_a <- lapply(splitData, function(x){
estMargProb(c(x, 1))
})
return(RES)
}
##################################################
# Discrete survival competing risks model fitting
#' Discrete Survival Model Fitting For Competing Risks
#'
#' Wrapper for estimation of parametric discrete survival models for cause-specific competing risk models.
#' Several preprocessing options such as time-dependent covariates are available and
#' linear as well as smooth effects can be specified.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLongCompRisks") and with
#' time dependent covariates ("dataLongCompRisksTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumns Character vector giving the column names of the event
#' indicators without censoring column (class "character"). It is required that all events are
#' binary encoded. If the sum of all event indicators is zero, then this is
#' interpreted as a censored observation. Alternatively a column name of a
#' factor representing competing events can be given. In this case the argument
#' \emph{eventColumnsAsFactor} has to be set TRUE and the first level is assumed to
#' represent censoring.
#' @param eventColumnsAsFactor Should the argument eventColumns be intepreted
#' as column name of a factor variable (class "logical")? Default is FALSE.
#' @param timeAsFactor Specifies if the computed discrete time intervals should be
#' converted to a categorical variable (class "logical"). Default is FALSE.
#' In the default settings the discret time intervals are treated
#' as quantitative (class "numeric").
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param family Specifies the assumption about the response distribution and the link function.
#' Default value is the discrete survival cause-specific competing risks model with
#' multinomial logistic function (function \code{\link[VGAM]{multinomial}}).
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[VGAM]{vgam}}.
#' @return Returns an object of class "vgam".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @seealso \code{\link{dataLongCompRisks}}, \code{\link{dataLongCompRisksTimeDep}},
#' \code{\link[VGAM]{vgam}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv} \cr\cr
#' \insertRef{schmidCompRisks}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate cause-specific hazard rates
#' estModel <- estRegCompRisks(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' estModel
#'
#' @export estRegCompRisks
estRegCompRisks <- function(dataShort, dataTransform = "dataLongCompRisks",
formulaVariable =~ timeInt, timeColumn,
eventColumns, eventColumnsAsFactor=FALSE,
timeAsFactor=FALSE, idColumn=NULL,
family=VGAM::multinomial, storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLongCompRisks"){
datLong <- dataLongCompRisks(dataShort = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns, timeAsFactor=timeAsFactor,
eventColumnsAsFactor = eventColumnsAsFactor)
}
if(dataTransform == "dataLongCompRisksTimeDep"){
datLong <- dataLongCompRisksTimeDep(dataSemiLong = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns,
eventColumnsAsFactor = eventColumnsAsFactor,
timeAsFactor=timeAsFactor, idColumn=idColumn)
}
# Construct formula
eventColumns <- grep("e[0-9]+", names(datLong), value = TRUE)
constructFormula <- formula(paste("cbind(", paste(eventColumns, collapse = ","), ")",
"~ ", paste(attr(terms(formulaVariable),"term.labels"), collapse=" + "), sep = " "))
# Fit
RES <- VGAM::vglm(formula=constructFormula, family=family, data=datLong, ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
##################################################
# Discrete survival competing risks model fitting
#' Discrete Survival Additive Model Fitting For Competing Risks
#'
#' Wrapper for estimation of discrete survival generalized, additive models for cause-specific competing risk models.
#' Several preprocessing options such as time-dependent covariates are available and
#' linear as well as smooth effects can be specified.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLongCompRisks") and with
#' time dependent covariates ("dataLongCompRisksTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumns Character vector giving the column names of the event
#' indicators without censoring column (class "character"). It is required that all events are
#' binary encoded. If the sum of all event indicators is zero, then this is
#' interpreted as a censored observation. Alternatively a column name of a
#' factor representing competing events can be given. In this case the argument
#' \emph{eventColumnsAsFactor} has to be set TRUE and the first level is assumed to
#' represent censoring.
#' @param eventColumnsAsFactor Should the argument eventColumns be intepreted
#' as column name of a factor variable (class "logical")? Default is FALSE.
#' @param timeAsFactor Specifies if the computed discrete time intervals should be
#' converted to a categorical variable (class "logical"). Default is FALSE.
#' In the default settings the discret time intervals are treated
#' as quantitative (class "numeric").
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param family Specifies the assumption about the response distribution and the link function.
#' Default value is the discrete survival cause-specific competing risks model with
#' multinomial logistic function (function \code{\link[VGAM]{multinomial}}).
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[VGAM]{vgam}}.
#' @return Returns an object of class "vgam".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @seealso \code{\link{dataLongCompRisks}}, \code{\link{dataLongCompRisksTimeDep}},
#' \code{\link[VGAM]{vgam}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv} \cr\cr
#' \insertRef{schmidCompRisks}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate cause-specific hazard rates
#' estModel <- estRegSmoothCompRisks(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' estModel
#'
#' @export estRegSmoothCompRisks
estRegSmoothCompRisks <- function(dataShort, dataTransform = "dataLongCompRisks",
formulaVariable =~ timeInt, timeColumn,
eventColumns, eventColumnsAsFactor=FALSE,
timeAsFactor=FALSE, idColumn=NULL,
family=VGAM::multinomial, storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLongCompRisks"){
datLong <- dataLongCompRisks(dataShort = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns, timeAsFactor=timeAsFactor)
}
if(dataTransform == "dataLongCompRisksTimeDep"){
datLong <- dataLongCompRisksTimeDep(dataSemiLong = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns,
eventColumnsAsFactor = eventColumnsAsFactor,
timeAsFactor=timeAsFactor, idColumn=idColumn)
}
# Construct formula
eventColumns <- grep("e[0-9]+", names(datLong), value = TRUE)
constructFormula <- formula(paste("cbind(", paste(eventColumns, collapse = ","), ")",
"~ ", paste(attr(terms(formulaVariable),"term.labels"), collapse=" + "), sep = " "))
# Fit
RES <- VGAM::vgam(formula=constructFormula, family=family, data=datLong, ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Tree Fitting For Competing Risks
#'
#' Wrapper for estimation of discrete survival tree for cause-specific competing risk models.
#' Several preprocessing options such as time-dependent covariates are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLongCompRisks") and with
#' time dependent covariates ("dataLongCompRisksTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumns Character vector giving the column names of the event
#' indicators without censoring column (class "character"). It is required that all events are
#' binary encoded. If the sum of all event indicators is zero, then this is
#' interpreted as a censored observation. Alternatively a column name of a
#' factor representing competing events can be given. In this case the argument
#' \emph{eventColumnsAsFactor} has to be set TRUE and the first level is assumed to
#' represent censoring.
#' @param eventColumnsAsFactor Should the argument eventColumns be intepreted
#' as column name of a factor variable (class "logical")? Default is FALSE.
#' @param timeAsFactor Specifies if the computed discrete time intervals should be
#' converted to a categorical variable (class "logical"). Default is FALSE.
#' In the default settings the discret time intervals are treated
#' as quantitative (class "numeric").
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[VGAM]{vgam}}.
#' @return Returns an object of class "rpart".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @seealso \code{\link{dataLongCompRisks}}, \code{\link{dataLongCompRisksTimeDep}},
#' \code{\link[rpart]{rpart}}
#' @references
#' \insertRef{bergerClassTree}{discSurv} \cr\cr
#' \insertRef{schmidCompRisks}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate GEE models for all events
#' estModel <- estRegSmoothCompRisks(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' estModel
#'
#' @export estTreeCompRisks
estTreeCompRisks <- function(dataShort, dataTransform = "dataLongCompRisks",
formulaVariable =~ timeInt, timeColumn,
eventColumns, eventColumnsAsFactor=FALSE,
timeAsFactor=FALSE, idColumn=NULL,
storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLongCompRisks"){
datLong <- dataLongCompRisks(dataShort = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns, timeAsFactor=timeAsFactor,
responseAsFactor=TRUE,
eventColumnsAsFactor=eventColumnsAsFactor)
}
if(dataTransform == "dataLongCompRisksTimeDep"){
datLong <- dataLongCompRisksTimeDep(dataSemiLong = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns,
eventColumnsAsFactor = eventColumnsAsFactor,
timeAsFactor=timeAsFactor, idColumn=idColumn,
responseAsFactor=TRUE)
}
# Construct formula
constructFormula <- formula(paste("responses ~", paste(attr(terms(formulaVariable),"term.labels"), collapse=" + "), sep = " "))
# Fit
RES <- rpart::rpart(formula=constructFormula, data=datLong, method="class", ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Random Forest Fitting For Competing Risks
#'
#' Wrapper for estimation of discrete survival random forest for cause-specific competing risk models.
#' Several preprocessing options such as time-dependent covariates are available.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLongCompRisks") and with
#' time dependent covariates ("dataLongCompRisksTimeDep"). The default is set to the former.
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumns Character vector giving the column names of the event
#' indicators without censoring column (class "character"). It is required that all events are
#' binary encoded. If the sum of all event indicators is zero, then this is
#' interpreted as a censored observation. Alternatively a column name of a
#' factor representing competing events can be given. In this case the argument
#' \emph{eventColumnsAsFactor} has to be set TRUE and the first level is assumed to
#' represent censoring.
#' @param eventColumnsAsFactor Should the argument eventColumns be intepreted
#' as column name of a factor variable (class "logical")? Default is FALSE.
#' @param timeAsFactor Specifies if the computed discrete time intervals should be
#' converted to a categorical variable (class "logical"). Default is FALSE.
#' In the default settings the discret time intervals are treated
#' as quantitative (class "numeric").
#' @param idColumn Name of column of identification number of persons (class "character").
#' Default is set to use function \code{\link{dataLong}}, that does not need this argument.
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[VGAM]{vgam}}.
#' @return Returns an object of class "ranger".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @seealso \code{\link{dataLongCompRisks}}, \code{\link{dataLongCompRisksTimeDep}},
#' \code{\link[rpart]{rpart}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate GEE models for all events
#' estModel <- estRegSmoothCompRisks(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' estModel
#'
#' @export estForestCompRisks
estForestCompRisks <- function(dataShort, dataTransform = "dataLongCompRisks",
formulaVariable =~ timeInt, timeColumn,
eventColumns, eventColumnsAsFactor=FALSE,
timeAsFactor=FALSE, idColumn=NULL, storeAugData=TRUE,
... ){
# Data transformation
if(dataTransform=="dataLongCompRisks"){
datLong <- dataLongCompRisks(dataShort = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns, timeAsFactor=timeAsFactor,
responseAsFactor=TRUE)
}
if(dataTransform == "dataLongCompRisksTimeDep"){
datLong <- dataLongCompRisksTimeDep(dataSemiLong = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns,
eventColumnsAsFactor = eventColumnsAsFactor,
timeAsFactor=timeAsFactor, idColumn=idColumn,
responseAsFactor=TRUE)
}
# Construct formula
constructFormula <- formula(paste("responses ~", paste(attr(terms(formulaVariable),"term.labels"), collapse=" + "), sep = " "))
# Fit
RES <- ranger::ranger(formula=constructFormula, data=datLong, probability=TRUE, ...)
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Discrete Survival Subdistribution Fitting
#'
#' Wrapper for estimation of discrete survival regression subdistribution hazard models
#' that includes data preprocessing.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param timeColumn Character giving the column name of the observed times.
#' It is required that the observed times are discrete (class "integer").
#' @param eventColumns Character vector giving the column names of the event
#' indicators without censoring column (class "character"). It is required that all events are
#' binary encoded. If the sum of all event indicators is zero, then this is
#' interpreted as a censored observation. Alternatively a column name of a
#' factor representing competing events can be given. In this case the argument
#' \emph{eventColumnsAsFactor} has to be set TRUE and the first level is assumed to
#' represent censoring.
#' @param eventColumnsAsFactor Should the argument eventColumns be intepreted
#' as column name of a factor variable (class "logical")? Default is FALSE.
#' @param eventFocus Column name of the event of interest or type 1 event (class "character").
#' @param timeAsFactor Specifies if the computed discrete time intervals should be
#' converted to a categorical variable (class "logical"). Default is FALSE.
#' In the default settings the discret time intervals are treated
#' as quantitative (class "numeric").
#' @param family Specifies the assumption about the response distribution and the link function.
#' Default value is the discrete survival continuation ratio model with logit-link
#' (for comparison see \code{\link[stats]{family}}).
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Specification of additional arguments in function \code{\link[stats]{glm}}.
#' @return Returns an object of class c("glm", "lm"). The attribute "subDistWeights"
#' saves the subdistribution weights used in the estimation.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @seealso \code{\link{dataLongCompRisks}}, \code{\link{estCompRisksGEE}},
#' \code{\link{estRegSmoothCompRisks}}, \code{\link[stats]{glm}}
#' @references
#' \insertRef{bergerSubdist}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate GEE models for all events
#' estModel <- estRegSubDist(dataShort = SubUnempDur,
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell",
#' eventFocus="censor1")
#' summary(estModel)
#'
#' @export estRegSubDist
estRegSubDist <- function(dataShort,
formulaVariable =~ timeInt, timeColumn,
eventColumns, eventColumnsAsFactor=FALSE,
eventFocus, timeAsFactor=FALSE, family=stats::binomial,
storeAugData=TRUE, ... ){
# Data transformation
datLong <- dataLongSubDist(dataShort = dataShort, timeColumn=timeColumn,
eventColumns = eventColumns, timeAsFactor=timeAsFactor,
eventFocus=eventFocus, eventColumnsAsFactor=eventColumnsAsFactor)
# Construct formula
constructFormula <- formula(paste("y ~ ", paste(
attr(terms(formulaVariable),"term.labels"), collapse=" + "), sep = " "))
# Fit
RES <- glm(formula=constructFormula, data=datLong, weights=datLong$subDistWeights,
family=family, ...)
attr(RES, which="subDistWeights") <- datLong$subDistWeights
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
#' Survival Function For Competing Risks
#'
#' Computes the overall survival function S(T>t|x) for all causes based on
#' estimated hazards of a competing risks model. The discrete hazards may or
#' may not depend on covariates. The covariates have to be equal across all
#' estimated hazards. Therefore the given discrete hazards should only vary
#' over time.
#'
#' The argument \emph{hazards} must be given for all intervals \eqn{\left[ a_0, a_1 \right), \left[ a_1,
#' a_2 \right), ..., \left[ a_{q-1}, a_q \right), \left[ a_q, \infty \right)}.
#'
#' @param hazards Estimated discrete hazards (numeric class c("matrix", "array")).
#' Discrete hazards of each time interval are stored in the rows and the number
#' of columns equal to the number of events.
#' @return Estimated all cause survival probabilities (class "numeric")
#' @note It is assumed that all time points up to the last interval \eqn{\left[ a_q, \infty \right)}
#' are available. If not already present, these can be added manually.
#' @author Moritz Berger \email{moritz.berger@@zi-mannheim.de}
#' @seealso \code{\link{estSurv}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords competing_risks discrete_survival
#' @examples
#'
#' # Example unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur [1:100, ]
#'
#' # Convert to long format
#' UnempLong <- dataLongCompRisks(dataShort = subUnempDur, timeColumn = "spell",
#' eventColumns = c("censor1", "censor4"))
#' head(UnempLong)
#'
#' # Estimate continuation ratio model with logit link
#' vglmFit <- VGAM::vglm(formula = cbind(e0, e1, e2) ~ timeInt + age + logwage, data = UnempLong,
#' family = VGAM::multinomial(refLevel = "e0"))
#'
#' # Estimate discrete survival function given age, logwage of first person
#' hazards <- VGAM::predictvglm(vglmFit, newdata = subset(UnempLong, obj == 1), type = "response")[,-1]
#' SurvivalFuncCondX <- estSurvCompRisks(rbind(hazards, 0.5))
#' SurvivalFuncCondX
#'
#'
#' @export estSurvCompRisks
estSurvCompRisks <- function(hazards) {
# Input checks
if(!is.matrix(hazards)) {stop(
"Argument *hazards* is not a matrix! Please specify a matrix of estimated hazards with the time intervals in the rows
and the events in the columns.")}
if(!all(-sqrt(.Machine$double.eps) <= hazards &
hazards - 1 <= sqrt(.Machine$double.eps))) {
stop("Argument *hazards* is not a matrix of probabilities! Please specify a matrix of estimated hazards with the
time intervals in the rows and the events in the columns.")}
overall_haz <- apply(hazards, 1, sum)
erg <- cumprod(1 - overall_haz)
names(erg) <- paste("S_a(T=", 1:nrow(hazards), ")", sep = "")
return(erg)
}
#########################
# estCumHazCompRisks
#' Cumulative Hazard Function For Competing Risks
#'
#' Estimates the overall cumulative hazard function \eqn{\Gamma}(T = t|x) based
#' on the overall hazard rate in competing risks. The hazard rates may or may
#' not depend on covariates. The covariates have to be equal across all
#' estimated hazard rates. Therefore the given hazard rates should only vary
#' over time.
#'
#' The argument \emph{hazards} must be given for all intervals \eqn{\left[ a_0, a_1 \right), \left[ a_1,
#' a_2 \right), ..., \left[ a_{q-1}, a_q \right), \left[ a_q, \infty \right)}.
#'
#' @param hazards Estimated discrete hazards (numeric class c("matrix", "array")).
#' Discrete hazards of each time interval are stored in the rows and the number
#' of columns equal to the number of events.
#' @return Estimated cumulative all cause hazard (class "numeric")
#' @note It is assumed that all time points up to the last
#' theoretical interval \eqn{\left[ a_q, \infty \right)} are available. If not already present,
#' these can be added manually.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords survival
#' @examples
#'
#'
#' # Example unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur[1:100, ]
#'
#' # Estimate binomial model with logit link
#' estModel <- estRegSmoothCompRisks(dataShort = subUnempDur,
#' dataTransform = "dataLongCompRisks",
#' formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#'
#' # Estimate discrete survival function given age, logwage of first person
#' predFrame <- attr(estModel, "augData")
#' hazard <- predict(estModel, newdata=predFrame[predFrame$obj==1, ], type = "response")
#' cumHazCondX <- estCumHazCompRisks(cbind(hazard, 1))
#' cumHazCondX
#'
#' @export estCumHazCompRisks
estCumHazCompRisks <- function(hazards) {
# Input checks
if(!is.matrix(hazards)) {stop(
"Argument *hazards* is not a matrix! Please specify a matrix of estimated hazards with the time intervals in the rows
and the events in the columns.")}
if(!all(-sqrt(.Machine$double.eps) <= hazards &
hazards - 1 <= sqrt(.Machine$double.eps))) {
stop("Argument *hazards* is not a matrix of probabilities! Please specify a matrix of estimated hazards with the
time intervals in the rows and the events in the columns.")}
overall_haz <- apply(hazards, 1, sum)
erg <- cumsum(overall_haz)
names(erg) <- paste("CumHaz_a(T=", 1:nrow(hazards), ")", sep = "")
return(erg)
}
#######################################################
#' Marginal Probabilities For Competing Risks
#'
#' Estimates the marginal probability P(T = t, R = r|x) based on estimated
#' discrete hazards of a competing risks model. The discrete hazards may or may
#' not depend on covariates. The covariates have to be equal across all
#' estimated discrete hazards. Therefore the given discrete hazards should only
#' vary over time.
#'
#' The argument \emph{hazards} must be given for all intervals \eqn{\left[ a_0, a_1 \right), \left[ a_1,
#' a_2 \right), ..., \left[ a_{q-1}, a_q \right), \left[ a_q, \infty \right)}.
#'
#' @param hazards Estimated discrete hazards ("numeric matrix").
#' Discrete hazards of each time interval are stored in the rows and the number of columns equal to the number of events.
#' @return Estimated marginal probabilities ("numeric matrix")
#' @note It is assumed that all time points up to the last interval \eqn{\left[ a_q, \infty \right)}
#' are available. If not already present, these can be added manually.
#' In competing risk settings the marginal probabilities of the last theoretical interval
#' depend on the assumptions on the discrete hazards in the last theoretical interval.
#' However the estimation of all previous discrete intervals is not affected by those assumptions.
#' @author Moritz Berger \email{moritz.berger@@zi-mannheim.de}
#' @seealso \code{\link{estMargProb}}
#' @references
#' \insertRef{tutzModelDisc}{discSurv}
#' @keywords competing risks discrete survival
#' @examples
#'
#' # Example unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' subUnempDur <- UnempDur [1:100, ]
#'
#' # Convert to long format
#' UnempLong <- dataLongCompRisks(dataShort = subUnempDur, timeColumn = "spell",
#' eventColumns = c("censor1", "censor4"))
#' head(UnempLong)
#'
#' # Estimate continuation ratio model with logit link
#' vglmFit <- VGAM::vglm(formula = cbind(e0, e1, e2) ~ timeInt + age + logwage, data = UnempLong,
#' family = VGAM::multinomial(refLevel = "e0"))
#'
#' # Estimate discrete survival function given age, logwage of first person
#' hazards <- VGAM::predictvglm(vglmFit, newdata = subset(UnempLong, obj == 1), type = "response")[,-1]
#'
#' # Estimate marginal probabilities given age, logwage of first person
#' # Example 1
#' # Assumption: Discrete hazards in last theoretical interval are equal for both event types
#' MarginalProbCondX <- estMargProbCompRisks(rbind(hazards, 0.5))
#' MarginalProbCondX
#' all.equal(sum(MarginalProbCondX), 1) # TRUE: Marginal probabilities must sum to 1!
#'
#' # Example 2
#' # Assumption: Discrete hazards in last theoretical interval are event1=, event2=
#' MarginalProbCondX2 <- estMargProbCompRisks(rbind(hazards, c(0.75, 0.25)))
#' MarginalProbCondX2
#' all.equal(sum(MarginalProbCondX2), 1) # TRUE: Marginal probabilities must sum to 1!
#'
#' # Compare marginal probabilities given X
#' all.equal(MarginalProbCondX[1:5, ], MarginalProbCondX2[1:5, ])
#' all.equal(MarginalProbCondX[6, ], MarginalProbCondX2[6, ])
#'
#' @export estMargProbCompRisks
estMargProbCompRisks <- function(hazards) {
# Input checks
if(!is.matrix(hazards)) {stop(
"Argument *hazards* is not a matrix! Please specify a matrix of estimated hazards with the time intervals in the rows
and the events in the columns.")}
if(!all(-sqrt(.Machine$double.eps) <= hazards &
hazards - 1 <= sqrt(.Machine$double.eps))) {
stop("Argument *hazards* is not a matrix of probabilities! Please specify a matrix of estimated hazards with the
time intervals in the rows and the events in the columns.")}
EstSurv <- estSurvCompRisks(hazards)
EstProb <- matrix(NA, nrow=nrow(hazards), ncol = ncol(hazards))
for(j in 1:ncol(hazards)){
EstProb[,j] <- hazards[,j]*c(1,EstSurv[1:(nrow(hazards)-1)])
}
rownames(EstProb) <- paste("P_a(T=", 1:nrow(hazards), ")", sep = "")
colnames(EstProb) <- colnames(hazards)
return(EstProb)
}
###############################################################################
#' Cumulative Incidence Function For Competing Risks
#'
#' Estimates the cumulative incidence function of a discrete time competing risks model
#' given covariates P(T <= t, event = k | x).
#'
#' @param hazards Estimated discrete hazard rates of all events (class "matrix").
#' Each column represents one event. The first column is assumed to contain the censoring case
#' and the discrete hazards should only vary over time in each row.
#' @param eventFocus Column that represent the discrete hazards of the primary event (class "integer").
#' @param obj Integer identification number of each individual.
#' Usually this information is computed during data augmentation (class "numeric").
#' @return Returns cumulative incidence function of the primary event. If
#' argument \emph{obj} is not empty a list of vectors is returned.
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details The covariates set is required to be constant across rows.
#' If argument \emph{obj} is given, then for each unique individual with specific
#' covariates a cumulative incidence function is computed.
#' @seealso \code{\link{estCompRisksGEE}}, \code{\link{dataLongCompRisks}},
#' \code{\link{dataLongCompRisksTimeDep}}, \code{\link[geepack]{geeglm}}
#' @references
#' \insertRef{minjungDiscComp}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur [1:100, ]
#'
#' # Estimate GEE models for all events
#' estGEE <- estCompRisksGEE(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' corstr = "independence", formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#'
#' # Estimate hazards of all events given the covariates of third person
#' SubUnempDurLong <- dataLongCompRisks(dataShort = SubUnempDur,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' preds <- predict(estGEE, subset(SubUnempDurLong, obj == 3))
#'
#' # Estimate cumulative incidence function
#' cumIncGEE <- estCumInc(preds, eventFocus = 2)
#' plot(cumIncGEE)
#'
#' @export estCumInc
estCumInc <- function(hazards, eventFocus=1, obj=NULL){
if( is.null(obj) ){
if( is.null(dim(hazards)) ){
hazards <- matrix(hazards, ncol=1)
eventFocus <- 1
}
# Estimate cumulative incidence function
allCauseHazard <- rowSums(hazards)
P_T_k <- hazards[, eventFocus] *
c(1, estSurv(c(allCauseHazard, 1))[1:(dim(hazards)[1] - 1)] )
cumInc1 <- cumsum(P_T_k)
names(cumInc1) <- paste("P(T<=", 1:length(cumInc1), ", event)", sep = "")
class(cumInc1) <- "discSurvEstCumInc"
return(cumInc1)
} else{
if( is.null(dim(hazards)) ){
hazards <- matrix(hazards, ncol=1)
eventFocus <- 1
}
# Split hazards
splitData <- split(hazards, f=obj)
# Estimate survival functions
RES <- lapply(splitData, function(x){
x1 <- matrix(x, ncol=ncol(hazards))
allCauseHazard <- rowSums(x1)
P_T_k <- x1[, eventFocus] *
c(1, estSurv(c(allCauseHazard, 1))[1:(dim(x1)[1] - 1)] )
cumInc1 <- cumsum(P_T_k)
names(cumInc1) <- paste("P(T<=", 1:length(cumInc1),
", event)", sep = "")
class(cumInc1) <- "discSurvEstCumInc"
cumInc1
})
return(RES)
}
}
#########################################################
# Common covariance of parameter estimates between events
#' GEE Covariance Of All Events For Competing Risks
#'
#' @description Estimates covariance of estimated parameters of all competing events generalized estimation equation models using sandwich approach.
#' @param modelEst Discrete time competing risks GEE model prediction model (class "dCRGEE").
#' @param printProgess Should progress status be printed during estimation (class "logical")? Default is FALSE.
#' @return Returns symmetric matrix of rows and columns dimension "number of competing risks" * "number of regression parameters" ("numeric matrix").
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @seealso \code{\link{estCompRisksGEE}}, \code{\link{dataLongCompRisks}}, \code{\link{dataLongCompRisksTimeDep}},
#' \code{\link[geepack]{geeglm}}
#' @references
#' \insertRef{minjungDiscComp}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur [1:100, ]
#'
#' # Estimate GEE models for all events
#' estGEE <- estCompRisksGEE(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' corstr = "independence", formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2"), timeColumn = "spell")
#'
#' \dontrun{
#' # Estimate covariance matrix of estimated parameters and competing events
#' estCovar <- covarGEE(modelEst=estGEE)
#' estCovar
#'
#' # Covariances of estimated parameters of one event equal the diagonal blocks
#' lengthParameters <- length(estGEE[[1]]$coefficients)
#' noCompEvents <- length(estGEE)
#' meanAbsError <- rep(NA, noCompEvents)
#' for( k in 1:noCompEvents ){
#'
#' relInd <- (1 + (k-1) * lengthParameters) : (k * lengthParameters)
#' meanAbsError[k] <- mean(abs(estCovar[relInd, relInd] - estGEE[[k]]$robust.variance))
#'
#' }
#' mean(meanAbsError)
#' # -> Covariance estimates within each event are equal to diagonal blocks in
#' # complete covariance matrix with very small differences due to numerical accuracy.
#' }
#'
#' @export covarGEE
covarGEE <- function(modelEst, printProgess=FALSE){
# 1. Matrix A
nObs <- max(modelEst[[1]]$id)
k_max <- length(modelEst)
pMax <- length(modelEst[[1]]$coefficients)
matA <- matrix(0, nrow=k_max*pMax, ncol=k_max*pMax)
for( k in 1:k_max ){
tempMat <- H_k(k=k, modelEst=modelEst)
relInd <- ( 1 + (k-1) * pMax):(k * pMax)
matA[relInd, relInd] <- solve(tempMat)
if(printProgess){
if( k %% round(sqrt(k_max)) == 0 ){
cat("Matrix 1 of 2 progress:", round(k/k_max*100, 2), "%", "\n")
}
}
}
# 2. Matrix B
matBsum <- matrix(0, nrow=k_max * pMax, ncol=k_max * pMax)
for( i in 1:nObs ){
uVec <- rep(NA, k_max * pMax)
for( k in 1:k_max ){
relInd <- ( 1 + (k-1) * pMax):(k * pMax)
uVec[relInd] <- U_k_i(k=k, i=i, modelEst=modelEst)
}
matBsum <- matBsum + outer(uVec, uVec)
if(printProgess){
if( i %% round(sqrt(nObs)) == 0 ){
cat("Matrix 2 of 2 progress:", round(i/nObs*100, 2), "%", "\n")
}
}
}
matB <- matBsum / nObs
# 3. Output
finalMat <- matA %*% matB %*% matA / nObs
namesTemp <- c(sapply(1:k_max, function(x) paste(
names(modelEst)[[x]], "_", names(modelEst[[x]]$coefficients), sep="")))
dimnames(finalMat) <- list(namesTemp, namesTemp)
return(finalMat)
}
################################################################################
# Internal GEE functions
#' @rdname estCompRisksGEE
#' @param k Competing cause k (class "integer")
#' @param i Cluster number i (class "integer"), e. g. patients or hospitals
#' @param l Discrete time interval l (class "integer")
#' @param modelEst Discrete time competing risks GEE model prediction model (class "dCRGEE").
# #' @author Thomas Welchowski
#' @keywords survival
#' @noRd
gradientLogLikGEE <- function(k, i, l, modelEst){
currentModel <- modelEst[[k]]
exp_eta <- exp(currentModel$linear.predictors)
datLong <- attr(currentModel, "datLong")
modelMat <- model.matrix(currentModel$terms, datLong)
dimModelMat <- dim(modelMat)
gradAllObs <- matrix(NA, nrow=dimModelMat[1], ncol=dimModelMat[2])
for( j in 1:dim(modelMat)[2] ){
varInterest <- modelMat[, j]
gradAllObs[, j] <- currentModel$y * varInterest / ( 1 + exp_eta ) -
(1-currentModel$y) * varInterest * exp_eta / ( 1 + exp_eta )
}
dimnames(gradAllObs)[[2]] <- dimnames(modelMat)[[2]]
gradAllObsSub <- gradAllObs[datLong$obj==i & datLong$timeInt==l, ]
gradAllObsSub
}
#' @rdname estCompRisksGEE
#' @param k Competing event k (class "integer")
#' @param i Cluster number i (class "integer"), e. g. patients or hospitals
#' @param modelEst Discrete time competing risks GEE model prediction model (class "dCRGEE").
# #' @author Thomas Welchowski
#' @keywords survival
#' @noRd
U_k_i <- function(k, i, modelEst){
currentModel <- modelEst[[k]]
datLong <- attr(currentModel, "datLong")
modelMat <- model.matrix(currentModel$terms, datLong)
n_i <- max(datLong[currentModel$id==i, "timeInt"])
rowSums(sapply(1:n_i, function(m) gradientLogLikGEE(k=k, i=i, l=m, modelEst=modelEst)))
}
#' @rdname estCompRisksGEE
#' @param k Competing event k (class "integer")
#' @param i Cluster number i (class "integer"), e. g. patients or hospitals
#' @param l Discrete time interval l (class "integer")
#' @param modelEst Discrete time competing risks GEE model prediction model (class "dCRGEE").
# #' @author Thomas Welchowski
#' @keywords survival
#' @noRd
hessianLogitLikGEE <- function(k, i, l, modelEst){
currentModel <- modelEst[[k]]
datLong <- attr(currentModel, "datLong")
exp_eta <- exp(currentModel$linear.predictors)
modelMat <- model.matrix(currentModel$terms, datLong)
dimModelMat <- dim(modelMat)
restrictObs <- currentModel$id==i & datLong$timeInt==l
hessMat <- matrix(NA, nrow=dimModelMat[2], ncol=dimModelMat[2])
for( j in 1:dim(modelMat)[2] ){
for( m in 1:dim(modelMat)[2] ){
varInterest1 <- modelMat[restrictObs, j]
varInterest2 <- modelMat[restrictObs, m]
hessMat[m, j] <- - varInterest1 * varInterest2 * exp_eta[restrictObs] /
( 1 + exp_eta[restrictObs] )^2
}
}
dimnames(hessMat)[[1]] <- dimnames(modelMat)[[2]]
dimnames(hessMat)[[2]] <- dimnames(modelMat)[[2]]
hessMat
}
#' @rdname estCompRisksGEE
#' @param k Competing event k (class "integer")
#' @param modelEst Discrete time competing risks GEE model prediction model (class "dCRGEE").
# #' @author Thomas Welchowski
#' @keywords survival
#' @noRd
H_k <- function(k, modelEst){
currentModel <- modelEst[[k]]
datLong <- attr(currentModel, "datLong")
modelMat <- model.matrix(currentModel$terms, datLong)
nObs <- max(currentModel$id)
matListSumObs <- vector("list", nObs)
for( u in 1:nObs ){
n_i <- max(datLong[currentModel$id==u, "timeInt"])
matListSum <- vector("list", n_i)
for( v in 1:n_i ){
if(v==1){
matListSum[[v]] <- hessianLogitLikGEE(k=k, i=u, l=v, modelEst=modelEst)
} else{
matListSum[[v]] <- matListSum[[v-1]] + hessianLogitLikGEE(k=k, i=u, l=v, modelEst=modelEst)
}
}
if(u==1){
matListSumObs[[u]] <- matListSum[[n_i]]
} else{
matListSumObs[[u]] <- matListSumObs[[u-1]] + matListSum[[n_i]]
}
}
- matListSumObs[[nObs]] / nObs
}
###############################################################################
#' @rdname estCompRisksGEE
#' @param object Discrete time competing risks GEE model prediction model (class "dCRGEE").
# #' @author Thomas Welchowski
#' @param newdata New data set to be used for prediction (class "data.frame").
#' @keywords survival
#' @method predict dCRGEE
#' @export
predict.dCRGEE <- function (object, newdata, ...) {
predMat <- matrix(NA, nrow = dim(newdata)[1], ncol = length(object))
for(k in 1:length(object)){
# Linear predictor
fittedVal1 <- c(model.matrix(object[[k]]$terms, newdata) %*%
object[[k]]$coefficients)
# Predictions
predMat[, k] <- exp(fittedVal1) / ( 1 + exp(fittedVal1) )
}
# Add probability of censoring
predMat <- cbind(1 - rowSums(predMat), predMat)
dimnames(predMat)[[2]] <- c("e0", names(object))
return(predMat)
}
########################################
# GEE model for discrete competing risks
#' GEE Model Fitting For Competing Risks
#'
#' Estimates generalized estimation equation model for each competing event separately.
#' Dependence within person IDs is accounted for by assuming a working covariance structure.
#'
#' @param dataShort Original data set in short format with each row corresponding to one independent
#' observation (class "data.frame").
#' @param dataTransform Specification of the data transformation function from short to long format (class "character").
#' There are two available options: Without time dependent covariates ("dataLongCompRisks") and with
#' time dependent covariates ("dataLongCompRisksTimeDep"). The default is set to the former.
#' @param corstr Assumption of correlation structure (class "character"). The following are
#' permitted: '"independence"', '"exchangeable"', '"ar1"', '"unstructured"' and '"userdefined".
#' @param formulaVariable Specifies the right hand side of the regression formula (class "formula").
#' The default is to use the discrete time variable, which corresponds to a covariate free hazard.
#' It is recommended to always include the discrete time variable "timeInt".
#' @param storeAugData Should the augmented data set be saved (class "logical")?
#' Defaults is TRUE. The data set is available as attribute "augData".
#' @param \dots Additional arguments to data transformation
#' functions \code{\link{dataLongCompRisks}} or \code{\link{dataLongCompRisksTimeDep}}.
#' Preprocessing function argument \emph{responseAsFactor} has to be set to FALSE (Default).
#' @return Returns an object of class "geeglm".
#' @author Thomas Welchowski \email{t.welchowski@@psychologie.uzh.ch}
#' @details Variables in argument \emph{formulaVariable} need to be separated by "+ ".
#' For example if the two variables \emph{timeInt} and \emph{X1} should be included the formula would be
#' "~ timeInt + X1". The variable \emph{timeInt} is constructed before estimation of the model.
#' @seealso \code{\link{covarGEE}}, \code{\link{dataLongCompRisks}}, \code{\link{dataLongCompRisksTimeDep}},
#' \code{\link[geepack]{geeglm}}
#' @references
#' \insertRef{minjungDiscComp}{discSurv}
#' @keywords survival
#' @examples
#'
#' # Example with unemployment data
#' library(Ecdat)
#' data(UnempDur)
#'
#' # Select subsample
#' SubUnempDur <- UnempDur [1:100, ]
#'
#' # Estimate GEE models for all events
#' estGEE <- estCompRisksGEE(dataShort = SubUnempDur, dataTransform = "dataLongCompRisks",
#' corstr = "independence", formulaVariable =~ timeInt + age + ui + logwage * ui,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' names(estGEE)
#' estGEE[[1]]
#'
#' # Predictions
#' SubUnempDurLong <- dataLongCompRisks(dataShort = SubUnempDur,
#' eventColumns = c("censor1", "censor2", "censor3", "censor4"), timeColumn = "spell")
#' preds <- predict(estGEE, newdata = SubUnempDurLong)
#' head(preds)
#'
#' @export estCompRisksGEE
estCompRisksGEE <- function(dataShort, dataTransform = "dataLongCompRisks",
corstr = "independence",
formulaVariable =~ timeInt, storeAugData=TRUE, ... ){
# Data transformation
if(dataTransform=="dataLongCompRisks"){
datLong <- dataLongCompRisks(dataShort = dataShort, responseAsFactor=FALSE, ... )
}
if(dataTransform == "dataLongCompRisksTimeDep"){
datLong <- dataLongCompRisksTimeDep(dataSemiLong = dataShort, responseAsFactor = FALSE, ... )
}
# Construct formula
eventColumns <- grep("e[0-9]+", names(datLong), value = TRUE)
# Delete censoring column
eventColumns <- eventColumns[-which(eventColumns == "e0")]
eventColumns_length <- length(eventColumns)
constructFormula <- sapply(1:eventColumns_length,
function(x) formula(paste(paste(
eventColumns[x], collapse = ","),
"~ ", paste(attr(terms(formulaVariable),
"term.labels"), collapse=" + "),
sep = " ")))
# Fit GEE
RES <- vector(mode = "list", length = eventColumns_length)
names(RES) <- paste(eventColumns)
for(k in 1:eventColumns_length){
# Package geepack
# RES[[k]] <- geepack::geeglm(formula = constructFormula[[k]],
# family = stats::binomial,
# data = datLong, corstr = corstr, id = datLong$obj)
# Package gee
RES[[k]] <- gee::gee(formula = constructFormula[[k]],
family = stats::binomial,
data = datLong, corstr = corstr, id = datLong$obj)
attr(RES[[k]], "datLong") <- datLong
}
class(RES) <- "dCRGEE"
if(storeAugData){
attr(RES, "augData") <- datLong
}
return(RES)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.