#' Setup survival part for outcome m
#' @description Setup survival part for outcome m (internal function)
#' input:
#' @param formula with inla.surv() object as an outcome
#' @param formLong formula from the longitudinal part, if any
#' @param dataSurv dataset(s) for the survival part
#' @param LSurvdat dataset for the longitudinal part converted to survival format (internal, used to get covariates
#' if missing in the survival dataset when sharing linear predictors including covariates from longitudinal into survival)
#' @param timeVar names of the variables that are time-dependent (only linear for now)
#' @param assoc association parameters between K longitudinal outcomes and M survival outcomes (list of K vectors of size M)
#' @param id name of the variable that gives the individual id
#' @param m identifies the outcome among 1:M time-to-event outcomes
#' @param K number of longitudinal outcomes
#' @param M number of survival outcomes
#' @param NFT maximum number of functions of time (fixed value)
#' @param corLong boolean that indicates if random effects across longitudinal markers are correlated,
#' when multiple longitudinal markers are included in the model
#' @param dataOnly boolean for internal use, indicates if only preparing data (i.e., not fitting the model)
#' @param SurvInfo information about survival submodels for internal use
#' @param strata covariate for stratified proportional hazards model
#' output:
#' @return YS_data includes the values of the survival outcome and covariates associated to this survival part,
#' with the association parameters but the provided id are temporary and they will be updated after
#' the cox expansion to make them unique and allow for time dependency
#' @return YSformF formula for this survival outcome (not including association parameters)
#' @importFrom lme4 nobars findbars
#' @export
setup_S_model <- function(formula, formLong, dataSurv, LSurvdat, timeVar, assoc, id, m, K, M, NFT, corLong, dataOnly, SurvInfo, strata){
# Event outcome
YS <- strsplit(as.character(formula), split="~")[[2]]
FE_formS <- nobars(formula)
RES <- findbars(formula) # random effects included? (i.e., frailty)
YS_FE_elements <- gsub("\\s", "", strsplit(strsplit(as.character(FE_formS), split="~")[[3]], split=c("\\+"))[[1]])
FML <- ifelse(strsplit(as.character(FE_formS), split="~")[[3]]=="-1", 1, strsplit(as.character(FE_formS), split="~")[[3]])
YSform2 <- formula(paste(" ~ ", FML))
# if(length(which(sapply(dataSurv, class)=="factor"))>0){ # deal with factors when modalities are missing
# factors_columns <- which(sapply(dataSurv, class)=="factor")
# for(fctc in factors_columns){
# if(length(unique(dataSurv[, fctc]))< length(levels(dataSurv[, fctc]))) dataSurv[, fctc] <- as.integer(dataSurv[, fctc])-1
# }
# }
DFS <- model.matrix(YSform2, model.frame(YSform2, dataSurv, na.action=na.pass))
if(colnames(DFS)[1]=="(Intercept)") colnames(DFS)[1] <- "Intercept"
if(grepl("inla.surv", YS)){
# attach(dataSurv)
if(dataOnly & length(paste0(SurvInfo$survOutcome))>1) assign(paste0(SurvInfo$survOutcome)[2], dataSurv)
YS <- with(dataSurv, eval(parse(text=YS)))
YSname <- paste0("S", m)
# detach(dataSurv)
}else{
YSname <- YS
YS <- get(YS)
}
YS_data <- c(list(YS), as.list(as.data.frame(DFS)))
names(YS_data)[1] <- YSname
names(YS_data) <- paste0(gsub(":", ".X.", gsub("\\s", ".", names(YS_data))), "_S", m)
YSformF <- formula(paste0(YSname, "_S", m, " ~ -1 +", paste0(paste0(gsub(":", ".X.", gsub("\\s", ".", colnames(DFS))), "_S", m, collapse="+"))))
CLid <- 0 # keep track for unique id if corLong is true for shared random effects independently
# strata?
if(!is.null(strata)){
YS_data <- append(YS_data, list(dataSurv[, strata]))
names(YS_data)[length(YS_data)] <- strata
}
# association
if(length(assoc)!=0){
YS_assoc <- unlist(assoc[1:K])[seq(m, K*M, by=M)] # extract K association terms associated to time-to-event m
for(k in 1:length(YS_assoc)){
if(TRUE %in% (YS_assoc %in% c("CV", "CS", "CV_CS"))){
# add covariates that are being shared through the association
FE_form <- nobars(formLong[[k]])
if(dim(LSurvdat)[1] != dim(dataSurv)[1] & !F %in% c(rownames(attr(terms(FE_form), "factors")) %in% colnames(dataSurv))){
DFS2 <- as.data.frame(model.matrix(FE_form, model.frame(FE_form, dataSurv, na.action=na.pass)))
}else{
DFS2 <- as.data.frame(model.matrix(FE_form, model.frame(FE_form, LSurvdat[which(LSurvdat[[id]] %in% dataSurv[[id]]),], na.action=na.pass)))
}
removeVar <- NULL
for(rmtvar in 1:length(strsplit(colnames(DFS2), ":"))){ # remove any component that contains a timeVar because it will not be useful
if(TRUE %in% (c(timeVar, c(paste0("f", 1:NFT, "(", timeVar, ")"))) %in% strsplit(colnames(DFS2), ":")[[rmtvar]]) |
TRUE %in% (names(YS_data) %in% strsplit(colnames(DFS2), ":")[[rmtvar]])) removeVar <- c(removeVar, rmtvar)
}
colNvar <- colnames(DFS2)
DFS2 <- DFS2[,-c(which(colNvar %in% c("(Intercept)")), removeVar)]
if(is.null(dim(DFS2))){
YS_data <- append(YS_data, list(DFS2))
names(YS_data)[length(names(YS_data))] <- colNvar[-c(which(colNvar %in% c("(Intercept)")), removeVar)]
}else if(dim(DFS2)[2]>0){
names(DFS2) <- gsub(":", ".X.", gsub("\\s", ".", names(DFS2)))
YS_data <- append(YS_data, DFS2)
}
RW <- which(sapply(YS_data, length)[-1] != dim(dataSurv)[1])
# overwrite variables that are not properly captured
if(length(RW)>0 & (FALSE %in% c(YS_assoc=="CS"))){
for(RWi in 1:length(RW)){
if(!(names(RW[RWi]) %in% colnames(dataSurv))) stop(paste0("I cannot find covariate '", names(RW[RWi]), "' in the survival dataset, \n while it is needed in the requested model (if '", names(RW[RWi]), "' is a \n time-dependent covariate, it needs to be included in the \n survival submodel manually to be properly handled through a \n decomposition of the followup with right censoring and left truncation)."))
YS_data[[RW[RWi]+1]] <- dataSurv[, names(RW[RWi])]
}
}
}
if(TRUE %in% (YS_assoc %in% c("SRE","SRE_ind", "CV", "CS", "CV_CS"))){
# add covariates from the random effects part shared and not already included
RE <- findbars(formLong[[k]])
RE_split <- gsub("\\s", "", strsplit(as.character(RE), split=c("\\|"))[[1]])
RE_elements <- gsub("\\s", "", strsplit(RE_split[[1]], split=c("\\+"))[[1]])
if(length(which(RE_elements==1))>0) RE_elements[which(RE_elements==1)] <- "Intercept"
RE_form <- formula(paste(RE_split[2], "~", "-1+", RE_split[1]))
RE_mat <- as.data.frame(model.matrix(RE_form, model.frame(RE_form, LSurvdat[which(LSurvdat[[id]] %in% dataSurv[[id]]),], na.action=na.pass)))
colnames(RE_mat) <- RE_elements
removeVar <- NULL
for(rmtvar in 1:length(strsplit(colnames(RE_mat), ":"))){ # remove any component that contains a timeVar because it will not be useful
if(TRUE %in% (c(1, timeVar, c(paste0("f", 1:NFT, "(", timeVar, ")"))) %in% strsplit(colnames(RE_mat), ":")[[rmtvar]]) |
TRUE %in% (names(YS_data) %in% strsplit(colnames(RE_mat), ":")[[rmtvar]])) removeVar <- c(removeVar, rmtvar)
}
colNvar <- colnames(RE_mat)
RE_mat <- RE_mat[,-c(which(colNvar %in% c("Intercept")), removeVar)]
if(is.null(dim(RE_mat))){
if(!(colNvar[-c(which(colNvar %in% c("Intercept")), removeVar)] %in% names(YS_data))){
YS_data <- append(YS_data, list(RE_mat))
names(YS_data)[length(names(YS_data))] <- colNvar[-c(which(colNvar %in% c("Intercept")), removeVar)]
}
}else if(dim(RE_mat)[2]>0){
names(RE_mat) <- gsub(":", ".X.", gsub("\\s", ".", names(RE_mat)))
YS_data <- append(YS_data, RE_mat)
}
}
if(YS_assoc[k]%in%c("CV", "CS")){ # one vector
assign(paste0(YS_assoc[k], "_L", k, "_S", m), c(as.integer(dataSurv[,id]))) # unique id set up after cox expansion
YS_data <- append(YS_data, list(get(paste0(YS_assoc[k], "_L", k, "_S", m))))
names(YS_data)[length(names(YS_data))] <- paste0(YS_assoc[k], "_L", k, "_S", m)
}else if(YS_assoc[k]%in%c("SRE")){
assign(paste0(YS_assoc[k], "_L", k, "_S", m), c(as.integer(dataSurv[,id]))) # unique id set up after cox expansion
YS_data <- append(YS_data, list(get(paste0(YS_assoc[k], "_L", k, "_S", m))))
names(YS_data)[length(names(YS_data))] <- paste0(YS_assoc[k], "_L", k, "_S", m)
}else if(YS_assoc[k]%in%c("SRE_ind")){
RE <- findbars(formLong[[k]])
RE_split <- gsub("\\s", "", strsplit(as.character(RE), split=c("\\|"))[[1]])
RE_elements <- gsub("\\s", "", strsplit(RE_split[[1]], split=c("\\+"))[[1]])
if(length(which(RE_elements==1))>0) RE_elements[which(RE_elements==1)] <- "Intercept"
for(i in 1:length(RE_elements)){
assign(paste0("SRE_", RE_elements[i], "_L", k, "_S", m), length(unique(c(as.integer(dataSurv[,id]))))*(i-1+CLid) + c(as.integer(dataSurv[,id]))) # unique id set up after cox expansion
YS_data <- append(YS_data, list(get(paste0("SRE_", RE_elements[i], "_L", k, "_S", m))))
names(YS_data)[length(names(YS_data))] <- paste0("SRE_", RE_elements[i], "_L", k, "_S", m)
}
if(corLong) CLid <- CLid + length(RE_elements)
}else if(YS_assoc[k]%in%c("CV_CS")){ # need two vectors for current value and current slope
assign(paste0("CV_L", k, "_S", m), c(as.integer(dataSurv[,id])))
YS_data <- append(YS_data, list(get(paste0("CV_L", k, "_S", m))))
names(YS_data)[length(names(YS_data))] <- paste0("CV_L", k, "_S", m)
assign(paste0("CS_L", k, "_S", m), c(as.integer(dataSurv[,id])))
YS_data <- append(YS_data, list(get(paste0("CS_L", k, "_S", m))))
names(YS_data)[length(names(YS_data))] <- paste0("CS_L", k, "_S", m)
}else{ # SRE_ind => copy (not done yet)
}
}
}
if(!is.null(RES)){
RE_splitS <- gsub("\\s", "", strsplit(as.character(RES), split=c("\\|"))[[1]])
RE_elementS <- gsub("\\s", "", strsplit(RE_splitS[[1]], split=c("\\+"))[[1]])
RE_formS <- formula(paste(RE_splitS[2], "~", "-1+", RE_splitS[1]))
RE_matS <- model.matrix(RE_formS, model.frame(RE_formS, dataSurv, na.action=na.pass))
if(length(which(RE_elementS==1))>0) RE_elementS[which(RE_elementS==1)] <- "Intercept"
colnames(RE_matS) <- RE_elementS
colnames(RE_matS) <- gsub("\\s", ".", colnames(RE_matS))
colnames(RE_matS) <- sub("\\(","", colnames(RE_matS))
colnames(RE_matS) <- sub(")","", colnames(RE_matS))
}else RE_matS <- NULL
if(!is.null(id)){
if(!id %in% names(YS_data)){ # include id for random effect if not already included for association
YS_data <- append(YS_data, list(dataSurv[,id]))
names(YS_data)[length(names(YS_data))] <- id
}
}
names(YS_data) <- sub("\\(","", names(YS_data))
names(YS_data) <- sub(")","", names(YS_data))
return(list(YS_data=YS_data, YSformF=YSformF, RE_matS=RE_matS))
}
#' Setup outcome for longitudinal marker
#' @description Setup outcome for longitudinal marker (internal function)
#' input:
#' @param formula with lme4 format (fixed effects and random effects in the same object)
#' @param dataset that contains the outcome
#' @param family of the outcome (given to check if the distribution matches but the check is not done yet)
#' @param k identifies the longitudinal marker among 1:K markers
#' output:
#' @return YL.name name of the outcome
#' @return YL values of the outcome
setup_Y_model <- function(formula, dataset, family, k){
dataF <- model.frame(subbars(formula), dataset, na.action=NULL)
YL.name <- paste0(as.character(subbars(formula))[2], "_L", k)
YL <- as.vector(model.response(dataF))
# check if y distribution matches with family here?
return(list(YL.name, YL))
}
#' Setup fixed effects part for longitudinal marker k
#' @description Setup fixed effects part for longitudinal marker k (internal function)
#' input:
#' @param formula with lme4 format (fixed effects and random effects in the same object)
#' @param dataset that contains the outcome
#' @param timeVar name of time variable
#' @param k identifies the longitudinal marker among 1:K markers
#'@param dataOnly boolean for internal use, indicates if only preparing data (i.e., not fitting the model)
#' output:
#' @return colnames(FE) names of the fixed effects (interactions are separated
#' by ".X." instead of ":" to facilitate their manipulation)
#' @return FE values of the fixed effects
setup_FE_model <- function(formula, dataset, timeVar, k, dataOnly){
FE_form <- nobars(formula)
# if(length(which(sapply(dataset, class)=="factor"))>0 & !dataOnly){ # deal with factors when modalities are missing
# factors_columns <- which(sapply(dataset, class)=="factor")
# for(fctc in factors_columns){
# if(length(unique(dataset[, fctc]))< length(levels(dataset[, fctc]))) dataset[, fctc] <- as.integer(dataset[, fctc])-1
# }
# }
FE <- model.matrix(FE_form, model.frame(FE_form, dataset, na.action=na.pass))
#if(colnames(FE)[1]=="(Intercept)") colnames(FE)[1] <- "Intercept"
colnames(FE) <- gsub(":", ".X.", gsub("\\s", ".", colnames(FE)))
colnames(FE) <- sub("\\(","", colnames(FE))
colnames(FE) <- sub(")","", colnames(FE))
OFS <- model.offset(model.frame(FE_form, dataset, na.action=na.pass))
if(is.null(OFS)){
OFS <- rep(0, dim(FE)[1])
}
return(list(colnames(FE), FE, OFS))
}
#' Setup random effects part for longitudinal marker k
#' @description Setup random effects part for longitudinal marker k (internal function)
#' input:
#' @param formula with lme4 format (fixed effects and random effects in the same object)
#' @param dataset that contains the outcome
#' @param k identifies the longitudinal marker among 1:K markers
#' output:
#' @return colnames(RE_mat) names of the random effects
#' @return RE_mat values of the random effects
setup_RE_model <- function(formula, dataset, k){
RE <- findbars(formula)
if(length(RE)==0) stop("No random effects found, the longitudinal part must at least contain one random effect per marker.")
RE_split <- gsub("\\s", "", strsplit(as.character(RE), split=c("\\|"))[[1]])
RE_elements <- gsub("\\s", "", strsplit(RE_split[[1]], split=c("\\+"))[[1]])
RE_form <- formula(paste(RE_split[2], "~", "-1+", RE_split[1]))
RE_mat <- model.matrix(RE_form, model.frame(RE_form, dataset, na.action=na.pass))
if(length(which(RE_elements==1))>0) RE_elements[which(RE_elements==1)] <- "Intercept"
colnames(RE_mat) <- RE_elements
#if(colnames(RE_mat)[1]=="(Intercept)") colnames(RE_mat)[1] <- "Intercept"
colnames(RE_mat) <- gsub("\\s", ".", colnames(RE_mat))
colnames(RE_mat) <- sub("\\(","", colnames(RE_mat))
colnames(RE_mat) <- sub(")","", colnames(RE_mat))
return(list(colnames(RE_mat), RE_mat))
}
#' Setup scopy
#' @description Setup weights for non-linear effects
#' input:
#' @param n number of knots
#' output:
#' @return Matrix with weights for scopy
INLAjoint.scopy.define <- function(n = 5L)
{
stopifnot(n == 2 || n >= 5)
if (n == 2) {
VV = cbind(1, c(-0.5, 0.5))
}
else {
Q <- INLAjoint.rw2(n, scale.model = TRUE)
e <- eigen(Q)
idx.remove <- (n - 1):n
V <- e$vectors[, -idx.remove, drop = FALSE]
lambda <- e$values[-idx.remove]
VV <- matrix(0, n, n)
VV[, 1:2] <- cbind(rep(1, n), seq(-0.5, 0.5, len = n))
m <- n - 2
for (i in seq_len(m)) {
j <- m - i + 3
VV[, j] <- V[, i] * sqrt(1/lambda[i])
}
}
return(list(n = n, W = VV))
}
#' Setup rw2
#' @description Setup random walk of order 2
#' input:
#' @param n number of knots
#' @param order order 2
#' @param ... additional arguments passed to INLAjoint.rw
#' output:
#' @return random walk 2
INLAjoint.rw2 <- function (n, order=2L, ...)
{
INLAjoint.rw(n, order = order, ...)
}
#' Setup rw
#' @description Setup random walk
#' input:
#' @param n number of knots
#' @param order order 1
#' @param sparse boolean, sparsity
#' @param scale.model boolean, scale
#' @param cyclic boolean, cyclic
#' output:
#' @return random walk 1
INLAjoint.rw <- function (n, order = 1L, sparse = TRUE, scale.model = FALSE,
cyclic = FALSE)
{
stopifnot(n >= 1L + 2L * order)
if (scale.model) {
if (!cyclic) {
rd <- order
}
else {
if (order == 1L) {
rd <- 1L
}
else {
rd <- order - 1L
}
}
Q <- INLAjoint.rw(n, order = order, sparse = sparse, scale.model = FALSE,
cyclic = cyclic)
fac <- exp(mean(log(diag(INLAjoint.ginv(as.matrix(Q), rankdef = rd)))))
Q <- fac * Q
}
else {
if (!cyclic) {
U <- diff(diag(n), diff = order)
Q <- t(U) %*% U
}
else {
m <- 2L * order + 1L
k <- 1L + order
U <- diff(diag(m), diff = order)
U <- t(U) %*% U
Q <- toeplitz(c(U[k, k:m], rep(0, n - m), U[k, m:(k +
1L)]))
}
}
return(if (sparse) INLA::inla.as.sparse(Q) else Q)
}
#' Setup ginv
#' @description Setup random walk
#' input:
#' @param x input
#' @param tol tolerance
#' @param rankdef rank
#' output:
#' @return ginv
INLAjoint.ginv <- function (x, tol = sqrt(.Machine$double.eps), rankdef = NULL)
{
if (!is.matrix(x)) {
x <- as.matrix(x)
}
if (length(dim(x)) > 2L || !(is.numeric(x) || is.complex(x))) {
stop("'x' must be a numeric or complex matrix")
}
xsvd <- svd(x)
if (is.complex(x)) {
xsvd$u <- Conj(xsvd$u)
}
if (is.null(rankdef) || rankdef == 0L) {
Positive <- xsvd$d > max(tol * xsvd$d[1L], 0)
}
else {
n <- length(xsvd$d)
stopifnot(rankdef >= 1L && rankdef <= n)
Positive <- c(rep(TRUE, n - rankdef), rep(FALSE, rankdef))
}
if (all(Positive)) {
xsvd$v %*% (1/xsvd$d * t(xsvd$u))
}
else if (!any(Positive)) {
array(0, dim(x)[2L:1L])
}
else {
xsvd$v[, Positive, drop = FALSE] %*% ((1/xsvd$d[Positive]) *
t(xsvd$u[, Positive, drop = FALSE]))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.