Nothing
###
### Tests for calibration curves produced using MLR-IPCW (calib.type = 'mlr')
###
test_that("check calib_msm output", {
## Reduce to 300 individuals
# Extract the predicted transition probabilities out of state j = 1 for first 300 individuals
tp.pred <- tps0 |>
dplyr::filter(j == 1) |>
dplyr::filter(id %in% 1:300) |>
dplyr::select(any_of(paste("pstate", 1:6, sep = "")))
### Reduce ebmtcal
ebmtcal <- ebmtcal |> dplyr::filter(id %in% 1:300)
msebmtcal <- msebmtcal |> dplyr::filter(id %in% 1:300)
## Expect error if generate with CI
expect_error(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j=1,
s=0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2,
CI = 95,
CI.R.boot = 5))
## Calculate observed event probabilities
dat.calib.mlr <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j=1,
s=0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2))
expect_type(dat.calib.mlr, "list")
expect_equal(class(dat.calib.mlr), c("calib_mlr", "calib_msm"))
expect_length(dat.calib.mlr[["mean"]], 6)
expect_length(dat.calib.mlr[["plotdata"]], 6)
expect_length(dat.calib.mlr[["plotdata"]][["state3"]]$id, 265)
expect_length(dat.calib.mlr[["plotdata"]][["state6"]]$id, 265)
expect_no_error(summary(dat.calib.mlr))
})
test_that("check calib_msm output with CI = TRUE (for assess.mean)", {
## Reduce to 300 individuals
# Extract the predicted transition probabilities out of state j = 1 for first 300 individuals
tp.pred <- tps0 |>
dplyr::filter(j == 1) |>
dplyr::filter(id %in% 1:300) |>
dplyr::select(any_of(paste("pstate", 1:6, sep = "")))
### Reduce ebmtcal
ebmtcal <- ebmtcal |> dplyr::filter(id %in% 1:300)
msebmtcal <- msebmtcal |> dplyr::filter(id %in% 1:300)
## Assess mean calibration
dat.calib.mlr <- suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j=1,
s=0,
t = 1826,
tp.pred = tp.pred,
calib.type = 'mlr',
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2,
CI = 95,
CI.R.boot = 5,
assess.moderate = FALSE,
assess.mean = TRUE))
expect_type(dat.calib.mlr, "list")
expect_equal(class(dat.calib.mlr), c("calib_mlr", "calib_msm"))
expect_length(dat.calib.mlr[["mean"]], 6)
expect_no_error(summary(dat.calib.mlr))
})
test_that("check calib_msm output, (j = 1, s = 0),
Manually define function to estimate weights", {
## Reduce to 300 individuals
# Extract the predicted transition probabilities out of state j = 1 for first 300 individuals
tp.pred <- tps0 |>
dplyr::filter(j == 1) |>
dplyr::filter(id %in% 1:300) |>
dplyr::select(any_of(paste("pstate", 1:6, sep = "")))
### Reduce ebmtcal
ebmtcal <- ebmtcal |> dplyr::filter(id %in% 1:300)
msebmtcal <- msebmtcal |> dplyr::filter(id %in% 1:300)
###
### Calculate observed event probabilities
dat.calib.mlr <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j = 1,
s = 0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2))
###
### Calculate manual weights
weights.manual <-
calc_weights(data.ms = msebmtcal,
data.raw = ebmtcal,
covs = c("year", "agecl", "proph", "match"),
t = 1826,
s = 0,
landmark.type = "state",
j = 1,
max.weight = 10,
stabilised = FALSE)
###
### Calculate observed event probabilities same function as internal procedure, and check it agrees with dat.calib.mlr
dat.calib.mlr.w.manual <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j = 1,
s = 0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
weights = weights.manual$ipcw,
mlr.ps.int = 2,
mlr.degree = 2))
expect_equal(dat.calib.mlr[["plotdata"]][[1]], dat.calib.mlr.w.manual[["plotdata"]][[1]])
###
### Calculate observed event probabilities using an incorrect vector of weights, and see if its different from dat.calib.mlr
dat.calib.mlr.w.manual <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j = 1,
s = 0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
weights = rep(1,nrow(weights.manual)),
mlr.ps.int = 2,
mlr.degree = 2))
expect_false(any(dat.calib.mlr[["plotdata"]][[1]]$obs == dat.calib.mlr.w.manual[["plotdata"]][[1]]$obs))
###
### Calculate observed event probabilities with w.function, where calc_weights_manual = calc_weights (exactly same as internal procedure)
calc_weights_manual <- calc_weights
dat.calib.mlr.w.function <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j = 1,
s = 0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
w.function = calc_weights_manual,
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2))
expect_type(dat.calib.mlr.w.function, "list")
expect_equal(class(dat.calib.mlr.w.function), c("calib_mlr", "calib_msm"))
expect_length(dat.calib.mlr.w.function[["mean"]], 6)
expect_length(dat.calib.mlr.w.function[["plotdata"]], 6)
expect_length(dat.calib.mlr.w.function[["plotdata"]][[1]]$id, 265)
expect_length(dat.calib.mlr.w.function[["plotdata"]][[4]]$id, 265)
## Check answer is same whether w.function used or not
expect_equal(dat.calib.mlr[["plotdata"]][[1]], dat.calib.mlr.w.function[["plotdata"]][[1]])
expect_equal(dat.calib.mlr[["plotdata"]][[4]], dat.calib.mlr.w.function[["plotdata"]][[4]])
###
### Redefine calc_weights, but change order of all the input arguments (this shouldn't make a difference)
calc_weights_manual <- function(stabilised = FALSE, max.follow = NULL, data.ms, covs = NULL, landmark.type = "state", j = NULL, t, s, max.weight = 10, data.raw){
### Modify everybody to be censored after time t, if a max.follow has been specified
if(!is.null(max.follow)){
if (max.follow == "t"){
data.raw <- dplyr::mutate(data.raw,
dtcens.s = dplyr::case_when(dtcens < t + 2 ~ dtcens.s,
dtcens >= t + 2 ~ 0),
dtcens = dplyr::case_when(dtcens < t + 2 ~ dtcens,
dtcens >= t + 2 ~ t + 2))
} else {
data.raw <- dplyr::mutate(data.raw,
dtcens.s = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens.s,
dtcens >= max.follow + 2 ~ 0),
dtcens = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens,
dtcens >= max.follow + 2 ~ max.follow + 2))
}
}
### Create a new outcome, which is the time until censored from s
data.raw$dtcens.modified <- data.raw$dtcens - s
### Save a copy of data.raw
data.raw.save <- data.raw
### If landmark.type = "state", calculate weights only in individuals in state j at time s
### If landmark.type = "all", calculate weights in all uncensored individuals at time s (note that this excludes individuals
### who have reached absorbing states, who have been 'censored' from the survival distribution is censoring)
if (landmark.type == "state"){
### Identify individuals who are uncensored in state j at time s
ids.uncens <- base::subset(data.ms, from == j & Tstart <= s & s < Tstop) |>
dplyr::select(id) |>
dplyr::distinct(id) |>
dplyr::pull(id)
} else if (landmark.type == "all"){
### Identify individuals who are uncensored time s
ids.uncens <- base::subset(data.ms, Tstart <= s & s < Tstop) |>
dplyr::select(id) |>
dplyr::distinct(id) |>
dplyr::pull(id)
}
### Subset data.ms and data.raw to these individuals
data.ms <- data.ms |> base::subset(id %in% ids.uncens)
data.raw <- data.raw |> base::subset(id %in% ids.uncens)
###
### Create models for censoring in order to calculate the IPCW weights
### Seperate models for estimating the weights, and stabilising the weights (intercept only model)
###
if (!is.null(covs)){
### A model where we adjust for predictor variables
cens.model <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ ",
paste(covs, collapse = "+"),
sep = "")),
data = data.raw)
### Intercept only model (numerator for stabilised weights)
cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
sep = "")),
data = data.raw)
} else if (is.null(covs)){
### If user has not input any predictors for estimating weights, the model for estimating the weights is the intercept only model (i.e. Kaplan Meier estimator)
### Intercept only model (numerator for stabilised weights)
cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
sep = "")),
data = data.raw)
### Assign cens.model to be the same
cens.model <- cens.model.int
}
### Calculate a data frame containing probability of censored and uncenosred at each time point
### The weights will be the probability of being uncensored, at the time of the event for each individual
## Extract baseline hazard
data.weights <- survival::basehaz(cens.model, centered = FALSE)
## Add lp to data.raw.save
data.raw.save$lp <- stats::predict(cens.model, newdata = data.raw.save, type = "lp", reference = "zero")
### Create weights for the cohort at time t - s
### Note for individuals who reached an absorbing state, we take the probability of them being uncensored at the time of reached the
### abosrbing state. For individuals still alive, we take the probability of being uncensored at time t - s.
### Get location of individuals who entered absorbing states or were censored prior to evaluation time
obs.absorbed.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 0)
obs.censored.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 1)
###
### Now create unstabilised probability of (un)censoring weights
### Note that weights are the probability of being uncensored, so if an individual has low probability of being uncesored,
### the inervse of this will be big, weighting them strongly
###
### First assign all individuals a weight of the probability of being uncensored at time t
### This is the linear predictor times the cumulative hazard at time t, and appropriate transformation to get a risk
data.raw.save$pcw <- as.numeric(exp(-exp(data.raw.save$lp)*data.weights$hazard[max(which(data.weights$time <= t - s))]))
## Write a function which will extract the uncensored probability for an individual with linear predictor lp at a given time t
prob.uncens.func <- function(input){
## Assign t and person_id
t <- input[1]
lp <- input[2]
if (t <= 0){
return(NA)
} else if (t > 0){
## Get hazard at appropriate time
if (t < min(data.weights$time)){
bhaz.t <- 0
} else if (t >= min(data.weights$time)){
bhaz.t <- data.weights$hazard[max(which(data.weights$time <= t))]
}
## Return risk
return(exp(-exp(lp)*bhaz.t))
}
}
### Apply this function to all the times at which individuals have entered an absorbing state prior to censoring
data.raw.save$pcw[obs.absorbed.prior] <- apply(data.raw.save[obs.absorbed.prior, c("dtcens.modified", "lp")], 1, FUN = prob.uncens.func)
### For individuals who were censored prior to t, assign the weight as NA
data.raw.save$pcw[obs.censored.prior] <- NA
### Invert these
data.raw.save$ipcw <- 1/data.raw.save$pcw
###
### Stabilise these weights dependent on user-input
###
if (stabilised == TRUE){
## Extract baseline hazard
data.weights.numer <- survival::basehaz(cens.model.int, centered = TRUE)
### Assign all individuals a weight of the probability of being uncesored at time t
data.raw.save$pcw.numer <- as.numeric(exp(-data.weights.numer$hazard[max(which(data.weights.numer$time <= t - s))]))
### Create stabilised weight
data.raw.save$ipcw.stab <- data.raw.save$pcw.numer*data.raw.save$ipcw
}
### Finally cap these at 10 and create output object
### Create output object
if (stabilised == FALSE){
data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "pcw" = data.raw.save$pcw)
} else if (stabilised == TRUE){
data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
data.raw.save$ipcw.stab <- pmin(data.raw.save$ipcw.stab, max.weight)
output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "ipcw.stab" = data.raw.save$ipcw.stab, "pcw" = data.raw.save$pcw)
}
return(output.weights)
}
### Calculate observed event probabilities with new w.function
dat.calib.mlr.w.function <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j = 1,
s = 0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
w.function = calc_weights_manual,
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2))
expect_type(dat.calib.mlr.w.function, "list")
expect_equal(class(dat.calib.mlr.w.function), c("calib_mlr", "calib_msm"))
expect_length(dat.calib.mlr.w.function[["mean"]], 6)
expect_length(dat.calib.mlr.w.function[["plotdata"]], 6)
expect_length(dat.calib.mlr.w.function[["plotdata"]][[1]]$id, 265)
expect_length(dat.calib.mlr.w.function[["plotdata"]][[4]]$id, 265)
## Check answer is same whether w.function used or not
expect_equal(dat.calib.mlr[["plotdata"]][[1]], dat.calib.mlr.w.function[["plotdata"]][[1]])
expect_equal(dat.calib.mlr[["plotdata"]][[4]], dat.calib.mlr.w.function[["plotdata"]][[4]])
###
### Repeat this process (manual definition of calc_weights), again arguments are in different order, but this time an extra argument is added, which adds 'extra.arg' to every weight.
### This extra arguments is something that could be inputted by user, and want to check it does actually change the answer. It should no longer agree with dat.calb.mlr.
calc_weights_manual_extra <- function(stabilised = FALSE, max.follow = NULL, data.ms, covs = NULL, landmark.type = "state", j = NULL, t, s, max.weight = 10, data.raw, extra.arg = NULL){
### Modify everybody to be censored after time t, if a max.follow has been specified
if(!is.null(max.follow)){
if (max.follow == "t"){
data.raw <- dplyr::mutate(data.raw,
dtcens.s = dplyr::case_when(dtcens < t + 2 ~ dtcens.s,
dtcens >= t + 2 ~ 0),
dtcens = dplyr::case_when(dtcens < t + 2 ~ dtcens,
dtcens >= t + 2 ~ t + 2))
} else {
data.raw <- dplyr::mutate(data.raw,
dtcens.s = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens.s,
dtcens >= max.follow + 2 ~ 0),
dtcens = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens,
dtcens >= max.follow + 2 ~ max.follow + 2))
}
}
### Create a new outcome, which is the time until censored from s
data.raw$dtcens.modified <- data.raw$dtcens - s
### Save a copy of data.raw
data.raw.save <- data.raw
### If landmark.type = "state", calculate weights only in individuals in state j at time s
### If landmark.type = "all", calculate weights in all uncensored individuals at time s (note that this excludes individuals
### who have reached absorbing states, who have been 'censored' from the survival distribution is censoring)
if (landmark.type == "state"){
### Identify individuals who are uncensored in state j at time s
ids.uncens <- base::subset(data.ms, from == j & Tstart <= s & s < Tstop) |>
dplyr::select(id) |>
dplyr::distinct(id) |>
dplyr::pull(id)
} else if (landmark.type == "all"){
### Identify individuals who are uncensored time s
ids.uncens <- base::subset(data.ms, Tstart <= s & s < Tstop) |>
dplyr::select(id) |>
dplyr::distinct(id) |>
dplyr::pull(id)
}
### Subset data.ms and data.raw to these individuals
data.ms <- data.ms |> base::subset(id %in% ids.uncens)
data.raw <- data.raw |> base::subset(id %in% ids.uncens)
###
### Create models for censoring in order to calculate the IPCW weights
### Seperate models for estimating the weights, and stabilising the weights (intercept only model)
###
if (!is.null(covs)){
### A model where we adjust for predictor variables
cens.model <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ ",
paste(covs, collapse = "+"),
sep = "")),
data = data.raw)
### Intercept only model (numerator for stabilised weights)
cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
sep = "")),
data = data.raw)
} else if (is.null(covs)){
### If user has not input any predictors for estimating weights, the model for estimating the weights is the intercept only model (i.e. Kaplan Meier estimator)
### Intercept only model (numerator for stabilised weights)
cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
sep = "")),
data = data.raw)
### Assign cens.model to be the same
cens.model <- cens.model.int
}
### Calculate a data frame containing probability of censored and uncenosred at each time point
### The weights will be the probability of being uncensored, at the time of the event for each individual
## Extract baseline hazard
data.weights <- survival::basehaz(cens.model, centered = FALSE)
## Add lp to data.raw.save
data.raw.save$lp <- stats::predict(cens.model, newdata = data.raw.save, type = "lp", reference = "zero")
### Create weights for the cohort at time t - s
### Note for individuals who reached an absorbing state, we take the probability of them being uncensored at the time of reached the
### abosrbing state. For individuals still alive, we take the probability of being uncensored at time t - s.
### Get location of individuals who entered absorbing states or were censored prior to evaluation time
obs.absorbed.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 0)
obs.censored.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 1)
###
### Now create unstabilised probability of (un)censoring weights
### Note that weights are the probability of being uncensored, so if an individual has low probability of being uncesored,
### the inervse of this will be big, weighting them strongly
###
### First assign all individuals a weight of the probability of being uncensored at time t
### This is the linear predictor times the cumulative hazard at time t, and appropriate transformation to get a risk
data.raw.save$pcw <- as.numeric(exp(-exp(data.raw.save$lp)*data.weights$hazard[max(which(data.weights$time <= t - s))]))
## Write a function which will extract the uncensored probability for an individual with linear predictor lp at a given time t
prob.uncens.func <- function(input){
## Assign t and person_id
t <- input[1]
lp <- input[2]
if (t <= 0){
return(NA)
} else if (t > 0){
## Get hazard at appropriate time
if (t < min(data.weights$time)){
bhaz.t <- 0
} else if (t >= min(data.weights$time)){
bhaz.t <- data.weights$hazard[max(which(data.weights$time <= t))]
}
## Return risk
return(exp(-exp(lp)*bhaz.t))
}
}
### Apply this function to all the times at which individuals have entered an absorbing state prior to censoring
data.raw.save$pcw[obs.absorbed.prior] <- apply(data.raw.save[obs.absorbed.prior, c("dtcens.modified", "lp")], 1, FUN = prob.uncens.func)
### For individuals who were censored prior to t, assign the weight as NA
data.raw.save$pcw[obs.censored.prior] <- NA
### Invert these
data.raw.save$ipcw <- 1/data.raw.save$pcw
###
### Stabilise these weights dependent on user-input
###
if (stabilised == TRUE){
## Extract baseline hazard
data.weights.numer <- survival::basehaz(cens.model.int, centered = TRUE)
### Assign all individuals a weight of the probability of being uncesored at time t
data.raw.save$pcw.numer <- as.numeric(exp(-data.weights.numer$hazard[max(which(data.weights.numer$time <= t - s))]))
### Create stabilised weight
data.raw.save$ipcw.stab <- data.raw.save$pcw.numer*data.raw.save$ipcw
}
### Finally cap these at 10 and create output object
### Create output object
if (stabilised == FALSE){
data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "pcw" = data.raw.save$pcw)
} else if (stabilised == TRUE){
data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
data.raw.save$ipcw.stab <- pmin(data.raw.save$ipcw.stab, max.weight)
output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "ipcw.stab" = data.raw.save$ipcw.stab, "pcw" = data.raw.save$pcw)
}
### Add this extra argument to the weights, to check it does something
output.weights$ipcw <- output.weights$ipcw + extra.arg
return(output.weights)
}
### Calculate observed event probabilities with new w.function
dat.calib.mlr.w.function <-
suppressWarnings(calib_msm(data.ms = msebmtcal,
data.raw = ebmtcal,
j = 1,
s = 0,
t = 1826,
tp.pred = tp.pred, calib.type = 'mlr',
w.function = calc_weights_manual_extra,
w.covs = c("year", "agecl", "proph", "match"),
mlr.ps.int = 2,
mlr.degree = 2,
extra.arg = 0.1))
expect_type(dat.calib.mlr.w.function, "list")
expect_equal(class(dat.calib.mlr.w.function), c("calib_mlr", "calib_msm"))
expect_length(dat.calib.mlr.w.function[["mean"]], 6)
expect_length(dat.calib.mlr.w.function[["plotdata"]], 6)
expect_length(dat.calib.mlr.w.function[["plotdata"]][[1]]$id, 265)
expect_length(dat.calib.mlr.w.function[["plotdata"]][[4]]$id, 265)
## Check answer is same whether w.function used or not
expect_false(any(dat.calib.mlr[["plotdata"]][[1]]$obs == dat.calib.mlr.w.function[["plotdata"]][[1]]$obs))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.