tests/testthat/test-calib_blr.R

###
### Tests for calibration curves produced using BLR-IPCW (calib.type = 'blr')
###

test_that("check calib_msm output, (j = 1, s = 0), curve.type = rcs, stabilised vs unstabilised", {

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), dplyr::any_of(paste("pstate", 1:6, sep = "")))

  ###
  ### Calculate observed event probabilities
  dat.calib.blr <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"))

  expect_type(dat.calib.blr, "list")
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr[["mean"]], 6)
  expect_length(dat.calib.blr[["plotdata"]], 6)
  expect_length(dat.calib.blr[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr[["metadata"]]$CI)
  expect_no_error(summary(dat.calib.blr))

  ###
  ### Calculate observed event probabilities with stabilised weights
  dat.calib.blr.stab <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"),
             w.stabilised = TRUE)

  expect_type(dat.calib.blr.stab, "list")
  expect_equal(class(dat.calib.blr.stab), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr.stab[["plotdata"]], 6)
  expect_length(dat.calib.blr.stab[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr.stab[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr.stab[["metadata"]]$CI)

  ### Check answer is same whether stabilisation used or not
  expect_equal(dat.calib.blr[["plotdata"]][[1]], dat.calib.blr.stab[["plotdata"]][[1]])

})


test_that("check calib_msm output, (j = 1, s = 0), curve.type = loess", {

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

  ## Calculate observed event probabilities
  dat.calib.blr <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "loess",
             w.covs = c("year", "agecl", "proph", "match"))

  expect_type(dat.calib.blr, "list")
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr[["mean"]], 6)
  expect_length(dat.calib.blr[["plotdata"]], 6)
  expect_length(dat.calib.blr[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr[["metadata"]]$CI)
  expect_no_error(summary(dat.calib.blr))

  ## Calculate observed event probabilities
  dat.calib.blr.stab <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "loess",
             w.covs = c("year", "agecl", "proph", "match"),
             w.stabilised = TRUE)

  expect_type(dat.calib.blr.stab, "list")
  expect_equal(class(dat.calib.blr.stab), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr.stab[["plotdata"]], 6)
  expect_length(dat.calib.blr.stab[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr.stab[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr.stab[["metadata"]]$CI)

  ### Check answer is same whether stabilisation used or not
  expect_equal(dat.calib.blr[["plotdata"]][[1]], dat.calib.blr.stab[["plotdata"]][[1]])

  ## Calculate observed event probabilities
  dat.calib.blr.w.function <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "loess",
             w.function = calc_weights,
             w.covs = c("year", "agecl", "proph", "match"))

  expect_type(dat.calib.blr.w.function, "list")
  expect_equal(class(dat.calib.blr.w.function), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr.w.function[["mean"]], 6)
  expect_length(dat.calib.blr.w.function[["plotdata"]], 6)
  expect_length(dat.calib.blr.w.function[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr.w.function[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr.w.function[["metadata"]]$CI)

  ### Check answer is same whether stabilisation used or not
  expect_equal(dat.calib.blr[["plotdata"]][[1]], dat.calib.blr.w.function[["plotdata"]][[1]])
  expect_equal(dat.calib.blr[["plotdata"]][[6]], dat.calib.blr.w.function[["plotdata"]][[6]])

})


test_that("check calib_msm output, (j = 1, s = 0), with CI", {

  skip_on_cran()

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

  ## Calculate observed event probabilities no CI
  dat.calib.blr.noCI <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"))

  ## Calculate observed event probabilities with CI
  dat.calib.blr <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"),
             CI = 95,
             CI.R.boot = 5)

  expect_type(dat.calib.blr, "list")
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr[["mean"]], 6)
  expect_length(dat.calib.blr[["plotdata"]], 6)
  expect_equal(ncol(dat.calib.blr[["plotdata"]][[1]]), 5)
  expect_equal(ncol(dat.calib.blr[["plotdata"]][[6]]), 5)
  expect_length(dat.calib.blr[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr[["plotdata"]][[6]]$id, 1778)
  expect_equal(dat.calib.blr[["metadata"]]$CI, 95)
  expect_no_error(summary(dat.calib.blr))

  expect_equal(dat.calib.blr.noCI[["plotdata"]][[1]]$obs, dat.calib.blr[["plotdata"]][[1]]$obs)
  expect_equal(dat.calib.blr.noCI[["plotdata"]][[6]]$obs, dat.calib.blr[["plotdata"]][[6]]$obs)

})


test_that("check calib_msm output, (j = 3, s = 100)", {

  ## Extract relevant predicted risks from tps100
  tp.pred <- dplyr::select(dplyr::filter(tps100, j == 3), any_of(paste("pstate", 1:6, sep = "")))

  ## Calculate observed event probabilities
  dat.calib.blr <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=3,
             s=100,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"))

  expect_type(dat.calib.blr, "list")
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr[["mean"]], 4)
  expect_length(dat.calib.blr[["plotdata"]], 4)
  expect_length(dat.calib.blr[["plotdata"]][["state3"]]$id, 359)
  expect_length(dat.calib.blr[["plotdata"]][["state6"]]$id, 359)
  expect_error(dat.calib.blr[["plotdata"]][[6]])
  expect_false(dat.calib.blr[["metadata"]]$CI)
  names(dat.calib.blr[["plotdata"]])

})


test_that("check calib_msm output, (j = 1, s = 0), null covs", {

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

  ## Calculate observed event probabilities
  dat.calib.blr <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3)

  expect_type(dat.calib.blr, "list")
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr[["mean"]], 6)
  expect_length(dat.calib.blr[["plotdata"]], 6)
  expect_length(dat.calib.blr[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr[["metadata"]]$CI)

  ## Calculate observed event probabilities
  dat.calib.blr <-
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"),
             w.stabilised = TRUE)

  expect_type(dat.calib.blr, "list")
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_length(dat.calib.blr[["mean"]], 6)
  expect_length(dat.calib.blr[["plotdata"]], 6)
  expect_length(dat.calib.blr[["plotdata"]][[1]]$id, 1778)
  expect_length(dat.calib.blr[["plotdata"]][[6]]$id, 1778)
  expect_false(dat.calib.blr[["metadata"]]$CI)

})


### Run tests for manually inputted predicted probailities
test_that("check calib_msm output, (j = 1, s = 0), curve.type = loess, CI.type = bootstrap", {

  skip_on_cran()

  ## Extract relevant predicted risks from tps0 for creating plots
  tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

  ### Create an object of only 50 observations over which to plot, which we specify manually
  id.lmk <- 1:50
  tp.pred.plot <- tps0 |>
    dplyr::filter(id %in% id.lmk) |>
    dplyr::filter(j == 1) |>
    dplyr::select(any_of(paste("pstate", 1:6, sep = "")))

  ## No confidence interval
  dat.calib.blr <- calib_msm(data.ms = msebmtcal,
                            data.raw = ebmtcal,
                            j = 1,
                            s = 0,
                            t = 1826,
                            tp.pred = tp.pred,
                            calib.type = 'blr',
                            curve.type = "loess",
                            tp.pred.plot = tp.pred.plot, transitions.out = NULL)

  ## Should be one less column in plotdata (no patient ids)
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_equal(ncol(dat.calib.blr[["plotdata"]][[1]]), 2)
  expect_equal(nrow(dat.calib.blr[["plotdata"]][[1]]), 50)
  expect_no_error(summary(dat.calib.blr))

  ## With confidence interval
  dat.calib.blr <- calib_msm(data.ms = msebmtcal,
                            data.raw = ebmtcal,
                            j = 1,
                            s = 0,
                            t = 1826,
                            tp.pred = tp.pred,
                            calib.type = 'blr',
                            curve.type = "loess",
                            CI = 95,
                            CI.R.boot = 3,
                            tp.pred.plot = tp.pred.plot, transitions.out = NULL)

  ## Should be one less column in plotdata (no patient ids)
  expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
  expect_equal(ncol(dat.calib.blr[["plotdata"]][[1]]), 4)
  expect_equal(nrow(dat.calib.blr[["plotdata"]][[1]]), 50)
  expect_no_error(summary(dat.calib.blr))

})


### Run tests for warnings with small cohort
test_that("Test warnings for bootstrapping with small cohort", {

  skip_on_cran()

  ## Reduce to 50 individuals
  # Extract the predicted transition probabilities out of state j = 1 for first 100 individuals
  tp.pred <- tps0 |>
    dplyr::filter(id %in% 1:50) |>
    dplyr::filter(j == 1) |>
    dplyr::select(any_of(paste("pstate", 1:6, sep = "")))
  # Reduce ebmtcal to first 50 individuals
  ebmtcal <- ebmtcal |> dplyr::filter(id %in% 1:50)
  # Reduce msebmtcal.cmprsk to first 100 individuals
  msebmtcal <- msebmtcal |> dplyr::filter(id %in% 1:50)

  ## No confidence interval
  expect_warning(calib_msm(data.ms = msebmtcal,
                          data.raw = ebmtcal,
                          j = 1,
                          s = 0,
                          t = 1826,
                          tp.pred = tp.pred,
                          calib.type = 'blr',
                          curve.type = "loess",
                          CI = 95,
                          CI.R.boot = 3,
                          transitions.out = c(1)))

})


test_that("check calib_msm output, (j = 1, s = 0),
          manual weights,
          manually define vector of predicted probabilities,
          manually define transition out,
          estimate curves using rcs", {

            ## Extract relevant predicted risks from tps0
            tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

            ## Define t
            t <- 1826

            ## Extract data for plot manually
            ids.uncens <- ebmtcal |>
              subset(dtcens > t | (dtcens < t & dtcens.s == 0)) |>
              dplyr::pull(id)
            tp.pred.plot <- tps0 |>
              dplyr::filter(j == 1 & id %in% ids.uncens) |>
              dplyr::select(any_of(paste("pstate", 1:6, sep = "")))

            ## Calculate manual weights
            weights.manual <-
              calc_weights(data.ms = msebmtcal,
                           data.raw = ebmtcal,
                           t = 1826,
                           s = 0,
                           landmark.type = "state",
                           j = 1,
                           max.weight = 10,
                           stabilised = FALSE)

            ## Calculate observed event probabilities using weights.manual
            dat.calib.blr.w.manual <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       weights = weights.manual$ipcw,
                       tp.pred.plot = tp.pred.plot,
                       transitions.out = c(1,2,3,4,5,6))

            expect_type(dat.calib.blr.w.manual, "list")
            expect_equal(class(dat.calib.blr.w.manual), c("calib_blr", "calib_msm"))
            expect_length(dat.calib.blr.w.manual[["mean"]], 6)
            expect_length(dat.calib.blr.w.manual[["plotdata"]], 6)
            expect_length(dat.calib.blr.w.manual[["plotdata"]][[1]]$pred, 1778)
            expect_length(dat.calib.blr.w.manual[["plotdata"]][[6]]$pred, 1778)
            expect_false(dat.calib.blr.w.manual[["metadata"]]$CI)

          })

test_that("check calib_msm output, (j = 1, s = 0),
          manual weights,
          manually define vector of predicted probabilities,
          manually define transition out,
          estimate curves using loess", {

            ## Extract relevant predicted risks from tps0
            tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

            ## Define t
            t <- 1826

            ## Extract data for plot manually
            ids.uncens <- ebmtcal |>
              subset(dtcens > t | (dtcens < t & dtcens.s == 0)) |>
              dplyr::pull(id)
            tp.pred.plot <- tps0 |>
              dplyr::filter(j == 1 & id %in% ids.uncens) |>
              dplyr::select(any_of(paste("pstate", 1:6, sep = "")))

            ## Calculate manual weights
            weights.manual <-
              calc_weights(data.ms = msebmtcal,
                           data.raw = ebmtcal,
                           t = 1826,
                           s = 0,
                           landmark.type = "state",
                           j = 1,
                           max.weight = 10,
                           stabilised = FALSE)

            ## Calculate observed event probabilities using weights.manual
            dat.calib.blr.w.manual <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "loess",
                       rcs.nk = 3,
                       weights = weights.manual$ipcw,
                       tp.pred.plot = tp.pred.plot,
                       transitions.out = c(1,2,3,4,5,6))

            expect_type(dat.calib.blr.w.manual, "list")
            expect_equal(class(dat.calib.blr.w.manual), c("calib_blr", "calib_msm"))
            expect_length(dat.calib.blr.w.manual[["mean"]], 6)
            expect_length(dat.calib.blr.w.manual[["plotdata"]], 6)
            expect_length(dat.calib.blr.w.manual[["plotdata"]][[1]]$pred, 1778)
            expect_length(dat.calib.blr.w.manual[["plotdata"]][[6]]$pred, 1778)
            expect_false(dat.calib.blr.w.manual[["metadata"]]$CI)

          })

test_that("check calib_msm output, (j = 1, s = 0),
          with CI,
          manually define vector of predicted probabilities,
          manually define transition out", {

            skip_on_cran()

            ## Extract relevant predicted risks from tps0
            tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), any_of(paste("pstate", 1:6, sep = "")))

            ## Define t
            t <- 1826

            ## Extract data for plot manually
            ids.uncens <- ebmtcal |>
              subset(dtcens > t | (dtcens < t & dtcens.s == 0)) |>
              dplyr::pull(id)
            tp.pred.plot <- tps0 |>
              dplyr::filter(j == 1 & id %in% ids.uncens) |>
              dplyr::select(any_of(paste("pstate", 1:6, sep = "")))

            ## Calculate observed event probabilities
            dat.calib.blr <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       w.covs = c("year", "agecl", "proph", "match"),
                       CI = 95,
                       CI.R.boot = 5,
                       tp.pred.plot = tp.pred.plot,
                       transitions.out = c(1,2,3,4,5,6))

            expect_type(dat.calib.blr, "list")
            expect_equal(class(dat.calib.blr), c("calib_blr", "calib_msm"))
            expect_length(dat.calib.blr[["mean"]], 6)
            expect_length(dat.calib.blr[["plotdata"]], 6)
            expect_length(dat.calib.blr[["plotdata"]][[1]]$pred, 1778)
            expect_length(dat.calib.blr[["plotdata"]][[6]]$pred, 1778)
            expect_equal(dat.calib.blr[["metadata"]]$CI, 95)

          })

test_that("check calib_msm output, (j = 1, s = 0),
          Manually define function to estimate weights", {

            skip_on_cran()

            ## Extract relevant predicted risks from tps0
            tp.pred <- dplyr::select(dplyr::filter(tps0, j == 1), dplyr::any_of(paste("pstate", 1:6, sep = "")))

            ###
            ### Calculate observed event probabilities
            dat.calib.blr <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       w.covs = c("year", "agecl", "proph", "match"))

            ###
            ### Calculate manual weights
            weights.manual <-
              calc_weights(data.ms = msebmtcal,
                           data.raw = ebmtcal,
                           covs =  c("year", "agecl", "proph", "match"),
                           t = 1826,
                           s = 0,
                           landmark.type = "state",
                           j = 1,
                           max.weight = 10,
                           stabilised = FALSE)

            ###
            ### Calculate observed event probabilities same function as internal procedure, and check it agrees with dat.calib.blr
            dat.calib.blr.w.manual <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       weights = weights.manual$ipcw)

            expect_equal(dat.calib.blr[["plotdata"]][[1]], dat.calib.blr.w.manual[["plotdata"]][[1]])

            ###
            ### Calculate observed event probabilities using an incorrect vector of weights, and see if its different from dat.calib.blr
            dat.calib.blr.w.manual <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       weights = rep(1,nrow(weights.manual)))

            expect_false(any(dat.calib.blr[["plotdata"]][[1]]$obs == dat.calib.blr.w.manual[["plotdata"]][[1]]$obs))

            ###
            ### Calculate observed event probabilities with w.function, where calc_weights_manual = calc_weights (exactly same as internal procedure)
            calc_weights_manual <- calc_weights

            dat.calib.blr.w.function <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       w.function = calc_weights_manual,
                       w.covs = c("year", "agecl", "proph", "match"))

            expect_type(dat.calib.blr.w.function, "list")
            expect_equal(class(dat.calib.blr.w.function), c("calib_blr", "calib_msm"))
            expect_length(dat.calib.blr.w.function[["mean"]], 6)
            expect_length(dat.calib.blr.w.function[["plotdata"]], 6)
            expect_length(dat.calib.blr.w.function[["plotdata"]][[1]]$id, 1778)
            expect_length(dat.calib.blr.w.function[["plotdata"]][[6]]$id, 1778)
            expect_false(dat.calib.blr.w.function[["metadata"]]$CI)

            ## Check answer is same whether w.function used or not
            expect_equal(dat.calib.blr[["plotdata"]][[1]], dat.calib.blr.w.function[["plotdata"]][[1]])
            expect_equal(dat.calib.blr[["plotdata"]][[6]], dat.calib.blr.w.function[["plotdata"]][[6]])

            ###
            ### Redefine calc_weights, but change order of all the input arguments (this shouldn't make a difference)
            calc_weights_manual <- function(stabilised = FALSE, max.follow = NULL, data.ms, covs = NULL, landmark.type = "state", j = NULL, t, s, max.weight = 10, data.raw){

              ### Modify everybody to be censored after time t, if a max.follow has been specified
              if(!is.null(max.follow)){
                if (max.follow == "t"){
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < t + 2 ~ dtcens.s,
                                                                        dtcens >= t + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < t + 2 ~ dtcens,
                                                                      dtcens >= t + 2 ~ t + 2))
                } else {
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens.s,
                                                                        dtcens >= max.follow + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens,
                                                                      dtcens >= max.follow + 2 ~ max.follow + 2))
                }
              }

              ### Create a new outcome, which is the time until censored from s
              data.raw$dtcens.modified <- data.raw$dtcens - s

              ### Save a copy of data.raw
              data.raw.save <- data.raw

              ### If landmark.type = "state", calculate weights only in individuals in state j at time s
              ### If landmark.type = "all", calculate weights in all uncensored individuals at time s (note that this excludes individuals
              ### who have reached absorbing states, who have been 'censored' from the survival distribution is censoring)
              if (landmark.type == "state"){
                ### Identify individuals who are uncensored in state j at time s
                ids.uncens <- base::subset(data.ms, from == j & Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              } else if (landmark.type == "all"){
                ### Identify individuals who are uncensored time s
                ids.uncens <- base::subset(data.ms, Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              }

              ### Subset data.ms and data.raw to these individuals
              data.ms <- data.ms |> base::subset(id %in% ids.uncens)
              data.raw <- data.raw |> base::subset(id %in% ids.uncens)

              ###
              ### Create models for censoring in order to calculate the IPCW weights
              ### Seperate models for estimating the weights, and stabilising the weights (intercept only model)
              ###
              if (!is.null(covs)){
                ### A model where we adjust for predictor variables
                cens.model <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ ",
                                                                      paste(covs, collapse = "+"),
                                                                      sep = "")),
                                              data = data.raw)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
              } else if (is.null(covs)){
                ### If user has not input any predictors for estimating weights, the model for estimating the weights is the intercept only model (i.e. Kaplan Meier estimator)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
                ### Assign cens.model to be the same
                cens.model <- cens.model.int


              }

              ### Calculate a data frame containing probability of censored and uncenosred at each time point
              ### The weights will be the probability of being uncensored, at the time of the event for each individual

              ## Extract baseline hazard
              data.weights <- survival::basehaz(cens.model, centered = FALSE)
              ## Add lp to data.raw.save
              data.raw.save$lp <- stats::predict(cens.model, newdata = data.raw.save, type = "lp", reference = "zero")

              ### Create weights for the cohort at time t - s
              ### Note for individuals who reached an absorbing state, we take the probability of them being uncensored at the time of reached the
              ### abosrbing state. For individuals still alive, we take the probability of being uncensored at time t - s.

              ### Get location of individuals who entered absorbing states or were censored prior to evaluation time
              obs.absorbed.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 0)
              obs.censored.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 1)

              ###
              ### Now create unstabilised probability of (un)censoring weights
              ### Note that weights are the probability of being uncensored, so if an individual has low probability of being uncesored,
              ### the inervse of this will be big, weighting them strongly
              ###

              ### First assign all individuals a weight of the probability of being uncensored at time t
              ### This is the linear predictor times the cumulative hazard at time t, and appropriate transformation to get a risk
              data.raw.save$pcw <- as.numeric(exp(-exp(data.raw.save$lp)*data.weights$hazard[max(which(data.weights$time <= t - s))]))

              ## Write a function which will extract the uncensored probability for an individual with linear predictor lp at a given time t
              prob.uncens.func <- function(input){

                ## Assign t and person_id
                t <- input[1]
                lp <- input[2]

                if (t <= 0){
                  return(NA)
                } else if (t > 0){
                  ## Get hazard at appropriate time
                  if (t < min(data.weights$time)){
                    bhaz.t <- 0
                  } else if (t >= min(data.weights$time)){
                    bhaz.t <- data.weights$hazard[max(which(data.weights$time <= t))]
                  }

                  ## Return risk
                  return(exp(-exp(lp)*bhaz.t))
                }
              }

              ### Apply this function to all the times at which individuals have entered an absorbing state prior to censoring
              data.raw.save$pcw[obs.absorbed.prior] <- apply(data.raw.save[obs.absorbed.prior, c("dtcens.modified", "lp")], 1, FUN = prob.uncens.func)

              ### For individuals who were censored prior to t, assign the weight as NA
              data.raw.save$pcw[obs.censored.prior] <- NA

              ### Invert these
              data.raw.save$ipcw <- 1/data.raw.save$pcw

              ###
              ### Stabilise these weights dependent on user-input
              ###
              if (stabilised == TRUE){

                ## Extract baseline hazard
                data.weights.numer <- survival::basehaz(cens.model.int, centered = TRUE)

                ### Assign all individuals a weight of the probability of being uncesored at time t
                data.raw.save$pcw.numer <- as.numeric(exp(-data.weights.numer$hazard[max(which(data.weights.numer$time <= t - s))]))

                ### Create stabilised weight
                data.raw.save$ipcw.stab <- data.raw.save$pcw.numer*data.raw.save$ipcw
              }

              ### Finally cap these at 10 and create output object

              ### Create output object
              if (stabilised == FALSE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "pcw" = data.raw.save$pcw)
              } else if (stabilised == TRUE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                data.raw.save$ipcw.stab <- pmin(data.raw.save$ipcw.stab, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "ipcw.stab" = data.raw.save$ipcw.stab, "pcw" = data.raw.save$pcw)
              }

              return(output.weights)

            }

            ### Calculate observed event probabilities with new w.function
            dat.calib.blr.w.function <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       w.function = calc_weights_manual,
                       w.covs = c("year", "agecl", "proph", "match"))

            expect_type(dat.calib.blr.w.function, "list")
            expect_equal(class(dat.calib.blr.w.function), c("calib_blr", "calib_msm"))
            expect_length(dat.calib.blr.w.function[["mean"]], 6)
            expect_length(dat.calib.blr.w.function[["plotdata"]], 6)
            expect_length(dat.calib.blr.w.function[["plotdata"]][[1]]$id, 1778)
            expect_length(dat.calib.blr.w.function[["plotdata"]][[6]]$id, 1778)
            expect_false(dat.calib.blr.w.function[["metadata"]]$CI)

            ## Check answer is same whether w.function used or not
            expect_equal(dat.calib.blr[["plotdata"]][[1]], dat.calib.blr.w.function[["plotdata"]][[1]])
            expect_equal(dat.calib.blr[["plotdata"]][[6]], dat.calib.blr.w.function[["plotdata"]][[6]])


            ###
            ### Repeat this process (manual definition of calc_weights), again arguments are in different order, but this time an extra argument is added, which adds 10 to every weight.
            ### This extra arguments is something that could be inputted by user, and want to check it does actually change the answer. It should no longer agree with dat.calb.blr.
            calc_weights_manual <- function(stabilised = FALSE, max.follow = NULL, data.ms, covs = NULL, landmark.type = "state", j = NULL, t, s, max.weight = 10, data.raw, extra.arg = NULL){

              ### Modify everybody to be censored after time t, if a max.follow has been specified
              if(!is.null(max.follow)){
                if (max.follow == "t"){
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < t + 2 ~ dtcens.s,
                                                                        dtcens >= t + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < t + 2 ~ dtcens,
                                                                      dtcens >= t + 2 ~ t + 2))
                } else {
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens.s,
                                                                        dtcens >= max.follow + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens,
                                                                      dtcens >= max.follow + 2 ~ max.follow + 2))
                }
              }

              ### Create a new outcome, which is the time until censored from s
              data.raw$dtcens.modified <- data.raw$dtcens - s

              ### Save a copy of data.raw
              data.raw.save <- data.raw

              ### If landmark.type = "state", calculate weights only in individuals in state j at time s
              ### If landmark.type = "all", calculate weights in all uncensored individuals at time s (note that this excludes individuals
              ### who have reached absorbing states, who have been 'censored' from the survival distribution is censoring)
              if (landmark.type == "state"){
                ### Identify individuals who are uncensored in state j at time s
                ids.uncens <- base::subset(data.ms, from == j & Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              } else if (landmark.type == "all"){
                ### Identify individuals who are uncensored time s
                ids.uncens <- base::subset(data.ms, Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              }

              ### Subset data.ms and data.raw to these individuals
              data.ms <- data.ms |> base::subset(id %in% ids.uncens)
              data.raw <- data.raw |> base::subset(id %in% ids.uncens)

              ###
              ### Create models for censoring in order to calculate the IPCW weights
              ### Seperate models for estimating the weights, and stabilising the weights (intercept only model)
              ###
              if (!is.null(covs)){
                ### A model where we adjust for predictor variables
                cens.model <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ ",
                                                                      paste(covs, collapse = "+"),
                                                                      sep = "")),
                                              data = data.raw)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
              } else if (is.null(covs)){
                ### If user has not input any predictors for estimating weights, the model for estimating the weights is the intercept only model (i.e. Kaplan Meier estimator)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
                ### Assign cens.model to be the same
                cens.model <- cens.model.int


              }

              ### Calculate a data frame containing probability of censored and uncenosred at each time point
              ### The weights will be the probability of being uncensored, at the time of the event for each individual

              ## Extract baseline hazard
              data.weights <- survival::basehaz(cens.model, centered = FALSE)
              ## Add lp to data.raw.save
              data.raw.save$lp <- stats::predict(cens.model, newdata = data.raw.save, type = "lp", reference = "zero")

              ### Create weights for the cohort at time t - s
              ### Note for individuals who reached an absorbing state, we take the probability of them being uncensored at the time of reached the
              ### abosrbing state. For individuals still alive, we take the probability of being uncensored at time t - s.

              ### Get location of individuals who entered absorbing states or were censored prior to evaluation time
              obs.absorbed.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 0)
              obs.censored.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 1)

              ###
              ### Now create unstabilised probability of (un)censoring weights
              ### Note that weights are the probability of being uncensored, so if an individual has low probability of being uncesored,
              ### the inervse of this will be big, weighting them strongly
              ###

              ### First assign all individuals a weight of the probability of being uncensored at time t
              ### This is the linear predictor times the cumulative hazard at time t, and appropriate transformation to get a risk
              data.raw.save$pcw <- as.numeric(exp(-exp(data.raw.save$lp)*data.weights$hazard[max(which(data.weights$time <= t - s))]))

              ## Write a function which will extract the uncensored probability for an individual with linear predictor lp at a given time t
              prob.uncens.func <- function(input){

                ## Assign t and person_id
                t <- input[1]
                lp <- input[2]

                if (t <= 0){
                  return(NA)
                } else if (t > 0){
                  ## Get hazard at appropriate time
                  if (t < min(data.weights$time)){
                    bhaz.t <- 0
                  } else if (t >= min(data.weights$time)){
                    bhaz.t <- data.weights$hazard[max(which(data.weights$time <= t))]
                  }

                  ## Return risk
                  return(exp(-exp(lp)*bhaz.t))
                }
              }

              ### Apply this function to all the times at which individuals have entered an absorbing state prior to censoring
              data.raw.save$pcw[obs.absorbed.prior] <- apply(data.raw.save[obs.absorbed.prior, c("dtcens.modified", "lp")], 1, FUN = prob.uncens.func)

              ### For individuals who were censored prior to t, assign the weight as NA
              data.raw.save$pcw[obs.censored.prior] <- NA

              ### Invert these
              data.raw.save$ipcw <- 1/data.raw.save$pcw

              ###
              ### Stabilise these weights dependent on user-input
              ###
              if (stabilised == TRUE){

                ## Extract baseline hazard
                data.weights.numer <- survival::basehaz(cens.model.int, centered = TRUE)

                ### Assign all individuals a weight of the probability of being uncesored at time t
                data.raw.save$pcw.numer <- as.numeric(exp(-data.weights.numer$hazard[max(which(data.weights.numer$time <= t - s))]))

                ### Create stabilised weight
                data.raw.save$ipcw.stab <- data.raw.save$pcw.numer*data.raw.save$ipcw
              }

              ### Finally cap these at 10 and create output object

              ### Create output object
              if (stabilised == FALSE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "pcw" = data.raw.save$pcw)
              } else if (stabilised == TRUE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                data.raw.save$ipcw.stab <- pmin(data.raw.save$ipcw.stab, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "ipcw.stab" = data.raw.save$ipcw.stab, "pcw" = data.raw.save$pcw)
              }

              ### Add this extra argument to the weights, to check it does something
              output.weights$ipcw <- output.weights$ipcw + extra.arg

              return(output.weights)

            }

            ### Calculate observed event probabilities with new w.function
            dat.calib.blr.w.function <-
              calib_msm(data.ms = msebmtcal,
                       data.raw = ebmtcal,
                       j=1,
                       s=0,
                       t = 1826,
                       tp.pred = tp.pred, calib.type = 'blr',
                       curve.type = "rcs",
                       rcs.nk = 3,
                       w.function = calc_weights_manual,
                       w.covs = c("year", "agecl", "proph", "match"),
                       extra.arg = 10)

            expect_type(dat.calib.blr.w.function, "list")
            expect_equal(class(dat.calib.blr.w.function), c("calib_blr", "calib_msm"))
            expect_length(dat.calib.blr.w.function[["mean"]], 6)
            expect_length(dat.calib.blr.w.function[["plotdata"]], 6)
            expect_length(dat.calib.blr.w.function[["plotdata"]][[1]]$id, 1778)
            expect_length(dat.calib.blr.w.function[["plotdata"]][[6]]$id, 1778)
            expect_false(dat.calib.blr.w.function[["metadata"]]$CI)

            ## Check answer is same whether w.function used or not
            expect_false(any(dat.calib.blr[["plotdata"]][[1]]$obs == dat.calib.blr.w.function[["plotdata"]][[1]]$obs))

          })

test_that("test warnings and errors", {

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps0, j == 3), any_of(paste("pstate", 1:6, sep = "")))

  ## Calculate observed event probabilities
  expect_error(
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.covs = c("year", "agecl", "proph", "match"),
             transitions.out = c(1,2,3,4))
  )

  ## Calculate observed event probabilities
  weights.manual <-
    calc_weights(data.ms = msebmtcal,
                 data.raw = ebmtcal,
                 t = 1826,
                 s = 0,
                 landmark.type = "state",
                 j = 1,
                 max.weight = 10,
                 stabilised = FALSE)$ipcw[-1]

  expect_error(
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             weights = weights.manual)
  )

  ## Write a weights function with the wrong variable names
  calc_weights_error <- function(data.ms, data.raw, covs = NULL, t, s, j = NULL, max.weight = 10, stabilised = FALSE, max.follow = NULL){
    return(data.ms)
  }
  expect_error(
    calib_msm(data.ms = msebmtcal,
             data.raw = ebmtcal,
             j=1,
             s=0,
             t = 1826,
             tp.pred = tp.pred, calib.type = 'blr',
             curve.type = "rcs",
             rcs.nk = 3,
             w.function = calc_weights_error,
             w.covs = c("year", "agecl", "proph", "match"))
  )

  ### check  warnings when there are zero predicted probabilities for valid transitions

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps100, j == 1), dplyr::any_of(paste("pstate", 1:6, sep = "")))
  tp.pred[,1] <- rep(0, nrow(tp.pred))

  ###
  ### Calculate observed event probabilities
  expect_error(calib_msm(data.ms = msebmtcal,
                        data.raw = ebmtcal,
                        j=1,
                        s=100,
                        t = 1826,
                        tp.pred = tp.pred, calib.type = 'blr',
                        curve.type = "loess",
                        rcs.nk = 3,
                        w.covs = c("year", "agecl", "proph", "match")))


  ### check error when there are non-zero predicted probabilities for transitions which do not happen

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps100, j == 1), dplyr::any_of(paste("pstate", 1:6, sep = "")))
  tp.pred[,3] <- runif(nrow(tp.pred), 0, 1)

  ###
  ### Calculate observed event probabilities
  expect_error(calib_msm(data.ms = msebmtcal,
                        data.raw = ebmtcal,
                        j=1,
                        s=100,
                        t = 1826,
                        tp.pred = tp.pred, calib.type = 'blr',
                        curve.type = "loess",
                        rcs.nk = 3,
                        w.covs = c("year", "agecl", "proph", "match")))

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps100, j == 3), dplyr::any_of(paste("pstate", 1:6, sep = "")))
  tp.pred[,1] <- runif(nrow(tp.pred), 0, 1)

  ###
  ### Calculate observed event probabilities
  expect_error(calib_msm(data.ms = msebmtcal,
                        data.raw = ebmtcal,
                        j=3,
                        s=100,
                        t = 1826,
                        tp.pred = tp.pred, calib.type = 'blr',
                        curve.type = "loess",
                        rcs.nk = 3,
                        w.covs = c("year", "agecl", "proph", "match")))

  ### check error when there are zero predicted probabilities for transitions which do happen in dataset

  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps100, j == 3), dplyr::any_of(paste("pstate", 1:6, sep = "")))
  tp.pred[,3] <- rep(0, nrow(tp.pred))

  ###
  ### Calculate observed event probabilities
  expect_error(calib_msm(data.ms = msebmtcal,
                        data.raw = ebmtcal,
                        j=3,
                        s=100,
                        t = 1826,
                        tp.pred = tp.pred, calib.type = 'blr',
                        curve.type = "loess",
                        rcs.nk = 3,
                        w.covs = c("year", "agecl", "proph", "match")))


  ## Extract relevant predicted risks from tps0
  tp.pred <- dplyr::select(dplyr::filter(tps100, j == 3), dplyr::any_of(paste("pstate", 1:6, sep = "")))
  tp.pred[,1] <- rep(0, nrow(tp.pred))

})

Try the calibmsm package in your browser

Any scripts or data that you put into this service are public.

calibmsm documentation built on June 22, 2024, 9:33 a.m.