#' Get Analysis Data
#'
#'
#' @export
#'
tb_get_data <- function(raw_dat_rs,
raw_dat_te,
first_n = 99999,
days_fu = 99999,
study = c("1624", "16113"),
...) {
study <- match.arg(study)
## censor events
f_censor <- function(dat, cut_date = cut_date_fu) {
cut_days <- cut_date - dat[["RANDT"]] + 1
inx <- which(dat[["AVAL"]] > cut_days)
dat[inx, "AVAL"] <- cut_days[inx]
dat[inx, "CNSR"] <- 5
dat[inx, "EVNTDESC"] <- "TRUNCATED AT ANALYSIS"
dat
}
dat_te <- raw_dat_te
dat_rs <- raw_dat_rs %>%
filter(DTYPE == "" &
EVAL == "INDEPENDENT ASSESSOR" &
PARAMCD == "SUMTLDLO")
if (study == "16113") {
dat_rs <- dat_rs %>%
rename(RANDT = RANDDT)
dat_te <- dat_te %>%
rename(RANDT = RANDDT)
dat_or <- raw_dat_rs %>%
filter(PARAMCD == "BSTRIND")
}
if (study == "1624") {
dat_rs <- dat_rs %>%
filter(MITT1FL == "Y")
dat_or <- raw_dat_rs %>%
filter(MITT1FL == "Y" &
PARAMCD == "BSTRIND")
}
dat_or <- dat_or %>%
select(SUBJID, AVALC) %>%
mutate(OR = if_else(AVALC == "PR" | AVALC == "CR",
1,
0)) %>%
group_by(SUBJID) %>%
summarize(OR = max(OR))
dat_id <- dat_rs %>%
select(SUBJID, RANDT, ARM,
BASE, AGE, SEX,
ECOGGR1, STRATA1, P1TERTL) %>%
distinct() %>%
mutate(SCL_AGE = scale(AGE),
SCL_BASE = scale(BASE)) %>%
arrange(RANDT)
cut_date_enroll <- dat_id[min(nrow(dat_id), first_n), "RANDT"]
cut_date_fu <- cut_date_enroll + days_fu
## tumor burden
## on 8/20/2022, change day from AVISITN to ADY, the actual day
dat_tb <- dat_rs %>%
select(SUBJID, VISIT, AVISITN, ADY, PCHG) %>%
arrange(SUBJID, AVISITN, PCHG) %>%
mutate(PCHG = if_else(1 == AVISITN, 0, PCHG / 100),
DAY = if_else(1 == AVISITN, 0, ADY)) %>%
left_join(dat_id, by = "SUBJID") %>%
filter(DAY <= cut_date_fu - RANDT + 1) %>%
filter(RANDT <= cut_date_enroll) %>%
filter(!is.na(BASE))
## survival
dat_os <- dat_te %>%
filter(PARAMCD == "OS")
if (study == "1624") {
dat_os <- dat_os %>%
filter(MITT1FL == "Y")
}
dat_os <- dat_os %>%
select(SUBJID, CNSR, EVNTDESC, AVAL, RANDT) %>%
f_censor() %>%
rename(OS_CNSR = CNSR,
OS_EVENT = EVNTDESC,
OS_DAYS = AVAL)
## pfs
dat_pfs <- dat_te %>%
filter(PARAMCD == "PFS" &
EVAL == "INDEPENDENT ASSESSOR")
if (study == "1624") {
dat_pfs <- dat_pfs %>%
filter(MITT1FL == "Y")
}
dat_pfs <- dat_pfs %>%
select(SUBJID, CNSR, EVNTDESC, AVAL, RANDT) %>%
f_censor() %>%
rename(PFS_CNSR = CNSR,
PFS_EVENT = EVNTDESC,
PFS_DAYS = AVAL)
dat_surv <- dat_id %>%
na.omit %>%
left_join(dat_pfs %>%
select(SUBJID, PFS_CNSR, PFS_EVENT, PFS_DAYS),
by = "SUBJID") %>%
left_join(dat_os %>%
select(SUBJID, OS_CNSR, OS_EVENT, OS_DAYS),
by = "SUBJID") %>%
left_join(dat_or, by = "SUBJID") %>%
mutate(OR = if_else(OR %in% 0:1, OR, 0),
PFS = 0 == PFS_CNSR,
OS = 0 == OS_CNSR) %>%
mutate(OR = factor(OR, 0:1, c("No Response", "PR/CR"))) %>%
arrange(RANDT) %>%
filter(RANDT <= cut_date_enroll)
## missing data
warning(paste("Patients with missing covariates are",
"excluded from the result dataset.\n",
" Consider imputing the missing covariates",
"first (e.g., by mice) \n",
" before calling this function."))
## format data
dat_tb <- tb_data_format(dat_tb, study)
dat_surv <- tb_data_format(dat_surv, study)
## permuate data if necessary
rst <- tb_data_permute(dat_tb, dat_surv, ...)
## add date_fu
rst$cut_date_fu <- cut_date_fu
rst
}
#' Permute data
#'
#'
#'
#' @export
#'
tb_data_permute <- function(dat_tb, dat_surv, permute = FALSE, seed = NULL) {
if (!is.null(seed))
old_seed <- set.seed(seed)
if (permute) {
d_sub <- dat_tb %>%
select(SUBJID, ARM, RANDT) %>%
distinct()
d_sub$SUBJID <- sample(d_sub$SUBJID)
dat_tb <- dat_tb %>%
select(-ARM, -RANDT) %>%
left_join(d_sub, by = "SUBJID")
dat_surv <- dat_surv %>%
select(-ARM, -RANDT) %>%
left_join(d_sub, by = "SUBJID")
}
if (!is.null(seed))
set.seed(old_seed)
list(dat_tb = dat_tb,
dat_surv = dat_surv)
}
#' Format data
#'
#'
#' @export
#'
tb_data_format <- function(dat, study) {
lst_format <- NULL
if ("1624" == study) {
lst_format <-
list("P1TERTL" = c("<50%" = "",
">=50% to <=60%" = "PD-L1 Low",
">60% to <90%" = "PD-L1 Median",
">=90%" = "PD-L1 High"),
"ARM" = c("Chemotherapy" = "Chemotherapy",
"Cemiplimab" = "Cemiplimab 350 mg")
)
} else if ("16113" == study) {
lst_format <-
list("P1TERTL" = c("<50%" = "",
">=50% to <=75%" = "PD-L1 Low",
">75% to <95%" = "PD-L1 Median",
">=95%" = "PD-L1 High"),
"ARM" = c("Chemotherapy" =
"Part 2: Arm A - Placebo + Platinum based Chemotherapy",
"Cemiplimab + Chemotherapy" =
"Part 2: Arm B - Cemiplimab 350 mg + Platinum based Chemotherapy"
)
)
}
if (is.null(lst_format))
return(dat)
## format columns
for (i in seq_len(length(lst_format))) {
vname <- names(lst_format)[i]
if (!(vname %in% names(dat)))
next
dat[[vname]] <- factor(dat[[vname]],
levels = lst_format[[vname]],
labels = names(lst_format[[vname]]))
dat[[vname]] <- droplevels(dat[[vname]])
}
dat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.