Nothing
## ----include=FALSE------------------------------------------------------------
knitr::opts_template$set(
remove_linter_comments = list(tidy = function(code, ...) gsub(pattern = "#\\s?nolint.*", replacement = "", code))
)
## ----setup, eval=FALSE, opts.label=c("remove_linter_comments")----------------
# library(dplyr)
# library(teal.data)
#
# study_duration_secs <- lubridate::seconds(lubridate::years(2))
#
# sample_fct <- function(x, N, ...) { # nolint: object_name.
# checkmate::assert_number(N)
# factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x)
# }
#
# retain <- function(df, value_var, event, outside = NA) {
# indices <- c(1, which(event == TRUE), nrow(df) + 1)
# values <- c(outside, value_var[event == TRUE])
# rep(values, diff(indices))
# }
#
# relvar_init <- function(relvar1, relvar2) {
# if (length(relvar1) != length(relvar2)) {
# message(simpleError(
# "The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements."
# ))
# return(NA)
# }
# List("relvar1" = relvar1, "relvar2" = relvar2)
# }
#
# rel_var <- function(df = NULL, var_name = NULL, var_values = NULL, related_var = NULL) {
# if (is.null(df)) {
# message("Missing data frame argument value.")
# return(NA)
# } else {
# n_relvar1 <- length(unique(df[, related_var, drop = TRUE]))
# n_relvar2 <- length(var_values)
# if (n_relvar1 != n_relvar2) {
# message(paste("Unequal vector lengths for", related_var, "and", var_name))
# return(NA)
# } else {
# relvar1 <- unique(df[, related_var, drop = TRUE])
# relvar2_values <- rep(NA, nrow(df))
# for (r in seq_len(length(relvar1))) {
# matched <- which(df[, related_var, drop = TRUE] == relvar1[r])
# relvar2_values[matched] <- var_values[r]
# }
# return(relvar2_values)
# }
# }
# }
#
# visit_schedule <- function(visit_format = "WEEK",
# n_assessments = 10L,
# n_days = 5L) {
# if (!(toupper(visit_format) %in% c("WEEK", "CYCLE"))) {
# message("Visit format value must either be: WEEK or CYCLE")
# return(NA)
# }
# if (toupper(visit_format) == "WEEK") {
# assessments <- 1:n_assessments
# assessments_ord <- -1:n_assessments
# visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1))
# } else if (toupper(visit_format) == "CYCLE") {
# cycles <- sort(rep(1:n_assessments, times = 1, each = n_days))
# days <- rep(seq(1:n_days), times = n_assessments, each = 1)
# assessments_ord <- 0:(n_assessments * n_days)
# visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days))
# }
# visit_values <- stats::reorder(factor(visit_values), assessments_ord)
# }
#
# rtpois <- function(n, lambda) stats::qpois(stats::runif(n, stats::dpois(0, lambda), 1), lambda)
#
# rtexp <- function(n, rate, l = NULL, r = NULL) {
# if (!is.null(l)) {
# l - log(1 - stats::runif(n)) / rate
# } else if (!is.null(r)) {
# -log(1 - stats::runif(n) * (1 - exp(-r * rate))) / rate
# } else {
# stats::rexp(n, rate)
# }
# }
#
# str_extract <- function(string, pattern) regmatches(string, gregexpr(pattern, string))
#
# with_label <- function(x, label) {
# attr(x, "label") <- as.vector(label)
# x
# }
#
# common_var_labels <- c(
# USUBJID = "Unique Subject Identifier",
# STUDYID = "Study Identifier",
# PARAM = "Parameter",
# PARAMCD = "Parameter Code",
# AVISIT = "Analysis Visit",
# AVISITN = "Analysis Visit (N)",
# AVAL = "Analysis Value",
# AVALU = "Analysis Value Unit",
# AVALC = "Character Result/Finding",
# BASE = "Baseline Value",
# BASE2 = "Screening Value",
# ABLFL = "Baseline Record Flag",
# ABLFL2 = "Screening Record Flag",
# CHG = "Absolute Change from Baseline",
# PCHG = "Percentage Change from Baseline",
# ANRIND = "Analysis Reference Range Indicator",
# BNRIND = "Baseline Reference Range Indicator",
# ANRLO = "Analysis Normal Range Lower Limit",
# ANRHI = "Analysis Normal Range Upper Limit",
# CNSR = "Censor",
# ADTM = "Analysis Datetime",
# ADY = "Analysis Relative Day",
# ASTDY = "Analysis Start Relative Day",
# AENDY = "Analysis End Relative Day",
# ASTDTM = "Analysis Start Datetime",
# AENDTM = "Analysis End Datetime",
# VISITDY = "Planned Study Day of Visit",
# EVNTDESC = "Event or Censoring Description",
# CNSDTDSC = "Censor Date Description",
# BASETYPE = "Baseline Type",
# DTYPE = "Derivation Type",
# ONTRTFL = "On Treatment Record Flag",
# WORS01FL = "Worst Observation in Window Flag 01",
# WORS02FL = "Worst Post-Baseline Observation"
# )
## ----adsl, eval=FALSE, opts.label=c("remove_linter_comments")-----------------
# generate_adsl <- function(N = 200) { # nolint: object_name.
# set.seed(1)
# sys_dtm <- lubridate::fast_strptime("20/2/2019 11:16:16.683", "%d/%m/%Y %H:%M:%OS", tz = "UTC")
# country_site_prob <- c(.5, .121, .077, .077, .075, .052, .046, .025, .014, .003)
#
# adsl <- tibble::tibble(
# STUDYID = rep("AB12345", N) %>% with_label("Study Identifier"),
# COUNTRY = sample_fct(
# c("CHN", "USA", "BRA", "PAK", "NGA", "RUS", "JPN", "GBR", "CAN", "CHE"),
# N,
# prob = country_site_prob
# ) %>% with_label("Country"),
# SITEID = sample_fct(1:20, N, prob = rep(country_site_prob, times = 2)),
# SUBJID = paste("id", seq_len(N), sep = "-") %>% with_label("Subject Identifier for the Study"),
# AGE = (sapply(stats::rchisq(N, df = 5, ncp = 10), max, 0) + 20) %>% with_label("Age"),
# SEX = c("F", "M") %>% sample_fct(N, prob = c(.52, .48)) %>% with_label("Sex"),
# ARMCD = c("ARM A", "ARM B", "ARM C") %>% sample_fct(N) %>% with_label("Planned Arm Code"),
# ARM = dplyr::recode(
# .data$ARMCD,
# "ARM A" = "A: Drug X", "ARM B" = "B: Placebo", "ARM C" = "C: Combination"
# ) %>% with_label("Description of Planned Arm"),
# ACTARMCD = .data$ARMCD %>% with_label("Actual Arm Code"),
# ACTARM = .data$ARM %>% with_label("Description of Actual Arm"),
# RACE = c(
# "ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE",
# "MULTIPLE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "OTHER", "UNKNOWN"
# ) %>%
# sample_fct(N, prob = c(.55, .23, .16, .05, .004, .003, .002, .002)) %>%
# with_label("Race"),
# TRTSDTM = sys_dtm + sample(seq(0, study_duration_secs), size = N, replace = TRUE) %>%
# with_label("Datetime of First Exposure to Treatment"),
# TRTEDTM = c(TRTSDTM + study_duration_secs) %>%
# with_label("Datetime of Last Exposure to Treatment"),
# EOSDY = ceiling(as.numeric(difftime(TRTEDTM, TRTSDTM, units = "days"))) %>%
# with_label("End of Study Relative Day"),
# EOSDT = lubridate::date(TRTEDTM) %>% with_label("End of Study Date"),
# STRATA1 = c("A", "B", "C") %>% sample_fct(N) %>% with_label("Stratification Factor 1"),
# STRATA2 = c("S1", "S2") %>% sample_fct(N) %>% with_label("Stratification Factor 2"),
# BMRKR1 = stats::rchisq(N, 6) %>% with_label("Continuous Level Biomarker 1"),
# BMRKR2 = sample_fct(c("LOW", "MEDIUM", "HIGH"), N) %>% with_label("Continuous Level Biomarker 2")
# )
#
# # associate sites with countries and regions
# adsl <- adsl %>%
# dplyr::mutate(
# SITEID = paste0(.data$COUNTRY, "-", .data$SITEID) %>% with_label("Study Site Identifier"),
# REGION1 = factor(dplyr::case_when(
# COUNTRY %in% c("NGA") ~ "Africa",
# COUNTRY %in% c("CHN", "JPN", "PAK") ~ "Asia",
# COUNTRY %in% c("RUS") ~ "Eurasia",
# COUNTRY %in% c("GBR") ~ "Europe",
# COUNTRY %in% c("CAN", "USA") ~ "North America",
# COUNTRY %in% c("BRA") ~ "South America",
# TRUE ~ as.character(NA)
# )) %>% with_label("Geographic Region 1"),
# SAFFL = factor("Y") %>% with_label("Safety Population Flag")
# ) %>%
# dplyr::mutate(
# USUBJID = paste(.data$STUDYID, .data$SITEID, .data$SUBJID, sep = "-") %>%
# with_label("Unique Subject Identifier")
# )
#
# # disposition related variables
# # using probability of 1 for the "DEATH" level to ensure at least one death record exists
# l_dcsreas <- list(
# choices = c(
# "ADVERSE EVENT", "DEATH", "LACK OF EFFICACY", "PHYSICIAN DECISION",
# "PROTOCOL VIOLATION", "WITHDRAWAL BY PARENT/GUARDIAN", "WITHDRAWAL BY SUBJECT"
# ),
# prob = c(.2, 1, .1, .1, .2, .1, .1)
# )
# l_dthcat_other <- list(
# choices = c(
# "Post-study reporting of death", "LOST TO FOLLOW UP", "MISSING", "SUICIDE", "UNKNOWN"
# ),
# prob = c(.1, .3, .3, .2, .1)
# )
#
# adsl <- adsl %>%
# dplyr::mutate(
# EOSSTT = dplyr::case_when(
# EOSDY == max(EOSDY, na.rm = TRUE) ~ "COMPLETED",
# EOSDY < max(EOSDY, na.rm = TRUE) ~ "DISCONTINUED",
# is.na(TRTEDTM) ~ "ONGOING"
# ) %>% with_label("End of Study Status")
# ) %>%
# dplyr::mutate(
# EOTSTT = .data$EOSSTT %>% with_label("End of Treatment Status")
# ) %>%
# dplyr::mutate(
# DCSREAS = ifelse(
# .data$EOSSTT == "DISCONTINUED",
# sample(x = l_dcsreas$choices, size = N, replace = TRUE, prob = l_dcsreas$prob),
# as.character(NA)
# ) %>% with_label("Reason for Discontinuation from Study")
# )
#
# tmc_ex_adsl <- adsl %>%
# dplyr::mutate(DTHDT = dplyr::case_when(
# DCSREAS == "DEATH" ~ lubridate::date(TRTEDTM + lubridate::days(sample(0:50, size = N, replace = TRUE)))
# ) %>% with_label("Date of Death"))
#
# save(tmc_ex_adsl, file = "data/tmc_ex_adsl.rda", compress = "xz")
# }
## ----adae, eval=FALSE---------------------------------------------------------
# generate_adae <- function(adsl = tmc_ex_adsl,
# max_n_aes = 5) {
# set.seed(1)
# lookup_ae <- tibble::tribble(
# ~AEBODSYS, ~AELLT, ~AEDECOD, ~AEHLT, ~AEHLGT, ~AETOXGR, ~AESOC, ~AESER, ~AEREL,
# "cl A.1", "llt A.1.1.1.1", "dcd A.1.1.1.1", "hlt A.1.1.1", "hlgt A.1.1", "1", "cl A", "N", "N",
# "cl A.1", "llt A.1.1.1.2", "dcd A.1.1.1.2", "hlt A.1.1.1", "hlgt A.1.1", "2", "cl A", "Y", "N",
# "cl B.1", "llt B.1.1.1.1", "dcd B.1.1.1.1", "hlt B.1.1.1", "hlgt B.1.1", "5", "cl B", "Y", "Y",
# "cl B.2", "llt B.2.1.2.1", "dcd B.2.1.2.1", "hlt B.2.1.2", "hlgt B.2.1", "3", "cl B", "N", "N",
# "cl B.2", "llt B.2.2.3.1", "dcd B.2.2.3.1", "hlt B.2.2.3", "hlgt B.2.2", "1", "cl B", "Y", "N",
# "cl C.1", "llt C.1.1.1.3", "dcd C.1.1.1.3", "hlt C.1.1.1", "hlgt C.1.1", "4", "cl C", "N", "Y",
# "cl C.2", "llt C.2.1.2.1", "dcd C.2.1.2.1", "hlt C.2.1.2", "hlgt C.2.1", "2", "cl C", "N", "Y",
# "cl D.1", "llt D.1.1.1.1", "dcd D.1.1.1.1", "hlt D.1.1.1", "hlgt D.1.1", "5", "cl D", "Y", "Y",
# "cl D.1", "llt D.1.1.4.2", "dcd D.1.1.4.2", "hlt D.1.1.4", "hlgt D.1.1", "3", "cl D", "N", "N",
# "cl D.2", "llt D.2.1.5.3", "dcd D.2.1.5.3", "hlt D.2.1.5", "hlgt D.2.1", "1", "cl D", "N", "Y"
# )
#
# aag <- utils::read.table(
# sep = ",", header = TRUE,
# text = paste(
# "NAMVAR,SRCVAR,GRPTYPE,REFNAME,REFTERM,SCOPE",
# "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 aesi,dcd D.2.1.5.3,",
# "CQ01NAM,AEDECOD,CUSTOM,D.2.1.5.3/A.1.1.1.1 aesi,dcd A.1.1.1.1,",
# "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 aesi,dcd C.1.1.1.3,BROAD",
# "SMQ01NAM,AEDECOD,SMQ,C.1.1.1.3/B.2.2.3.1 aesi,dcd B.2.2.3.1,BROAD",
# "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 aesi,dcd Y.9.9.9.9,NARROW",
# "SMQ02NAM,AEDECOD,SMQ,Y.9.9.9.9/Z.9.9.9.9 aesi,dcd Z.9.9.9.9,NARROW",
# sep = "\n"
# ), stringsAsFactors = FALSE
# )
#
# adae <- Map(
# function(id, sid) {
# n_aes <- sample(c(0, seq_len(max_n_aes)), 1)
# i <- sample(seq_len(nrow(lookup_ae)), n_aes, TRUE)
# dplyr::mutate(
# lookup_ae[i, ],
# USUBJID = id,
# STUDYID = sid
# )
# },
# adsl$USUBJID,
# adsl$STUDYID
# ) %>%
# Reduce(rbind, .) %>%
# `[`(c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8, 9)) %>%
# dplyr::mutate(
# AETERM = gsub("dcd", "trm", .data$AEDECOD) %>% with_label("Reported Term for the Adverse Event"),
# AESEV = dplyr::case_when(
# AETOXGR == 1 ~ "MILD",
# AETOXGR %in% c(2, 3) ~ "MODERATE",
# AETOXGR %in% c(4, 5) ~ "SEVERE"
# ) %>% with_label("Severity/Intensity")
# )
#
# # merge adsl to be able to add AE date and study day variables
# adae <- dplyr::inner_join(adae, adsl, by = c("STUDYID", "USUBJID"), multiple = "all") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::mutate(ASTDTM = sample(
# seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
# size = 1
# )) %>%
# dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
# # add 1 to end of range incase both values passed to sample() are the same
# dplyr::mutate(AENDTM = sample(
# seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
# size = 1
# )) %>%
# dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
# dplyr::mutate(LDOSEDTM = dplyr::case_when(
# TRTSDTM < ASTDTM ~ lubridate::as_datetime(stats::runif(1, TRTSDTM, ASTDTM)),
# TRUE ~ ASTDTM
# )) %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ASTDTM, .data$AETERM)
#
# adae <- adae %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(AESEQ = seq_len(dplyr::n())) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$ASTDTM,
# .data$AETERM,
# .data$AESEQ
# )
#
# outcomes <- c(
# "UNKNOWN",
# "NOT RECOVERED/NOT RESOLVED",
# "RECOVERED/RESOLVED WITH SEQUELAE",
# "RECOVERING/RESOLVING",
# "RECOVERED/RESOLVED"
# )
#
# adae <- adae %>%
# dplyr::mutate(
# AEOUT = factor(ifelse(
# .data$AETOXGR == "5",
# "FATAL",
# as.character(sample_fct(outcomes, nrow(adae), prob = c(0.1, 0.2, 0.1, 0.3, 0.3)))
# )) %>% with_label("Outcome of Adverse Event"),
# TRTEMFL = ifelse(.data$ASTDTM >= .data$TRTSDTM, "Y", "") %>%
# with_label("Treatment Emergent Analysis Flag")
# )
#
# l_aag <- split(aag, interaction(aag$NAMVAR, aag$SRCVAR, aag$GRPTYPE, drop = TRUE))
#
# # Create aesi flags
# l_aesi <- lapply(l_aag, function(d_adag, d_adae) {
# names(d_adag)[names(d_adag) == "REFTERM"] <- d_adag$SRCVAR[1]
# names(d_adag)[names(d_adag) == "REFNAME"] <- d_adag$NAMVAR[1]
#
# if (d_adag$GRPTYPE[1] == "CUSTOM") {
# d_adag <- d_adag[-which(names(d_adag) == "SCOPE")]
# } else if (d_adag$GRPTYPE[1] == "SMQ") {
# names(d_adag)[names(d_adag) == "SCOPE"] <- paste0(substr(d_adag$NAMVAR[1], 1, 5), "SC")
# }
#
# d_adag <- d_adag[-which(names(d_adag) %in% c("NAMVAR", "SRCVAR", "GRPTYPE"))]
# d_new <- dplyr::left_join(x = d_adae, y = d_adag, by = intersect(names(d_adae), names(d_adag)))
# d_new[, dplyr::setdiff(names(d_new), names(d_adae)), drop = FALSE]
# }, adae)
# adae <- dplyr::bind_cols(adae, l_aesi)
#
# actions <- c(
# "DOSE RATE REDUCED",
# "UNKNOWN",
# "NOT APPLICABLE",
# "DRUG INTERRUPTED",
# "DRUG WITHDRAWN",
# "DOSE INCREASED",
# "DOSE NOT CHANGED",
# "DOSE REDUCED",
# "NOT EVALUABLE"
# )
#
# tmc_ex_adae <- adae %>%
# dplyr::mutate(
# AEACN = factor(ifelse(
# .data$AETOXGR == "5",
# "NOT EVALUABLE",
# as.character(sample_fct(actions, nrow(adae), prob = c(0.05, 0.05, 0.05, 0.01, 0.05, 0.1, 0.45, 0.1, 0.05)))
# )) %>% with_label("Action Taken With Study Treatment")
# ) %>%
# col_relabel(
# AEBODSYS = "Body System or Organ Class",
# AELLT = "Lowest Level Term",
# AEDECOD = "Dictionary-Derived Term",
# AEHLT = "High Level Term",
# AEHLGT = "High Level Group Term",
# AETOXGR = "Analysis Toxicity Grade",
# AESOC = "Primary System Organ Class",
# AESER = "Serious Event",
# AEREL = "Analysis Causality",
# AESEQ = "Sponsor-Defined Identifier",
# LDOSEDTM = "End Time/Time of Last Dose",
# CQ01NAM = "CQ 01 Reference Name",
# SMQ01NAM = "SMQ 01 Reference Name",
# SMQ01SC = "SMQ 01 Scope",
# SMQ02NAM = "SMQ 02 Reference Name",
# SMQ02SC = "SMQ 02 Scope"
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adae)[is.na(col_labels(tmc_ex_adae))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adae[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adae, file = "data/tmc_ex_adae.rda", compress = "xz")
# }
## ----adaette, eval=FALSE------------------------------------------------------
# generate_adaette <- function(adsl = tmc_ex_adsl) {
# set.seed(1)
# lookup_adaette <- tibble::tribble(
# ~ARM, ~CATCD, ~CAT, ~LAMBDA, ~CNSR_P,
# "ARM A", "1", "any adverse event", 1 / 80, 0.4,
# "ARM B", "1", "any adverse event", 1 / 100, 0.2,
# "ARM C", "1", "any adverse event", 1 / 60, 0.42,
# "ARM A", "2", "any serious adverse event", 1 / 100, 0.3,
# "ARM B", "2", "any serious adverse event", 1 / 150, 0.1,
# "ARM C", "2", "any serious adverse event", 1 / 80, 0.32,
# "ARM A", "3", "a grade 3-5 adverse event", 1 / 80, 0.2,
# "ARM B", "3", "a grade 3-5 adverse event", 1 / 100, 0.08,
# "ARM C", "3", "a grade 3-5 adverse event", 1 / 60, 0.23
# )
# evntdescr_sel <- "Preferred Term"
# cnsdtdscr_sel <- c(
# "Clinical Cut Off",
# "Completion or Discontinuation",
# "End of AE Reporting Period"
# )
#
# random_patient_data <- function(patient_info) {
# startdt <- lubridate::date(patient_info$TRTSDTM)
# trtedtm <- lubridate::floor_date(dplyr::case_when(
# is.na(patient_info$TRTEDTM) ~ lubridate::date(patient_info$TRTSDTM) + study_duration_secs,
# TRUE ~ lubridate::date(patient_info$TRTEDTM)
# ), unit = "day")
# enddts <- c(patient_info$EOSDT, lubridate::date(trtedtm))
# enddts_min_index <- which.min(enddts)
# adt <- enddts[enddts_min_index]
# adtm <- lubridate::as_datetime(adt)
# ady <- as.numeric(adt - startdt + 1)
# data.frame(
# ARM = patient_info$ARM,
# STUDYID = patient_info$STUDYID,
# SITEID = patient_info$SITEID,
# USUBJID = patient_info$USUBJID,
# PARAMCD = "AEREPTTE",
# PARAM = "Time to end of AE reporting period",
# CNSR = 0,
# AVAL = lubridate::days(ady) / lubridate::years(1),
# AVALU = "YEARS",
# EVNTDESC = ifelse(enddts_min_index == 1, "Completion or Discontinuation", "End of AE Reporting Period"),
# CNSDTDSC = NA,
# ADTM = adtm,
# ADY = ady,
# stringsAsFactors = FALSE
# )
# }
#
# paramcd_hy <- c("HYSTTEUL", "HYSTTEBL")
# param_hy <- c("Time to Hy's Law Elevation in relation to ULN", "Time to Hy's Law Elevation in relation to Baseline")
# param_init_list <- relvar_init(param_hy, paramcd_hy)
# adsl_hy <- dplyr::select(adsl, "STUDYID", "USUBJID", "TRTSDTM", "SITEID", "ARM")
# adaette_hy <- expand.grid(
# STUDYID = unique(adsl$STUDYID),
# USUBJID = adsl$USUBJID,
# PARAM = as.factor(param_init_list$relvar1),
# stringsAsFactors = FALSE
# )
#
# adaette_hy <- dplyr::left_join(adaette_hy, adsl_hy, by = c("STUDYID", "USUBJID"), multiple = "all") %>%
# dplyr::mutate(
# PARAMCD = factor(rel_var(
# df = as.data.frame(adaette_hy),
# var_values = param_init_list$relvar2,
# related_var = "PARAM"
# ))
# ) %>%
# dplyr::mutate(
# CNSR = sample(c(0, 1), prob = c(0.1, 0.9), size = dplyr::n(), replace = TRUE),
# EVNTDESC = dplyr::if_else(
# .data$CNSR == 0,
# "First Post-Baseline Raised ALT or AST Elevation Result",
# NA_character_
# ),
# CNSDTDSC = dplyr::if_else(.data$CNSR == 0, NA_character_,
# sample(c("Last Post-Baseline ALT or AST Result", "Treatment Start"),
# prob = c(0.9, 0.1),
# size = dplyr::n(), replace = TRUE
# )
# )
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(ADTM = dplyr::case_when(
# CNSDTDSC == "Treatment Start" ~ TRTSDTM,
# TRUE ~ TRTSDTM + sample(seq(0, study_duration_secs), size = dplyr::n(), replace = TRUE)
# )) %>%
# dplyr::mutate(
# ADY_int = lubridate::date(ADTM) - lubridate::date(TRTSDTM) + 1,
# ADY = as.numeric(ADY_int),
# AVAL = lubridate::days(ADY_int) / lubridate::weeks(1),
# AVALU = "WEEKS"
# ) %>%
# dplyr::select(-TRTSDTM, -ADY_int)
#
# random_ae_data <- function(lookup_info, patient_info, patient_data) {
# cnsr <- sample(c(0, 1), 1, prob = c(1 - lookup_info$CNSR_P, lookup_info$CNSR_P))
# ae_rep_tte <- patient_data$AVAL[patient_data$PARAMCD == "AEREPTTE"]
# data.frame(
# ARM = rep(patient_data$ARM, 2),
# STUDYID = rep(patient_data$STUDYID, 2),
# SITEID = rep(patient_data$SITEID, 2),
# USUBJID = rep(patient_data$USUBJID, 2),
# PARAMCD = c(
# paste0("AETTE", lookup_info$CATCD),
# paste0("AETOT", lookup_info$CATCD)
# ),
# PARAM = c(
# paste("Time to first occurrence of", lookup_info$CAT),
# paste("Number of occurrences of", lookup_info$CAT)
# ),
# CNSR = c(cnsr, NA),
# AVAL = c(
# ifelse(cnsr == 1, ae_rep_tte, rtexp(1, lookup_info$LAMBDA * 365.25, r = ae_rep_tte)),
# ifelse(cnsr == 1, 0, rtpois(1, lookup_info$LAMBDA * 365.25))
# ),
# AVALU = c("YEARS", NA),
# EVNTDESC = c(ifelse(cnsr == 0, sample(evntdescr_sel, 1), ""), NA),
# CNSDTDSC = c(ifelse(cnsr == 1, sample(cnsdtdscr_sel, 1), ""), NA),
# stringsAsFactors = FALSE
# ) %>% dplyr::mutate(
# ADY = dplyr::if_else(is.na(AVALU), NA_real_, ceiling(as.numeric(lubridate::dyears(AVAL), "days"))),
# ADTM = dplyr::if_else(
# is.na(AVALU),
# lubridate::as_datetime(NA),
# patient_info$TRTSDTM + lubridate::days(ADY)
# )
# )
# }
#
# adaette <- split(adsl, adsl$USUBJID) %>%
# lapply(function(patient_info) {
# patient_data <- random_patient_data(patient_info)
# lookup_arm <- lookup_adaette %>%
# dplyr::filter(.data$ARM == as.character(patient_info$ARMCD))
# ae_data <- split(lookup_arm, lookup_arm$CATCD) %>%
# lapply(random_ae_data, patient_data = patient_data, patient_info = patient_info) %>%
# Reduce(rbind, .)
# dplyr::bind_rows(patient_data, ae_data)
# }) %>%
# Reduce(rbind, .)
# adaette <- rbind(adaette, adaette_hy)
#
# tmc_ex_adaette <- adsl %>%
# dplyr::inner_join(
# dplyr::select(adaette, -"SITEID", -"ARM"),
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::arrange(.data$ADTM) %>%
# dplyr::mutate(PARAM = as.factor(.data$PARAM)) %>%
# dplyr::mutate(PARAMCD = as.factor(.data$PARAMCD)) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$ADTM
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adaette)[is.na(col_labels(tmc_ex_adaette))]),
# function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adaette[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adaette, file = "data/tmc_ex_adaette.rda", compress = "xz")
# }
## ----adcm, eval=FALSE---------------------------------------------------------
# generate_adcm <- function(adsl = tmc_ex_adsl,
# max_n_cms = 5L) {
# set.seed(1)
# lookup_cm <- tibble::tribble(
# ~CMCLAS, ~CMDECOD, ~ATIREL,
# "medcl A", "medname A_1/3", "PRIOR",
# "medcl A", "medname A_2/3", "CONCOMITANT",
# "medcl A", "medname A_3/3", "CONCOMITANT",
# "medcl B", "medname B_1/4", "CONCOMITANT",
# "medcl B", "medname B_2/4", "PRIOR",
# "medcl B", "medname B_3/4", "PRIOR",
# "medcl B", "medname B_4/4", "CONCOMITANT",
# "medcl C", "medname C_1/2", "CONCOMITANT",
# "medcl C", "medname C_2/2", "CONCOMITANT"
# )
#
# adcm <- Map(function(id, sid) {
# n_cms <- sample(c(0, seq_len(max_n_cms)), 1)
# i <- sample(seq_len(nrow(lookup_cm)), n_cms, TRUE)
# dplyr::mutate(
# lookup_cm[i, ],
# USUBJID = id,
# STUDYID = sid
# )
# }, adsl$USUBJID, adsl$STUDYID) %>%
# Reduce(rbind, .) %>%
# `[`(c(4, 5, 1, 2, 3)) %>%
# dplyr::mutate(CMCAT = .data$CMCLAS %>% with_label("Category for Medication"))
#
# # merge adsl to be able to add CM date and study day variables
# adcm <- dplyr::inner_join(
# adcm,
# adsl,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::mutate(ASTDTM = sample(
# seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
# size = 1
# )) %>%
# dplyr::mutate(ASTDY = ceiling(difftime(ASTDTM, TRTSDTM, units = "days"))) %>%
# # add 1 to end of range incase both values passed to sample() are the same
# dplyr::mutate(AENDTM = sample(
# seq(lubridate::as_datetime(ASTDTM), lubridate::as_datetime(TRTENDT + 1), by = "day"),
# size = 1
# )) %>%
# dplyr::mutate(AENDY = ceiling(difftime(AENDTM, TRTSDTM, units = "days"))) %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(STUDYID, USUBJID, ASTDTM)
#
# tmc_ex_adcm <- adcm %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(CMSEQ = seq_len(dplyr::n())) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ASTDTM, .data$CMSEQ) %>%
# dplyr::mutate(
# ATC1 = paste("ATCCLAS1", substr(.data$CMDECOD, 9, 9)) %>% with_label("ATC Level 1 Text"),
# ATC2 = paste("ATCCLAS2", substr(.data$CMDECOD, 9, 9)) %>% with_label("ATC Level 2 Text"),
# ATC3 = paste("ATCCLAS3", substr(.data$CMDECOD, 9, 9)) %>% with_label("ATC Level 3 Text"),
# ATC4 = paste("ATCCLAS4", substr(.data$CMDECOD, 9, 9)) %>% with_label("ATC Level 4 Text")
# ) %>%
# dplyr::mutate(
# CMINDC = sample(c(
# "Nausea", "Hypertension", "Urticaria", "Fever",
# "Asthma", "Infection", "Diabete", "Diarrhea", "Pneumonia"
# ), dplyr::n(), replace = TRUE) %>% with_label("Indication"),
# CMDOSE = sample(1:99, dplyr::n(), replace = TRUE) %>% with_label("Dose per Administration"),
# CMTRT = substr(.data$CMDECOD, 9, 13) %>% with_label("Reported Name of Drug, Med, or Therapy"),
# CMDOSU = sample(c(
# "ug/mL", "ug/kg/day", "%", "uL", "DROP",
# "umol/L", "mg", "mg/breath", "ug"
# ), dplyr::n(), replace = TRUE) %>% with_label("Dose Units")
# ) %>%
# dplyr::mutate(
# CMROUTE = sample(c(
# "INTRAVENOUS", "ORAL", "NASAL",
# "INTRAMUSCULAR", "SUBCUTANEOUS", "INHALED", "RECTAL", "UNKNOWN"
# ), dplyr::n(), replace = TRUE) %>% with_label("Route of Administration"),
# CMDOSFRQ = sample(c(
# "Q4W", "QN", "Q4H", "UNKNOWN", "TWICE",
# "Q4H", "QD", "TID", "4 TIMES PER MONTH"
# ), dplyr::n(), replace = TRUE) %>% with_label("Dosing Frequency per Interval")
# ) %>%
# col_relabel(
# CMCLAS = "Medication Class",
# CMDECOD = "Standardized Medication Name",
# ATIREL = "Time Relation of Medication",
# CMSEQ = "Sponsor-Defined Identifier"
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adcm)[is.na(col_labels(tmc_ex_adcm))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adcm[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adcm, file = "data/tmc_ex_adcm.rda", compress = "xz")
# }
## ----adeg, eval=FALSE---------------------------------------------------------
# generate_adeg <- function(adsl = tmc_ex_adsl,
# n_assessments = 3L,
# n_days = 3L,
# max_n_eg = 3L) {
# set.seed(1)
# param <- c("QT Duration", "RR Duration", "Heart Rate", "ECG Interpretation")
# paramcd <- c("QT", "RR", "HR", "ECGINTP")
# paramu <- c("msec", "msec", "beats/min", "")
# visit_format <- "WEEK"
#
# param_init_list <- relvar_init(param, paramcd)
# unit_init_list <- relvar_init(param, paramu)
#
# adeg <- expand.grid(
# STUDYID = unique(adsl$STUDYID),
# USUBJID = adsl$USUBJID,
# PARAM = as.factor(param_init_list$relvar1),
# AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
# stringsAsFactors = FALSE
# )
#
# adeg$PARAMCD <- as.factor(rel_var(
# df = adeg,
# var_name = "PARAMCD",
# var_values = param_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# adeg <- adeg %>% dplyr::mutate(AVAL = dplyr::case_when(
# .data$PARAMCD == "QT" ~ stats::rnorm(nrow(adeg), mean = 350, sd = 100),
# .data$PARAMCD == "RR" ~ stats::rnorm(nrow(adeg), mean = 1050, sd = 300),
# .data$PARAMCD == "HR" ~ stats::rnorm(nrow(adeg), mean = 70, sd = 20),
# .data$PARAMCD == "ECGINTP" ~ NA_real_
# ))
#
# adeg <- adeg %>% dplyr::mutate(AVISITN = dplyr::case_when(
# AVISIT == "SCREENING" ~ -1,
# AVISIT == "BASELINE" ~ 0,
# (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
# TRUE ~ NA_real_
# ))
#
# adeg$AVALU <- as.factor(rel_var(
# df = adeg,
# var_name = "AVALU",
# var_values = unit_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# adeg <- adeg[order(adeg$STUDYID, adeg$USUBJID, adeg$PARAMCD, adeg$AVISITN), ]
# adeg <- Reduce(rbind, lapply(split(adeg, adeg$USUBJID), function(x) {
# x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
# x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
# "Y",
# ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "")
# )
# x
# }))
#
# adeg$BASE <- ifelse(adeg$AVISITN >= 0, retain(adeg, adeg$AVAL, adeg$ABLFL == "Y"), adeg$AVAL)
# adeg <- adeg %>%
# dplyr::mutate(ANRLO = dplyr::case_when(
# .data$PARAMCD == "QT" ~ 200,
# .data$PARAMCD == "RR" ~ 600,
# .data$PARAMCD == "HR" ~ 40,
# .data$PARAMCD == "ECGINTP" ~ NA_real_
# )) %>%
# dplyr::mutate(ANRHI = dplyr::case_when(
# .data$PARAMCD == "QT" ~ 500,
# .data$PARAMCD == "RR" ~ 1500,
# .data$PARAMCD == "HR" ~ 100,
# .data$PARAMCD == "ECGINTP" ~ NA_real_
# )) %>%
# dplyr::mutate(ANRIND = factor(dplyr::case_when(
# .data$AVAL < .data$ANRLO ~ "LOW",
# .data$AVAL >= .data$ANRLO & .data$AVAL <= .data$ANRHI ~ "NORMAL",
# .data$AVAL > .data$ANRHI ~ "HIGH"
# )))
#
# adeg <- adeg %>%
# dplyr::mutate(CHG = ifelse(.data$AVISITN > 0, .data$AVAL - .data$BASE, NA)) %>%
# dplyr::mutate(PCHG = ifelse(.data$AVISITN > 0, 100 * (.data$CHG / .data$BASE), NA)) %>%
# dplyr::mutate(BASETYPE = "LAST") %>%
# dplyr::group_by(.data$USUBJID, .data$PARAMCD, .data$BASETYPE) %>%
# dplyr::mutate(BNRIND = .data$ANRIND[.data$ABLFL == "Y"]) %>%
# dplyr::ungroup() %>%
# dplyr::mutate(DTYPE = NA)
#
# adeg$ANRIND <- factor(adeg$ANRIND, levels = c("LOW", "NORMAL", "HIGH"))
# adeg$BNRIND <- factor(adeg$BNRIND, levels = c("LOW", "NORMAL", "HIGH"))
#
# adeg <- dplyr::inner_join(
# adsl,
# adeg,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::ungroup() %>%
# dplyr::group_by(USUBJID) %>%
# dplyr::arrange(USUBJID, AVISITN) %>%
# dplyr::mutate(ADTM = rep(
# sort(sample(
# seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
# size = nlevels(AVISIT)
# )),
# each = n() / nlevels(AVISIT)
# )) %>%
# dplyr::ungroup() %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ADTM)
#
# adeg <- adeg %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$BASETYPE,
# .data$AVISITN,
# .data$DTYPE,
# .data$ADTM
# )
#
# adeg <- adeg %>%
# dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
# is.na(.data$TRTSDTM) ~ "",
# is.na(.data$ADTM) ~ "Y",
# (.data$ADTM < .data$TRTSDTM) ~ "",
# (.data$ADTM > .data$TRTEDTM) ~ "",
# TRUE ~ "Y"
# ))) %>%
# dplyr::mutate(AVALC = ifelse(
# .data$PARAMCD == "ECGINTP",
# as.character(sample_fct(c("ABNORMAL", "NORMAL"), nrow(adeg), prob = c(0.25, 0.75))),
# as.character(.data$AVAL)
# ))
#
# adeg <- adeg %>% dplyr::mutate(row_check = seq_len(nrow(adeg)))
# get_groups <- function(data, minimum) {
# data <- data %>%
# dplyr::group_by(.data$USUBJID, .data$PARAMCD, .data$BASETYPE) %>%
# dplyr::arrange(.data$ADTM) %>%
# dplyr::filter(
# (.data$AVISIT != "BASELINE" & .data$AVISIT != "SCREENING") &
# (.data$ONTRTFL == "Y" | .data$ADTM <= .data$TRTSDTM)
# ) %>%
# {
# if (minimum == TRUE) {
# dplyr::filter(., .data$AVAL == min(.data$AVAL)) %>%
# dplyr::mutate(., DTYPE = "MINIMUM", AVISIT = "POST-BASELINE MINIMUM")
# } else {
# dplyr::filter(., .data$AVAL == max(.data$AVAL)) %>%
# dplyr::mutate(., DTYPE = "MAXIMUM", AVISIT = "POST-BASELINE MAXIMUM")
# }
# } %>%
# dplyr::slice(1) %>%
# dplyr::ungroup()
# data
# }
#
# lbls <- col_labels(adeg)
# adeg <- rbind(adeg, get_groups(adeg, TRUE), get_groups(adeg, FALSE)) %>%
# dplyr::arrange(.data$row_check) %>%
# dplyr::group_by(.data$USUBJID, .data$PARAMCD, .data$BASETYPE) %>%
# dplyr::arrange(.data$AVISIT, .by_group = TRUE) %>%
# dplyr::ungroup()
# col_labels(adeg) <- lbls
#
# adeg <- adeg[, -which(names(adeg) %in% c("row_check"))]
# flag_variables <- function(data, worst_obs) {
# data_compare <- data %>%
# dplyr::mutate(row_check = seq_len(nrow(data)))
# data <- data_compare %>%
# {
# if (worst_obs == FALSE) {
# dplyr::group_by(., .data$USUBJID, .data$PARAMCD, .data$BASETYPE, .data$AVISIT) %>%
# dplyr::arrange(., .data$ADTM)
# } else {
# dplyr::group_by(., .data$USUBJID, .data$PARAMCD, .data$BASETYPE)
# }
# } %>%
# dplyr::filter(
# .data$AVISITN > 0 & (.data$ONTRTFL == "Y" | .data$ADTM <= .data$TRTSDTM) &
# is.na(.data$DTYPE)
# ) %>%
# {
# if (worst_obs == TRUE) {
# dplyr::arrange(., .data$AVALC) %>% dplyr::filter(., ifelse(
# .data$PARAMCD == "ECGINTP",
# ifelse(.data$AVALC == "ABNORMAL", .data$AVALC == "ABNORMAL", .data$AVALC == "NORMAL"),
# .data$AVAL == min(.data$AVAL)
# ))
# } else {
# dplyr::filter(., ifelse(
# .data$PARAMCD == "ECGINTP",
# .data$AVALC == "ABNORMAL" | .data$AVALC == "NORMAL",
# .data$AVAL == min(.data$AVAL)
# ))
# }
# } %>%
# dplyr::slice(1) %>%
# {
# if (worst_obs == TRUE) {
# dplyr::mutate(., new_var = dplyr::case_when(
# (.data$AVALC == "ABNORMAL" | .data$AVALC == "NORMAL") ~ "Y",
# (!is.na(.data$AVAL) & is.na(.data$DTYPE)) ~ "Y",
# TRUE ~ ""
# ))
# } else {
# dplyr::mutate(., new_var = dplyr::case_when(
# (.data$AVALC == "ABNORMAL" | .data$AVALC == "NORMAL") ~ "Y",
# (!is.na(.data$AVAL) & is.na(.data$DTYPE)) ~ "Y",
# TRUE ~ ""
# ))
# }
# } %>%
# dplyr::ungroup()
#
# data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "")
# data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]
#
# data_compare
# }
# adeg <- flag_variables(adeg, FALSE) %>% dplyr::rename(WORS01FL = "new_var")
# adeg <- flag_variables(adeg, TRUE) %>% dplyr::rename(WORS02FL = "new_var")
#
# tmc_ex_adeg <- adeg %>%
# dplyr::group_by(.data$USUBJID, .data$PARAMCD, .data$BASETYPE) %>%
# dplyr::mutate(BASEC = ifelse(
# .data$PARAMCD == "ECGINTP",
# .data$AVALC[.data$AVISIT == "BASELINE"],
# as.character(.data$BASE)
# )) %>%
# dplyr::ungroup() %>%
# col_relabel(BASEC = "Baseline Character Value")
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adeg)[is.na(col_labels(tmc_ex_adeg))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adeg[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adeg, file = "data/tmc_ex_adeg.rda", compress = "xz")
# }
## ----adex, eval=FALSE---------------------------------------------------------
# generate_adex <- function(adsl = tmc_ex_adsl,
# n_assessments = 3L,
# n_days = 3L,
# max_n_exs = 3L) {
# set.seed(1)
# param <- c(
# "Dose administered during constant dosing interval",
# "Number of doses administered during constant dosing interval",
# "Total dose administered",
# "Total number of doses administered"
# )
# paramcd <- c("DOSE", "NDOSE", "TDOSE", "TNDOSE")
# paramu <- c("mg", " ", "mg", " ")
# parcat1 <- c("INDIVIDUAL", "OVERALL")
# parcat2 <- c("Drug A", "Drug B")
# visit_format <- "WEEK"
#
# param_init_list <- relvar_init(param, paramcd)
# unit_init_list <- relvar_init(param, paramu)
#
# adex <- expand.grid(
# STUDYID = unique(adsl$STUDYID),
# USUBJID = adsl$USUBJID,
# PARAM = c(
# rep(
# param_init_list$relvar1[1],
# length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days)))
# ),
# rep(
# param_init_list$relvar1[2],
# length(levels(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days)))
# ),
# param_init_list$relvar1[3:4]
# ),
# stringsAsFactors = FALSE
# )
#
# adex$PARAMCD <- as.factor(rel_var(
# df = adex,
# var_name = "PARAMCD",
# var_values = param_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# adex$AVALU <- as.factor(rel_var(
# df = adex,
# var_name = "AVALU",
# var_values = unit_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# adex <- adex %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(PARCAT_ind = sample(c(1, 2), size = 1)) %>%
# dplyr::mutate(PARCAT2 = ifelse(.data$PARCAT_ind == 1, parcat2[1], parcat2[2])) %>%
# dplyr::select(-"PARCAT_ind")
#
# adex <- adex %>% dplyr::mutate(PARCAT1 = dplyr::case_when(
# (.data$PARAMCD == "TNDOSE" | .data$PARAMCD == "TDOSE") ~ "OVERALL",
# .data$PARAMCD == "DOSE" | .data$PARAMCD == "NDOSE" ~ "INDIVIDUAL"
# ))
#
# adex_visit <- adex %>%
# dplyr::filter(.data$PARAMCD == "DOSE" | .data$PARAMCD == "NDOSE") %>%
# dplyr::mutate(
# AVISIT = rep(visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days), 2)
# )
#
# adex <- dplyr::left_join(
# adex %>%
# dplyr::group_by(
# .data$USUBJID,
# .data$STUDYID,
# .data$PARAM,
# .data$PARAMCD,
# .data$AVALU,
# .data$PARCAT1,
# .data$PARCAT2
# ) %>%
# dplyr::mutate(id = dplyr::row_number()),
# adex_visit %>%
# dplyr::group_by(
# .data$USUBJID,
# .data$STUDYID,
# .data$PARAM,
# .data$PARAMCD,
# .data$AVALU,
# .data$PARCAT1,
# .data$PARCAT2
# ) %>%
# dplyr::mutate(id = dplyr::row_number()),
# by = c("USUBJID", "STUDYID", "PARCAT1", "PARCAT2", "id", "PARAMCD", "PARAM", "AVALU")
# ) %>%
# dplyr::select(-"id")
#
# adex <- adex %>% dplyr::mutate(AVISITN = dplyr::case_when(
# AVISIT == "SCREENING" ~ -1,
# AVISIT == "BASELINE" ~ 0,
# (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
# TRUE ~ 999000
# ))
#
# adex2 <- split(adex, adex$USUBJID) %>%
# lapply(function(pinfo) {
# pinfo %>%
# dplyr::filter(.data$PARAMCD == "DOSE") %>%
# dplyr::group_by(.data$USUBJID, .data$PARCAT2, .data$AVISIT) %>%
# dplyr::mutate(changeind = dplyr::case_when(
# .data$AVISIT == "SCREENING" ~ 0,
# .data$AVISIT != "SCREENING" ~ sample(c(-1, 0, 1),
# size = 1,
# prob = c(0.25, 0.5, 0.25),
# replace = TRUE
# )
# )) %>%
# dplyr::ungroup() %>%
# dplyr::group_by(.data$USUBJID, .data$PARCAT2) %>%
# dplyr::mutate(
# csum = cumsum(.data$changeind),
# changeind = dplyr::case_when(
# .data$csum <= -3 ~ sample(c(0, 1), size = 1, prob = c(0.5, 0.5)),
# .data$csum >= 3 ~ sample(c(0, -1), size = 1, prob = c(0.5, 0.5)),
# TRUE ~ .data$changeind
# )
# ) %>%
# dplyr::mutate(csum = cumsum(.data$changeind)) %>%
# dplyr::ungroup() %>%
# dplyr::group_by(.data$USUBJID, .data$PARCAT2, .data$AVISIT) %>%
# dplyr::mutate(AVAL = dplyr::case_when(
# .data$csum == -2 ~ 480,
# .data$csum == -1 ~ 720,
# .data$csum == 0 ~ 960,
# .data$csum == 1 ~ 1200,
# .data$csum == 2 ~ 1440
# )) %>%
# dplyr::select(-c("csum", "changeind")) %>%
# dplyr::ungroup()
# }) %>%
# Reduce(rbind, .)
#
# adextmp <- dplyr::full_join(adex2, adex, by = names(adex))
# adex <- adextmp %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(AVAL = ifelse(.data$PARAMCD == "NDOSE", 1, .data$AVAL)) %>%
# dplyr::mutate(AVAL = ifelse(
# .data$PARAMCD == "TNDOSE",
# sum(.data$AVAL[.data$PARAMCD == "NDOSE"]),
# .data$AVAL
# )) %>%
# dplyr::ungroup() %>%
# dplyr::group_by(.data$USUBJID, .data$STUDYID, .data$PARCAT2) %>%
# dplyr::mutate(AVAL = ifelse(
# .data$PARAMCD == "TDOSE",
# sum(.data$AVAL[.data$PARAMCD == "DOSE"]),
# .data$AVAL
# ))
#
# adex <- dplyr::inner_join(adsl, adex, by = c("STUDYID", "USUBJID"), multiple = "all") %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::mutate(ASTDTM = sample(
# seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
# size = 1
# )) %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ASTDTM)
#
# adex <- adex %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(EXSEQ = seq_len(dplyr::n())) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$ASTDTM,
# .data$AVISITN
# ) %>%
# col_relabel(
# PARCAT1 = "Parameter Category (Individual/Overall)",
# PARCAT2 = "Parameter Category (Drug A/Drug B)",
# EXSEQ = "Analysis Sequence Number"
# )
#
# visit_levels <- str_extract(levels(adex$AVISIT), pattern = "[0-9]+")
# vl_extracted <- vapply(visit_levels, function(x) as.numeric(x[2]), numeric(1))
# vl_extracted <- c(-1, 1, vl_extracted[!is.na(vl_extracted)])
#
# tmc_ex_adex <- adex %>%
# dplyr::mutate(VISITDY = as.numeric(as.character(factor(AVISIT, labels = vl_extracted)))) %>%
# dplyr::mutate(ASTDTM = lubridate::as_datetime(TRTSDTM) + lubridate::days(VISITDY)) %>%
# dplyr::distinct(USUBJID, .keep_all = TRUE)
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adex)[is.na(col_labels(tmc_ex_adex))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adex[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adex, file = "data/tmc_ex_adex.rda", compress = "xz")
# }
## ----adlb, eval=FALSE---------------------------------------------------------
# generate_adlb <- function(adsl = tmc_ex_adsl,
# n_assessments = 3L,
# n_days = 3L,
# max_n_lbs = 3L) {
# set.seed(1)
# lbcat <- c("CHEMISTRY", "CHEMISTRY", "IMMUNOLOGY")
# param <- c(
# "Alanine Aminotransferase Measurement",
# "C-Reactive Protein Measurement",
# "Immunoglobulin A Measurement"
# )
# paramcd <- c("ALT", "CRP", "IGA")
# paramu <- c("U/L", "mg/L", "g/L")
# aval_mean <- c(20, 1, 2)
# visit_format <- "WEEK"
#
# # validate and initialize related variables
# lbcat_init_list <- relvar_init(param, lbcat)
# param_init_list <- relvar_init(param, paramcd)
# unit_init_list <- relvar_init(param, paramu)
#
# adlb <- expand.grid(
# STUDYID = unique(adsl$STUDYID),
# USUBJID = adsl$USUBJID,
# PARAM = as.factor(param_init_list$relvar1),
# AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
# stringsAsFactors = FALSE
# )
#
# # assign AVAL based on different test
# adlb <- adlb %>%
# dplyr::mutate(AVAL = stats::rnorm(nrow(adlb), mean = 1, sd = 0.2)) %>%
# dplyr::left_join(data.frame(PARAM = param, ADJUST = aval_mean), by = "PARAM") %>%
# dplyr::mutate(AVAL = .data$AVAL * .data$ADJUST) %>%
# dplyr::select(-"ADJUST")
#
# # assign related variable values: PARAMxLBCAT are related
# adlb$LBCAT <- as.factor(rel_var(
# df = adlb,
# var_name = "LBCAT",
# var_values = lbcat_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# # assign related variable values: PARAMxPARAMCD are related
# adlb$PARAMCD <- as.factor(rel_var(
# df = adlb,
# var_name = "PARAMCD",
# var_values = param_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# adlb$AVALU <- as.factor(rel_var(
# df = adlb,
# var_name = "AVALU",
# var_values = unit_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# adlb <- adlb %>% dplyr::mutate(AVISITN = dplyr::case_when(
# AVISIT == "SCREENING" ~ -1,
# AVISIT == "BASELINE" ~ 0,
# (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
# TRUE ~ NA_real_
# ))
#
# adlb <- adlb %>%
# dplyr::mutate(AVISITN = dplyr::case_when(
# AVISIT == "SCREENING" ~ -1,
# AVISIT == "BASELINE" ~ 0,
# (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
# TRUE ~ NA_real_
# ))
#
# # order to prepare for change from screening and baseline values
# adlb <- adlb[order(adlb$STUDYID, adlb$USUBJID, adlb$PARAMCD, adlb$AVISITN), ]
#
# adlb <- Reduce(rbind, lapply(split(adlb, adlb$USUBJID), function(x) {
# x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
# x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
# x$ABLFL <- ifelse(toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
# "Y",
# ifelse(toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1", "Y", "")
# )
# x
# }))
#
# adlb$BASE <- ifelse(adlb$ABLFL2 != "Y", retain(adlb, adlb$AVAL, adlb$ABLFL == "Y"), NA)
# anrind_choices <- c("HIGH", "LOW", "NORMAL")
# adlb <- adlb %>%
# dplyr::mutate(BASETYPE = "LAST") %>%
# dplyr::mutate(ANRIND = sample_fct(anrind_choices, nrow(adlb), prob = c(0.1, 0.1, 0.8))) %>%
# dplyr::mutate(ANRLO = dplyr::case_when(
# .data$PARAMCD == "ALT" ~ 7,
# .data$PARAMCD == "CRP" ~ 8,
# .data$PARAMCD == "IGA" ~ 0.8
# )) %>%
# dplyr::mutate(ANRHI = dplyr::case_when(
# .data$PARAMCD == "ALT" ~ 55,
# .data$PARAMCD == "CRP" ~ 10,
# .data$PARAMCD == "IGA" ~ 3
# )) %>%
# dplyr::mutate(DTYPE = NA) %>%
# dplyr::mutate(
# ATOXGR = factor(dplyr::case_when(
# .data$ANRIND == "LOW" ~ sample(
# c("-1", "-2", "-3", "-4", "-5"),
# nrow(adlb),
# replace = TRUE,
# prob = c(0.30, 0.25, 0.20, 0.15, 0)
# ),
# .data$ANRIND == "HIGH" ~ sample(
# c("1", "2", "3", "4", "5"),
# nrow(adlb),
# replace = TRUE,
# prob = c(0.30, 0.25, 0.20, 0.15, 0)
# ),
# .data$ANRIND == "NORMAL" ~ "0"
# )) %>% with_label("Analysis Toxicity Grade")
# ) %>%
# dplyr::group_by(.data$USUBJID, .data$PARAMCD, .data$BASETYPE) %>%
# dplyr::mutate(BTOXGR = .data$ATOXGR[.data$ABLFL == "Y"]) %>%
# dplyr::ungroup() %>%
# col_relabel(BTOXGR = "Baseline Toxicity Grade")
#
# # High and low descriptions of the different PARAMCD values
# # This is currently hard coded as the GDSR does not have these descriptions yet
# grade_lookup <- tibble::tribble(
# ~PARAMCD, ~ATOXDSCL, ~ATOXDSCH,
# "ALB", "Hypoalbuminemia", NA_character_,
# "ALKPH", NA_character_, "Alkaline phosphatase increased",
# "ALT", NA_character_, "Alanine aminotransferase increased",
# "AST", NA_character_, "Aspartate aminotransferase increased",
# "BILI", NA_character_, "Blood bilirubin increased",
# "CA", "Hypocalcemia", "Hypercalcemia",
# "CHOLES", NA_character_, "Cholesterol high",
# "CK", NA_character_, "CPK increased",
# "CREAT", NA_character_, "Creatinine increased",
# "CRP", NA_character_, "C reactive protein increased",
# "GGT", NA_character_, "GGT increased",
# "GLUC", "Hypoglycemia", "Hyperglycemia",
# "HGB", "Anemia", "Hemoglobin increased",
# "IGA", NA_character_, "Immunoglobulin A increased",
# "POTAS", "Hypokalemia", "Hyperkalemia",
# "LYMPH", "CD4 lymphocytes decreased", NA_character_,
# "PHOS", "Hypophosphatemia", NA_character_,
# "PLAT", "Platelet count decreased", NA_character_,
# "SODIUM", "Hyponatremia", "Hypernatremia",
# "WBC", "White blood cell decreased", "Leukocytosis",
# )
#
# # merge grade_lookup onto adlb
# adlb <- dplyr::left_join(adlb, grade_lookup, by = "PARAMCD")
#
# # merge adsl to be able to add LB date and study day variables
# adlb <- dplyr::inner_join(
# adsl,
# adlb,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::ungroup() %>%
# dplyr::group_by(USUBJID) %>%
# dplyr::arrange(USUBJID, AVISITN) %>%
# dplyr::mutate(ADTM = rep(
# sort(sample(
# seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
# size = nlevels(AVISIT)
# )),
# each = n() / nlevels(AVISIT)
# )) %>%
# dplyr::ungroup() %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ADTM)
#
# adlb <- adlb %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(LBSEQ = seq_len(dplyr::n())) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$BASETYPE,
# .data$AVISITN,
# .data$DTYPE,
# .data$ADTM,
# .data$LBSEQ
# ) %>%
# col_relabel(LBSEQ = "Lab Test or Examination Sequence Number")
#
# adlb <- adlb %>% dplyr::mutate(ONTRTFL = factor(dplyr::case_when(
# is.na(.data$TRTSDTM) ~ "",
# is.na(.data$ADTM) ~ "Y",
# (.data$ADTM < .data$TRTSDTM) ~ "",
# (.data$ADTM > .data$TRTEDTM) ~ "",
# TRUE ~ "Y"
# )))
#
# flag_variables <- function(data,
# apply_grouping,
# apply_filter,
# apply_mutate) {
# data_compare <- data %>% dplyr::mutate(row_check = seq_len(nrow(data)))
# data <- data_compare %>%
# {
# if (apply_grouping == TRUE) {
# dplyr::group_by(., .data$USUBJID, .data$PARAMCD, .data$BASETYPE, .data$AVISIT)
# } else {
# dplyr::group_by(., .data$USUBJID, .data$PARAMCD, .data$BASETYPE)
# }
# } %>%
# dplyr::arrange(.data$ADTM, .data$LBSEQ) %>%
# {
# if (apply_filter == TRUE) {
# dplyr::filter(
# .,
# (.data$AVISIT != "BASELINE" & .data$AVISIT != "SCREENING") &
# (.data$ONTRTFL == "Y" | .data$ADTM <= .data$TRTSDTM)
# ) %>%
# dplyr::filter(.data$ATOXGR == max(as.numeric(as.character(.data$ATOXGR))))
# } else if (apply_filter == FALSE) {
# dplyr::filter(
# .,
# (.data$AVISIT != "BASELINE" & .data$AVISIT != "SCREENING") &
# (.data$ONTRTFL == "Y" | .data$ADTM <= .data$TRTSDTM)
# ) %>%
# dplyr::filter(.data$ATOXGR == min(as.numeric(as.character(.data$ATOXGR))))
# } else {
# dplyr::filter(
# .,
# .data$AVAL == min(.data$AVAL) &
# (.data$AVISIT != "BASELINE" & .data$AVISIT != "SCREENING") &
# (.data$ONTRTFL == "Y" | .data$ADTM <= .data$TRTSDTM)
# )
# }
# } %>%
# dplyr::slice(1) %>%
# {
# if (apply_mutate == TRUE) {
# dplyr::mutate(., new_var = ifelse(is.na(.data$DTYPE), "Y", ""))
# } else {
# dplyr::mutate(., new_var = ifelse(is.na(.data$AVAL) == FALSE & is.na(.data$DTYPE), "Y", ""))
# }
# } %>%
# dplyr::ungroup()
#
# data_compare$new_var <- ifelse(data_compare$row_check %in% data$row_check, "Y", "")
# data_compare <- data_compare[, -which(names(data_compare) %in% c("row_check"))]
# data_compare
# }
# adlb <- flag_variables(adlb, TRUE, "ELSE", FALSE) %>% dplyr::rename(WORS01FL = "new_var")
# adlb <- flag_variables(adlb, FALSE, TRUE, TRUE) %>% dplyr::rename(WGRHIFL = "new_var")
# adlb <- flag_variables(adlb, FALSE, FALSE, TRUE) %>% dplyr::rename(WGRLOFL = "new_var")
# adlb <- flag_variables(adlb, TRUE, TRUE, TRUE) %>% dplyr::rename(WGRHIVFL = "new_var")
# adlb <- flag_variables(adlb, TRUE, FALSE, TRUE) %>% dplyr::rename(WGRLOVFL = "new_var")
#
# tmc_ex_adlb <- adlb %>% dplyr::mutate(
# ANL01FL = ifelse(
# (.data$ABLFL == "Y" | (.data$WORS01FL == "Y" & is.na(.data$DTYPE))) &
# (.data$AVISIT != "SCREENING"),
# "Y",
# ""
# ) %>% with_label("Analysis Flag 01 Baseline Post-Baseline"),
# PARAM = as.factor(.data$PARAM)
# )
#
# tmc_ex_adlb <- tmc_ex_adlb %>%
# group_by(.data$USUBJID, .data$PARAMCD, .data$BASETYPE) %>%
# mutate(BNRIND = .data$ANRIND[.data$ABLFL == "Y"]) %>%
# ungroup() %>%
# dplyr::mutate(ADY = ceiling(as.numeric(difftime(.data$ADTM, .data$TRTSDTM, units = "days"))))
#
# tmc_ex_adlb$PARAMCD <- as.factor(tmc_ex_adlb$PARAMCD)
# tmc_ex_adlb <- tmc_ex_adlb %>%
# dplyr::mutate(CHG = .data$AVAL - .data$BASE) %>%
# dplyr::mutate(PCHG = 100 * (.data$CHG / .data$BASE)) %>%
# col_relabel(
# LBCAT = "Category for Lab Test",
# ATOXDSCL = "Analysis Toxicity Description Low",
# ATOXDSCH = "Analysis Toxicity Description High",
# WGRHIFL = "Worst High Grade per Patient",
# WGRLOFL = "Worst Low Grade per Patient",
# WGRHIVFL = "Worst High Grade per Patient per Visit",
# WGRLOVFL = "Worst Low Grade per Patient per Visit"
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adlb)[is.na(col_labels(tmc_ex_adlb))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adlb[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adlb, file = "data/tmc_ex_adlb.rda", compress = "xz")
# }
## ----admh, eval=FALSE---------------------------------------------------------
# generate_admh <- function(adsl = tmc_ex_adsl,
# max_n_mhs = 10L) {
# set.seed(1)
# lookup_mh <- tibble::tribble(
# ~MHBODSYS, ~MHDECOD, ~MHSOC,
# "cl A", "trm A_1/2", "cl A",
# "cl A", "trm A_2/2", "cl A",
# "cl B", "trm B_1/3", "cl B",
# "cl B", "trm B_2/3", "cl B",
# "cl B", "trm B_3/3", "cl B",
# "cl C", "trm C_1/2", "cl C",
# "cl C", "trm C_2/2", "cl C",
# "cl D", "trm D_1/3", "cl D",
# "cl D", "trm D_2/3", "cl D",
# "cl D", "trm D_3/3", "cl D"
# )
#
# admh <- Map(
# function(id, sid) {
# n_mhs <- sample(0:max_n_mhs, 1)
# i <- sample(seq_len(nrow(lookup_mh)), n_mhs, TRUE)
# dplyr::mutate(
# lookup_mh[i, ],
# USUBJID = id,
# STUDYID = sid
# )
# },
# adsl$USUBJID,
# adsl$STUDYID
# ) %>%
# Reduce(rbind, .) %>%
# `[`(c(4, 5, 1, 2, 3)) %>%
# dplyr::mutate(MHTERM = .data$MHDECOD %>% with_label("Reported Term for the Medical History"))
#
# admh <- dplyr::inner_join(
# adsl,
# admh,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::mutate(ASTDTM = sample(
# seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
# size = 1
# )) %>%
# select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ASTDTM, .data$MHTERM) %>%
# dplyr::mutate(MHDISTAT = sample(
# x = c("Resolved", "Ongoing with treatment", "Ongoing without treatment"),
# prob = c(0.6, 0.2, 0.2),
# size = dplyr::n(),
# replace = TRUE
# ) %>% with_label("Status of Disease"))
#
# tmc_ex_admh <- admh %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(MHSEQ = seq_len(dplyr::n())) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ASTDTM) %>%
# col_relabel(
# MHBODSYS = "Body System or Organ Class",
# MHDECOD = "Dictionary-Derived Term",
# MHSOC = "Primary System Organ Class",
# MHSEQ = "Sponsor-Defined Identifier"
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_admh)[is.na(col_labels(tmc_ex_admh))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_admh[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_admh, file = "data/tmc_ex_admh.rda", compress = "xz")
# }
## ----adqs, eval=FALSE---------------------------------------------------------
# generate_adqs <- function(adsl = tmc_ex_adsl,
# n_assessments = 5L,
# n_days = 5L) {
# set.seed(1)
# param <- c(
# "BFI All Questions",
# "Fatigue Interference",
# "Function/Well-Being (GF1,GF3,GF7)",
# "Treatment Side Effects (GP2,C5,GP5)",
# "FKSI-19 All Questions"
# )
# paramcd <- c("BFIALL", "FATIGI", "FKSI-FWB", "FKSI-TSE", "FKSIALL")
# visit_format <- "WEEK"
#
# param_init_list <- relvar_init(param, paramcd)
#
# adqs <- expand.grid(
# STUDYID = unique(adsl$STUDYID),
# USUBJID = adsl$USUBJID,
# PARAM = param_init_list$relvar1,
# AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments, n_days = n_days),
# stringsAsFactors = FALSE
# )
#
# adqs <- dplyr::mutate(
# adqs,
# AVISITN = dplyr::case_when(
# AVISIT == "SCREENING" ~ -1,
# AVISIT == "BASELINE" ~ 0,
# (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
# TRUE ~ NA_real_
# )
# )
#
# adqs$PARAMCD <- rel_var(df = adqs, var_name = "PARAMCD", var_values = param_init_list$relvar2, related_var = "PARAM")
# adqs$AVAL <- stats::rnorm(nrow(adqs), mean = 50, sd = 8) + adqs$AVISITN * stats::rnorm(nrow(adqs), mean = 5, sd = 2)
# adqs <- adqs[order(adqs$STUDYID, adqs$USUBJID, adqs$PARAMCD, adqs$AVISITN), ]
#
# adqs <- Reduce(
# rbind,
# lapply(
# split(adqs, adqs$USUBJID),
# function(x) {
# x$STUDYID <- adsl$STUDYID[which(adsl$USUBJID == x$USUBJID[1])]
# x$ABLFL2 <- ifelse(x$AVISIT == "SCREENING", "Y", "")
# x$ABLFL <- ifelse(
# toupper(visit_format) == "WEEK" & x$AVISIT == "BASELINE",
# "Y",
# ifelse(
# toupper(visit_format) == "CYCLE" & x$AVISIT == "CYCLE 1 DAY 1",
# "Y",
# ""
# )
# )
# x
# }
# )
# )
#
# adqs$BASE <- ifelse(adqs$ABLFL2 != "Y", retain(adqs, adqs$AVAL, adqs$ABLFL == "Y"), NA)
# adqs <- adqs %>% dplyr::mutate(CHG = .data$AVAL - .data$BASE)
#
# adqs <- dplyr::inner_join(
# adsl,
# adqs,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# ungroup() %>%
# group_by(USUBJID) %>%
# arrange(USUBJID, AVISITN) %>%
# dplyr::mutate(ADTM = rep(
# sort(sample(
# seq(lubridate::as_datetime(TRTSDTM[1]), lubridate::as_datetime(TRTENDT[1]), by = "day"),
# size = nlevels(AVISIT)
# )),
# each = n() / nlevels(AVISIT)
# )) %>%
# dplyr::ungroup() %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ADTM)
#
# tmc_ex_adqs <- adqs %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$AVISITN,
# .data$ADTM
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adqs)[is.na(col_labels(tmc_ex_adqs))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adqs[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adqs, file = "data/tmc_ex_adqs.rda", compress = "xz")
# }
## ----adrs, eval=FALSE, opts.label=c("remove_linter_comments")-----------------
# generate_adrs <- function(adsl = tmc_ex_adsl) {
# set.seed(1)
# param_codes <- stats::setNames(1:5, c("CR", "PR", "SD", "PD", "NE"))
#
# lookup_ars <- expand.grid(
# ARM = c("A: Drug X", "B: Placebo", "C: Combination"),
# AVALC = names(param_codes)
# ) %>% dplyr::mutate(
# AVAL = param_codes[.data$AVALC],
# p_scr = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)),
# p_bsl = c(rep(0, 3), rep(0, 3), c(1, 1, 1), c(0, 0, 0), c(0, 0, 0)),
# p_cycle = c(c(.35, .25, .4), c(.30, .20, .20), c(.2, .25, .3), c(.14, 0.20, 0.18), c(.01, 0.1, 0.02)),
# p_eoi = c(c(.35, .25, .4), c(.30, .20, .20), c(.2, .25, .3), c(.14, 0.20, 0.18), c(.01, 0.1, 0.02)),
# p_fu = c(c(.25, .15, .3), c(.15, .05, .25), c(.3, .25, .3), c(.3, .55, .25), rep(0, 3))
# )
#
# adrs <- split(adsl, adsl$USUBJID) %>%
# lapply(function(pinfo) {
# probs <- dplyr::filter(lookup_ars, .data$ARM == as.character(pinfo$ACTARM))
# # screening
# rsp_screen <- sample(probs$AVALC, 1, prob = probs$p_scr) %>% as.character()
# # baseline
# rsp_bsl <- sample(probs$AVALC, 1, prob = probs$p_bsl) %>% as.character()
# # cycle
# rsp_c2d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character()
# rsp_c4d1 <- sample(probs$AVALC, 1, prob = probs$p_cycle) %>% as.character()
# # end of induction
# rsp_eoi <- sample(probs$AVALC, 1, prob = probs$p_eoi) %>% as.character()
# # follow up
# rsp_fu <- sample(probs$AVALC, 1, prob = probs$p_fu) %>% as.character()
#
# best_rsp <- min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)])
# best_rsp_i <- which.min(param_codes[c(rsp_screen, rsp_bsl, rsp_eoi, rsp_fu, rsp_c2d1, rsp_c4d1)])
#
# avisit <- c("SCREENING", "BASELINE", "CYCLE 2 DAY 1", "CYCLE 4 DAY 1", "END OF INDUCTION", "FOLLOW UP")
#
# # meaningful date information
# TRTSTDT <- lubridate::date(pinfo$TRTSDTM) # nolint: object_name.
# TRTENDT <- lubridate::date(dplyr::if_else( # nolint: object_name.
# !is.na(pinfo$TRTEDTM), pinfo$TRTEDTM,
# lubridate::floor_date(TRTSTDT + study_duration_secs, unit = "day")
# ))
# scr_date <- TRTSTDT - lubridate::days(100)
# bs_date <- TRTSTDT
# flu_date <- sample(seq(lubridate::as_datetime(TRTSTDT), lubridate::as_datetime(TRTENDT), by = "day"), size = 1)
# eoi_date <- sample(seq(lubridate::as_datetime(TRTSTDT), lubridate::as_datetime(TRTENDT), by = "day"), size = 1)
# c2d1_date <- sample(seq(lubridate::as_datetime(TRTSTDT), lubridate::as_datetime(TRTENDT), by = "day"), size = 1)
# c4d1_date <- min(lubridate::date(c2d1_date + lubridate::days(60)), TRTENDT)
#
# tibble::tibble(
# STUDYID = pinfo$STUDYID,
# USUBJID = pinfo$USUBJID,
# PARAMCD = as.factor(c(rep("OVRINV", 6), "BESRSPI", "INVET")),
# PARAM = as.factor(dplyr::recode(
# .data$PARAMCD,
# OVRINV = "Overall Response by Investigator - by visit",
# OVRSPI = "Best Overall Response by Investigator (no confirmation required)",
# BESRSPI = "Best Confirmed Overall Response by Investigator",
# INVET = "Investigator End Of Induction Response"
# )),
# AVALC = c(
# rsp_screen, rsp_bsl, rsp_c2d1, rsp_c4d1, rsp_eoi, rsp_fu,
# names(param_codes)[best_rsp],
# rsp_eoi
# ),
# AVAL = param_codes[.data$AVALC],
# AVISIT = factor(c(avisit, avisit[best_rsp_i], avisit[5]), levels = avisit)
# ) %>%
# merge(
# tibble::tibble(
# AVISIT = avisit,
# ADTM = c(scr_date, bs_date, c2d1_date, c4d1_date, eoi_date, flu_date),
# AVISITN = c(-1, 0, 2, 4, 999, 999),
# TRTSDTM = pinfo$TRTSDTM
# ) %>%
# dplyr::select(-"TRTSDTM"),
# by = "AVISIT"
# )
# }) %>%
# Reduce(rbind, .) %>%
# dplyr::mutate(
# AVALC = factor(.data$AVALC, levels = names(param_codes)),
# DTHFL = factor(sample(c("Y", "N"), nrow(.), replace = TRUE, prob = c(1, 0.8))) %>%
# with_label("Death Flag")
# )
#
# # merge ADSL to be able to add RS date and study day variables
# adrs <- dplyr::inner_join(
# adsl,
# adrs,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# )
#
# tmc_ex_adrs <- adrs %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$AVISITN,
# .data$ADTM
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adrs)[is.na(col_labels(tmc_ex_adrs))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adrs[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adrs, file = "data/tmc_ex_adrs.rda", compress = "xz")
# }
## ----adtte, eval=FALSE--------------------------------------------------------
# generate_adtte <- function(adsl = tmc_ex_adsl) {
# set.seed(1)
# lookup_tte <- tibble::tribble(
# ~ARM, ~PARAMCD, ~PARAM, ~LAMBDA, ~CNSR_P,
# "ARM A", "OS", "Overall Survival", log(2) / 610, 0.4,
# "ARM B", "OS", "Overall Survival", log(2) / 490, 0.3,
# "ARM C", "OS", "Overall Survival", log(2) / 365, 0.2,
# "ARM A", "PFS", "Progression Free Survival", log(2) / 365, 0.4,
# "ARM B", "PFS", "Progression Free Survival", log(2) / 305, 0.3,
# "ARM C", "PFS", "Progression Free Survival", log(2) / 243, 0.2,
# "ARM A", "EFS", "Event Free Survival", log(2) / 365, 0.4,
# "ARM B", "EFS", "Event Free Survival", log(2) / 305, 0.3,
# "ARM C", "EFS", "Event Free Survival", log(2) / 243, 0.2,
# "ARM A", "CRSD", "Duration of Confirmed Response", log(2) / 305, 0.4,
# "ARM B", "CRSD", "Duration of Confirmed Response", log(2) / 243, 0.3,
# "ARM C", "CRSD", "Duration of Confirmed Response", log(2) / 182, 0.2
# )
#
# evntdescr_sel <- c(
# "Death",
# "Disease Progression",
# "Last Tumor Assessment",
# "Adverse Event",
# "Last Date Known To Be Alive"
# )
#
# cnsdtdscr_sel <- c(
# "Preferred Term",
# "Clinical Cut Off",
# "Completion or Discontinuation",
# "End of AE Reporting Period"
# )
#
# adtte <- split(adsl, adsl$USUBJID) %>%
# lapply(FUN = function(pinfo) {
# lookup_tte %>%
# dplyr::filter(.data$ARM == as.character(pinfo$ACTARMCD)) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# STUDYID = pinfo$STUDYID,
# USUBJID = pinfo$USUBJID,
# CNSR = sample(c(0, 1), 1, prob = c(1 - .data$CNSR_P, .data$CNSR_P)),
# AVAL = stats::rexp(1, .data$LAMBDA),
# AVALU = "DAYS",
# EVNTDESC = if (.data$CNSR == 1) {
# sample(evntdescr_sel[-c(1:2)], 1)
# } else {
# ifelse(.data$PARAMCD == "OS",
# sample(evntdescr_sel[1], 1),
# sample(evntdescr_sel[c(1:2)], 1)
# )
# }
# ) %>%
# dplyr::select(-"LAMBDA", -"CNSR_P")
# }) %>%
# Reduce(rbind, .)
#
# # merge ADSL to be able to add TTE date and study day variables
# adtte <- dplyr::inner_join(
# adsl,
# dplyr::select(adtte, -"ARM"),
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::mutate(ADTM = sample(
# seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
# size = 1
# )) %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ADTM)
#
# adtte <- adtte %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::mutate(PARAM = as.factor(.data$PARAM)) %>%
# dplyr::mutate(PARAMCD = as.factor(.data$PARAMCD)) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$ADTM
# )
# lbls <- col_labels(adtte)
#
# # adding adverse event counts and log follow-up time
# tmc_ex_adtte <- dplyr::bind_rows(
# adtte,
# data.frame(adtte %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::slice_head(n = 1) %>%
# dplyr::mutate(
# PARAMCD = "TNE",
# PARAM = "Total Number of Exacerbations",
# AVAL = stats::rpois(1, 3),
# AVALU = "COUNT",
# lgTMATRSK = log(stats::rexp(1, rate = 3)),
# dplyr::across(c("ADTM", "EVNTDESC"), ~NA)
# ))
# ) %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$ADTM
# )
# col_labels(tmc_ex_adtte) <- c(lbls, lgTMATRSK = "Log Time At Risk")
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_adtte)[is.na(col_labels(tmc_ex_adtte))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_adtte[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_adtte, file = "data/tmc_ex_adtte.rda", compress = "xz")
# }
## ----advs, eval=FALSE---------------------------------------------------------
# generate_advs <- function(adsl = tmc_ex_adsl,
# n_assessments = 5L,
# n_days = 5L) {
# set.seed(1)
# param <- c(
# "Diastolic Blood Pressure",
# "Pulse Rate",
# "Respiratory Rate",
# "Systolic Blood Pressure",
# "Temperature", "Weight"
# )
# paramcd <- c("DIABP", "PULSE", "RESP", "SYSBP", "TEMP", "WEIGHT")
# paramu <- c("Pa", "beats/min", "breaths/min", "Pa", "C", "Kg")
# visit_format <- "WEEK"
#
# param_init_list <- relvar_init(param, paramcd)
# unit_init_list <- relvar_init(param, paramu)
#
# advs <- expand.grid(
# STUDYID = unique(adsl$STUDYID),
# USUBJID = adsl$USUBJID,
# PARAM = as.factor(param_init_list$relvar1),
# AVISIT = visit_schedule(visit_format = visit_format, n_assessments = n_assessments),
# stringsAsFactors = FALSE
# )
#
# advs <- dplyr::mutate(
# advs,
# AVISITN = dplyr::case_when(
# AVISIT == "SCREENING" ~ -1,
# AVISIT == "BASELINE" ~ 0,
# (grepl("^WEEK", AVISIT) | grepl("^CYCLE", AVISIT)) ~ as.numeric(AVISIT) - 2,
# TRUE ~ NA_real_
# )
# )
#
# advs$PARAMCD <- as.factor(rel_var(
# df = advs,
# var_name = "PARAMCD",
# var_values = param_init_list$relvar2,
# related_var = "PARAM"
# ))
# advs$AVALU <- as.factor(rel_var(
# df = advs,
# var_name = "AVALU",
# var_values = unit_init_list$relvar2,
# related_var = "PARAM"
# ))
#
# advs$AVAL <- stats::rnorm(nrow(advs), mean = 50, sd = 8)
# advs <- advs[order(advs$STUDYID, advs$USUBJID, advs$PARAMCD, advs$AVISITN), ]
#
# advs <- dplyr::inner_join(
# adsl,
# advs,
# by = c("STUDYID", "USUBJID"),
# multiple = "all"
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(TRTENDT = lubridate::date(dplyr::case_when(
# is.na(TRTEDTM) ~ lubridate::floor_date(lubridate::date(TRTSDTM) + study_duration_secs, unit = "day"),
# TRUE ~ TRTEDTM
# ))) %>%
# dplyr::mutate(ADTM = sample(
# seq(lubridate::as_datetime(TRTSDTM), lubridate::as_datetime(TRTENDT), by = "day"),
# size = 1
# )) %>%
# dplyr::mutate(ADY = ceiling(difftime(ADTM, TRTSDTM, units = "days"))) %>%
# dplyr::select(-TRTENDT) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(.data$STUDYID, .data$USUBJID, .data$ADTM)
#
# tmc_ex_advs <- advs %>%
# dplyr::group_by(.data$USUBJID) %>%
# dplyr::ungroup() %>%
# dplyr::arrange(
# .data$STUDYID,
# .data$USUBJID,
# .data$PARAMCD,
# .data$AVISITN,
# .data$ADTM
# )
#
# i_lbls <- sapply(
# names(col_labels(tmc_ex_advs)[is.na(col_labels(tmc_ex_advs))]), function(x) which(names(common_var_labels) == x)
# )
# col_labels(tmc_ex_advs[names(i_lbls)]) <- common_var_labels[i_lbls]
#
# save(tmc_ex_advs, file = "data/tmc_ex_advs.rda", compress = "xz")
# }
## ----generate data, eval=FALSE------------------------------------------------
# # Generate & load adsl
# tmp_fol <- getwd()
# setwd(dirname(tmp_fol))
# generate_adsl()
# load("data/tmc_ex_adsl.rda")
#
# # Generate other datasets
# generate_adae()
# generate_adaette()
# generate_adcm()
# generate_adeg()
# generate_adex()
# generate_adlb()
# generate_admh()
# generate_adqs()
# generate_adrs()
# generate_adtte()
# generate_advs()
#
# setwd(tmp_fol)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.