Nothing
## ----setup, echo=FALSE--------------------------------------------------------
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE,
collapse = TRUE,
comment = "#>"
)
## ----packages-----------------------------------------------------------------
library(public.ctn0094data)
library(public.ctn0094extra)
library(dplyr)
library(purrr)
library(tibble)
library(tidyr)
library(stringr)
## -----------------------------------------------------------------------------
examplePeople_int <- c(1L, 163L, 210L, 242L, 4L, 17L, 13L, 1103L, 233L, 2089L)
## ----trim-data-1--------------------------------------------------------------
data_ls <- loadRawData(c("randomization", "treatment"))
data_ls$randomization <-
data_ls$randomization %>%
select(who, when, treatment, randomized = which) %>%
# Remove second randomization events
filter(randomized != 2) %>%
# Retain example participants
filter(who %in% examplePeople_int)
data_ls$treatment <-
data_ls$treatment %>%
# Retain example participants
filter(who %in% examplePeople_int)
## ----view-data-1--------------------------------------------------------------
data_ls$randomization
data_ls$treatment
## ----mark-study-drug-days-----------------------------------------------------
treatTimeLong_df <-
# Collapse Data List
data_ls %>%
reduce(.f = full_join, by = c("who", "when")) %>%
as_tibble() %>%
arrange(who, when) %>%
# First pass: find any day with a dose of treatment drug
rowwise() %>%
mutate(
treated = if_else(
condition = !is.na(amount) & amount > 0,
true = TRUE,
false = FALSE,
missing = FALSE
)
)
# Inspect results
treatTimeLong_df
## ----calculate-induction-delay------------------------------------------------
inductDelay_df <-
treatTimeLong_df %>%
# Find the day of the first treatment
group_by(who) %>%
arrange(when) %>%
filter(treated) %>%
slice(1) %>%
mutate(treatStart = when) %>%
select(who, when, treatStart) %>%
# Add the first day back to the original data
left_join(treatTimeLong_df, ., by = c("who", "when")) %>%
group_by(who) %>%
fill(treatStart, .direction = "updown") %>%
# Calculate the delay
filter(randomized == 1) %>%
# This sets time to be missing if the induction was not observed
mutate(inductDelay = treatStart - when) %>%
select(who, treatment, inductDelay) %>%
ungroup()
# Inspect results
inductDelay_df
## ----echo=FALSE---------------------------------------------------------------
eg_df <-
inductDelay_df %>%
filter(inductDelay > 0)
## ----echo=FALSE---------------------------------------------------------------
rm(eg_df)
## ----clean-up-1---------------------------------------------------------------
rm(data_ls, treatTimeLong_df)
## ----backbone-2751------------------------------------------------------------
start_int <- c(`27` = -30L, `51` = -30L)
end_int <- c(`27` = 168L, `51` = 168L)
backbone2751_df <-
CreateProtocolHistory(
start_vec = start_int,
end_vec = end_int
) %>%
filter(who %in% examplePeople_int)
# Inspect
backbone2751_df
## ----backbone-30--------------------------------------------------------------
backbone30_df <-
randomization %>%
full_join(everybody, by = "who") %>%
filter(project == "30") %>%
filter(who %in% examplePeople_int) %>%
CreateCTN30ProtocolHistory() %>%
mutate(project = "30") %>%
select(who, project, when)
# Inspect
backbone30_df
backbone30_df %>%
group_by(who) %>%
summarise(lastDay = max(when))
## ----backbone-first-----------------------------------------------------------
backbone_df <-
bind_rows(backbone2751_df, backbone30_df) %>%
arrange(who)
rm(backbone2751_df, backbone30_df, start_int, end_int)
## ----trim-data-2--------------------------------------------------------------
data_ls <- loadRawData(c("randomization", "visit"))
data_ls$randomization <-
data_ls$randomization %>%
select(who, when, treatment, randomized = which) %>%
# Remove second randomization events
filter(randomized != 2) %>%
# Retain example participants
filter(who %in% examplePeople_int)
data_ls$visit <-
data_ls$visit %>%
filter(who %in% examplePeople_int)
## ----add-randomization-day----------------------------------------------------
timelineRand1_df <-
data_ls$randomization %>%
mutate(randomized = randomized == "1") %>%
# Join to backbone and arrange within subject by day
full_join(backbone_df, by = c("who", "when")) %>%
group_by(who) %>%
arrange(when, .by_group = TRUE) %>%
select(who, project, when, randomized)
# Inspect
timelineRand1_df
## ----add-visit-days-----------------------------------------------------------
timelineVisit1_df <-
data_ls$visit %>%
select(who, when, visit, status = what) %>%
filter(status %in% c("visit", "final")) %>%
mutate(visit = TRUE) %>%
select(who, when, visit) %>%
left_join(timelineRand1_df, ., by = c("who", "when"))
# Inspect
timelineVisit1_df
## ----mark-missing-visits------------------------------------------------------
timelineMissing1_df <- MarkMissing(timelineVisit1_df)
## ----tidy-missing-visit-marks-------------------------------------------------
derived_visitImputed <-
timelineMissing1_df %>%
mutate(visit = as.character(visit)) %>%
replace_na(list(visit = "", visitYM = "")) %>%
mutate(visitImputed = paste0(visit, visitYM)) %>%
mutate(
visitImputed = str_replace(
visitImputed, pattern = "TRUETRUE", replacement = "Present"
)
) %>%
select(who, when, visitImputed) %>%
filter(visitImputed != "") %>%
ungroup()
# Inspect
derived_visitImputed
## ----clean-up-2---------------------------------------------------------------
rm(
backbone_df, data_ls, timelineMissing1_df, timelineRand1_df, timelineVisit1_df
)
## ----backbone-again-----------------------------------------------------------
# CTN-0027 and CTN-0051
start_int <- c(`27` = -30L, `51` = -30L)
end_int <- c(`27` = 168L, `51` = 168L) # 24 weeks
backbone2751_df <- CreateProtocolHistory(
start_vec = start_int, end_vec = end_int
)
# CTN-0030
backbone30_df <-
randomization %>%
full_join(everybody, by = "who") %>%
filter(project == "30") %>%
CreateCTN30ProtocolHistory() %>%
mutate(project = "30") %>%
select(who, project, when)
# All Days
backbone_df <- bind_rows(
backbone2751_df, backbone30_df
) %>%
arrange(who) %>%
mutate(project = factor(project, levels = c("27", "30", "51"))) %>%
filter(who %in% examplePeople_int)
rm(backbone2751_df, backbone30_df, start_int, end_int)
## -----------------------------------------------------------------------------
randomized_df <-
randomization %>%
filter(who %in% examplePeople_int) %>%
mutate(randomized = as.integer(as.character(which))) %>%
select(who, when, randomized) %>%
left_join(everybody, by = "who") %>%
filter( !(randomized == 2 & project %in% c("27", "51")) ) %>%
select(-project)
udsUse2_df <-
backbone_df %>%
left_join(randomized_df, by = c("who", "when")) %>%
left_join(derived_visitImputed, by = c("who", "when")) %>%
left_join(uds, by = c("who", "when")) %>%
# So we can use MarkUse() with UDS data (instead of all_drugs)
mutate(source = "UDS")
## ----eg-visit-day-substances--------------------------------------------------
udsUse2_df %>%
filter(visitImputed == "Present") %>%
filter(!is.na(what)) %>%
filter(who == 17) %>%
print(n = nrow(.))
## ----good-drugs-bad-drugs-----------------------------------------------------
nonStudyOpioids_ls <- list(
"Buprenorphine" = c("Opioid", "Methadone"),
"Methadone" = c("Opioid", "Buprenorphine"),
"Naltrexone" = c("Opioid", "Methadone", "Buprenorphine"),
"Not treated" = c("Opioid", "Methadone", "Buprenorphine")
)
## ----treatment-groups---------------------------------------------------------
treatGroups_ls <-
public.ctn0094data::randomization %>%
filter(who %in% examplePeople_int) %>%
filter(which == 1) %>%
left_join(everybody, by = "who") %>%
select(who, treatment) %>%
mutate(
treat_drug = case_when(
str_detect(treatment, "BUP") ~ "Buprenorphine",
treatment == "Methadone" ~ "Methadone",
treatment == "Inpatient NR-NTX" ~ "Naltrexone"
)
) %>%
select(-treatment) %>%
split(f = .$treat_drug) %>%
map(.f = "who")
# Inspect
treatGroups_ls
## ----mark-non-study-drug-use, warning=TRUE------------------------------------
opioidUse_df <-
udsUse2_df %>%
mutate(
treat_group = case_when(
who %in% treatGroups_ls$Buprenorphine ~ "Buprenorphine",
who %in% treatGroups_ls$Methadone ~ "Methadone",
who %in% treatGroups_ls$Naltrexone ~ "Naltrexone",
TRUE ~ "Not treated"
)
) %>%
split(f = .$treat_group) %>%
# List of data in alphabetical order, so the non-study drugs ls should match
map2(
.y = nonStudyOpioids_ls,
.f = ~{
# REQUIRES "source" COLUMN
MarkUse(
targetDrugs_char = .y,
drugs_df = .x,
# because we have participants with no recorded UDS; in practice DO NOT
# use this command
retainEmptyRows = TRUE
)
}
) %>%
bind_rows() %>%
mutate(
udsOpioid = case_when(
is.na(when) ~ NA,
!is.na(when) ~ TRUE
)
) %>%
select(who, when, udsOpioid)
## ----timeline-to-one-row-per-day----------------------------------------------
timelineUDS_df <-
udsUse2_df %>%
left_join(opioidUse_df, by = c("who", "when")) %>%
select(-what, -source) %>%
# 2,089 rows to 1,994
distinct()
rm(
derived_visitImputed, opioidUse_df, randomized_df, treatGroups_ls, udsUse2_df
)
## ----sets-of-randomized-people------------------------------------------------
wasRandomized_int <-
timelineUDS_df %>%
group_by(who) %>%
summarise(randomized = any(randomized %in% 1:2)) %>%
filter(randomized) %>%
pull(who)
notRandomized_int <-
timelineUDS_df %>%
filter( !(who %in% wasRandomized_int) ) %>%
pull(who) %>%
unique()
# Was randomized:
wasRandomized_int
# Wasn't
notRandomized_int
## ----study-day-ticker-randomized----------------------------------------------
timelineUDS2_df <-
timelineUDS_df %>%
filter(who %in% wasRandomized_int) %>%
group_by(who) %>%
filter(!is.na(randomized)) %>%
mutate(
whenRandomized1 = case_when(randomized == 1 ~ when),
whenRandomized2 = case_when(randomized == 2 ~ when)
) %>%
select(who, when, whenRandomized1, whenRandomized2) %>%
left_join(timelineUDS_df, ., by = c("who", "when")) %>%
filter(who %in% wasRandomized_int) %>%
# Add back in the groupings BEFORE the fill()
group_by(who) %>%
fill(whenRandomized1, .direction = "updown") %>%
fill(whenRandomized2, .direction = "updown") %>%
mutate(daysSinceRand1 = when - whenRandomized1) %>%
mutate(daysSinceRand2 = when - whenRandomized2) %>%
select(-whenRandomized1, -whenRandomized2)
## ----weekly-use-summary-------------------------------------------------------
weeklyUse_df <-
timelineUDS2_df %>%
# The (daysSinceRand1 - 1) adjustment is to ensure that the first study week
# is a full 7 days, since "day 0" is the day before randomization. The "+1"
# at the end is to shift the study week label such that "week 0" is the
# week *BEFORE* treatment, rather than the first week of treatment. So, the
# randomization day is the last day of "week 0" (the pre-treatment period).
mutate(studyWeek1 = (daysSinceRand1 - 1) %/% 7 + 1) %>%
mutate(studyWeek2 = (daysSinceRand2 - 1) %/% 7 + 1) %>%
group_by(who, studyWeek1) %>%
# There are some study weeks with multiple UDS, so we count the number of
# positive and negative UDS per week.
summarise(
nPosUDS = sum(udsOpioid == 1, na.rm = TRUE),
nNegUDS = sum(visitImputed == "Present" & is.na(udsOpioid), na.rm = TRUE),
nMissing = sum(visitImputed == "Missing", na.rm = TRUE),
randWk1 = sum(randomized == 1, na.rm = TRUE) > 0,
randWk2 = sum(randomized == 2 & project == "30", na.rm = TRUE) > 0
) %>%
ungroup()
## ----single-week-symbols------------------------------------------------------
useByWeekRandomized_df <-
weeklyUse_df %>%
mutate(
udsStatus = case_when(
# If we see a positive UDS and no negative UDS, it's positive
nPosUDS > 0 & nNegUDS == 0 ~ "+",
# If we see a negative UDS and no positive UDS, it's negative
nPosUDS == 0 & nNegUDS > 0 ~ "-",
# If we see both positive and negative UDS in a single week, it's both
# (note that we can recode all "B"s to be "+" as necessary)
nPosUDS > 0 & nNegUDS > 0 ~ "*",
# If we don't have any UDS in a week after randomization, it's missing
# UPDATE 2022-03-08: I had this as a 0 originally, and I was using this
# in context of consent, not randomization. This was wrong.
nPosUDS == 0 & nNegUDS == 0 & studyWeek1 >= 1 ~ "o",
# If none of the above are true, but we still have a missing value as
# marked by the MarkMissing() function, then it's missing
nMissing > 0 ~ "o",
# If none of the above conditions are true (probably because it's a week
# before randomization but not during a baseline visit for consent),
# then leave it blank (pre-study)
TRUE ~ "_"
)
) %>%
group_by(who) %>%
# For CTN-0030, Phase II could have started on any day of the week, even in
# the middle of a treatment week. If we try to start counting Phase II
# weeks the day after treatment arms are switched, we can end up with the
# last "week" of Phase I not having 7 days. I'm going to leave the first
# week of Phase II as whatever week the switch happened in.
mutate(
rand1Active = studyWeek1 > 0,
# This returns 0 for any week before the Phase II randomization, and 1 for
# the Phase II randomization week and all subsequent weeks (because the
# randWk2 column is 1 only for the week of second randomization and 0
# all other rows).
rand2Active = cumsum(randWk2),
trialPhase = rand1Active + rand2Active
) %>%
select(
who, studyWeek = studyWeek1, randWk1, randWk2, udsStatus, trialPhase
)
## ----use-patterns-by-phase----------------------------------------------------
weeklyOpioidPatterns_df <-
useByWeekRandomized_df %>%
mutate(
phase = case_when(
trialPhase == 0L ~ "Baseline",
trialPhase == 1L ~ "Phase_1",
trialPhase == 2L ~ "Phase_2"
)
) %>%
group_by(who, phase) %>%
summarise(
usePattern = paste0(udsStatus, collapse = "")
) %>%
pivot_wider(names_from = "phase", values_from = "usePattern")
weeklyOpioidPatterns_df
## ----use-pattern-phases-start-and-end-----------------------------------------
derived_weeklyOpioidPatternRand <-
useByWeekRandomized_df %>%
mutate(
randWeek1 = randWk1 * studyWeek,
randWeek2 = randWk2 * studyWeek
) %>%
summarise(
startWeek = min(studyWeek),
randWeek1 = max(randWeek1),
randWeek2 = if_else( all(randWeek2 == 0), NA_real_, max(randWeek2) ),
endWeek = max(studyWeek)
) %>%
# View this smaller data set before joining. In practice, you can comment out
# this print() command.
print() %>%
left_join(weeklyOpioidPatterns_df, by = "who")
## ----mark-use-non-randomized--------------------------------------------------
nonRandUDS_df <-
backbone_df %>%
filter(who %in% notRandomized_int) %>%
left_join(uds) %>%
# MarkUse() requires a "source" column
mutate(source = "UDS") %>%
MarkUse(
targetDrugs_char = nonStudyOpioids_ls$`Not treated`,
drugs_df = .
) %>%
mutate(
udsOpioid = case_when(
!is.na(when) ~ TRUE,
is.na(when) ~ NA
)
) %>%
select(who, when, udsOpioid) %>%
# Uneccessary here, but some UDS records are duplicated
distinct()
## ----non-randomized-study-week------------------------------------------------
timelineNonRandUDS_df <-
backbone_df %>%
filter(who %in% notRandomized_int) %>%
left_join(nonRandUDS_df, by = c("who", "when")) %>%
# Because this week moves off of the consent date, there is no reason to add
# a `(week - 1)` adjustment
mutate(studyWeek = when %/% 7 + 1) %>%
group_by(who, studyWeek) %>%
summarise(
posUDS = sum(udsOpioid == 1, na.rm = TRUE) > 0
)
## ----single-week-symbols-non-randomized---------------------------------------
weeklyNonRandPatterns_df <-
timelineNonRandUDS_df %>%
mutate(
udsStatus = case_when(
# If they are positive, they are positive
posUDS ~ "+",
# If they aren't positive and it's after the consent week, they are
# missing (because they weren't randomized)
!posUDS & studyWeek >= 1 ~ "o",
# If they aren't positive and it's on or before the consent week, then
# leave it blank (pre-study)
!posUDS & studyWeek < 1 ~ "_"
)
) %>%
mutate(
phase = case_when(
studyWeek < 1 ~ "Baseline",
studyWeek >= 1 ~ "Phase_1"
)
) %>%
# Again, this print is unecessary, but here it make clear what we are doing
print() %>%
group_by(who, phase) %>%
summarise(
usePattern = paste0(udsStatus, collapse = "")
) %>%
pivot_wider(names_from = "phase", values_from = "usePattern") %>%
ungroup()
weeklyNonRandPatterns_df
## ----non-randomized-start-and-end---------------------------------------------
derived_weeklyOpioidPatternNonRand <-
timelineNonRandUDS_df %>%
group_by(who) %>%
summarise(
startWeek = min(studyWeek),
randWeek1 = NA_real_,
randWeek2 = NA_real_,
endWeek = max(studyWeek)
) %>%
left_join(weeklyNonRandPatterns_df, by = "who")
## ----final-product------------------------------------------------------------
derived_weeklyOpioidPattern <-
derived_weeklyOpioidPatternRand %>%
bind_rows(derived_weeklyOpioidPatternNonRand) %>%
arrange(who) %>%
replace_na(list(Phase_2 = ""))
derived_weeklyOpioidPattern
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.