# Tests - No GENIE Access Required ---------------------------------------------
test_that("No specifications- runs with no error", {
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data
), NA)
})
test_that("pull data synapse object is missing", {
expect_error(create_analytic_cohort())
})
test_that("Institution- argument check", {
expect_error(msk <- create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
institution = "MSK"
), NA)
expect_equal("MSK", unique(msk[[1]]$institution))
expect_error(dfci <- create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
institution = "DFCI"
), NA)
expect_equal("DFCI", unique(dfci[[1]]$institution))
expect_error(vicc <- create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
institution = "VICC"
), NA)
expect_equal("VICC", unique(vicc[[1]]$institution))
expect_error(uhn <- create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
institution = "UHN"
), NA)
expect_equal("UHN", unique(uhn[[1]]$institution))
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
institution = "non-existant"
), "Select from*")
})
test_that("stage_dx- argument check", {
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = "Stage I"
), NA)
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = "Stage II"
), NA)
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = "Stage III"
), NA)
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = "Stage IV"
), NA)
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = "staGe IV"
), NA)
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = c("Stage I", "Stage IV")
), NA)
expect_error(create_analytic_cohort(
data_synapse = genieBPC::nsclc_test_data,
stage_dx = "none"
), "*")
})
# Tests - Requiring GENIE Access -----------------------------------------------
# pull data for each cohort
# return to avoid having to re-run pull_data_synapse for
# each test
# pull data for each cohort
# return to avoid having to re-run pull_data_synapse for
# each test
testthat::expect_true(if (.is_connected_to_genie()) {
# data frame of each release to use for pmap
data_releases <- synapse_tables %>%
distinct(cohort, version) %>%
# define expected number of dataframes based on whether TM and RT data were released
mutate(expected_n_dfs = case_when(
# no TM or RT
cohort == "NSCLC" ~ 11,
# TM, no RT
cohort %in% c("CRC", "BrCa") ~ 12,
# RT, no TM
cohort == "BLADDER" ~ 12,
# TM and RT
cohort %in% c("PANC", "Prostate") ~ 13
),
expected_n_dfs_with_summary = expected_n_dfs + 4)
# for each data release, pull data into the R environment
data_releases_pull_data <- pmap(
data_releases %>%
select(cohort, version),
pull_data_synapse
)
# name the items in the list
names(data_releases_pull_data) <- paste0(
data_releases$cohort, "_",
data_releases$version
)
length(data_releases_pull_data) > 0
} else {0 == 0})
testthat::expect_true(if (.is_connected_to_genie()) {
# for each data release, run create analytic cohort
# get first object from each item in the list
# then run create analytic cohort
data_releases_create_cohort <- map(data_releases_pull_data, 1) %>%
map(., create_analytic_cohort)
# name the items in the list
names(data_releases_create_cohort) <- paste0(
data_releases$cohort, "_",
data_releases$version
)
# create analytic cohort with return summary = TRUE
data_releases_create_cohort_with_summary <- map(data_releases_pull_data, 1) %>%
map(., create_analytic_cohort, return_summary = TRUE)
# apply same names to list with summaries
names(data_releases_create_cohort_with_summary) <- paste0(
data_releases$cohort, "_",
data_releases$version
)
length(data_releases_create_cohort_with_summary) > 0
} else {0 == 0})
# will update once we merge in PR to allow multiple cohorts in create_analytic_cohort
test_that("multiple cohorts- argument check", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data[1:2]
))
})
test_that("non-existent data_synapse", {
# a non-existent data_synapse is specified
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$TEST_NONEXIST
))
})
test_that("correct number of objects returned from create cohort", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# check that number of items returned is correct
# data releases with RT and TM
actual_length <- map_df(data_releases_create_cohort, length) %>%
pivot_longer(
cols = everything(),
names_to = "data_release",
values_to = "length"
)
# compare to expected length
expect_equal(data_releases$expected_n_dfs, actual_length$length)
# check thta number of items is also correct when a summary is returned
actual_length_with_summary <- map_df(data_releases_create_cohort_with_summary, length) %>%
pivot_longer(
cols = everything(),
names_to = "data_release",
values_to = "length"
)
# compare to expected length when return_summary = TRUE
# +4 for the additional tables returned when return_summary = TRUE
expect_equal(data_releases$expected_n_dfs_with_summary, actual_length_with_summary$length)
# check that class is correct
map2(
map(data_releases_create_cohort, class),
rep("list", nrow(data_releases)),
expect_equal
)
})
test_that("correct cohort returned from create cohort", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# for each data frame returned with a cohort, get the cohort variable
# remove genomic data frames since we don't expect them to have a cohort variable
data_releases_create_cohort_no_genomic <- map(
data_releases_create_cohort,
~ within(
.x,
rm(
cohort_cna,
cohort_fusions,
cohort_mutations_extended
)
)
)
# for each dataframe returned for a data release, get the cohort variable
cohort_returned <- map_depth(data_releases_create_cohort_no_genomic, select, "cohort",
.depth = 2
) %>%
map(., bind_rows, .id = "df") %>%
map(., distinct) %>%
bind_rows(., .id = "data_release") %>%
separate(data_release,
into = c("cohort_expected", "data_release"),
sep = "_"
)
expect_equal(cohort_returned$cohort_expected, cohort_returned$cohort)
})
test_that("check first index cancer default", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# no diagnosis criteria specified
# expect that the first index cancer is returned without any other
# incl criteria
# for each index cancer dataset, pick the first index cancer
data_releases_create_cohort_ca_dx_index <- map_depth(data_releases_pull_data,
.depth = 2,
pluck,
"ca_dx_index"
) %>%
map_depth(., .depth = 2, group_by, record_id) %>%
map_depth(., .depth = 2, slice_min, ca_seq) %>%
map_depth(., .depth = 2, ungroup) %>%
map(., 1)
# expect the default from create cohort to match the first index cancer
# check that the first index cancer diagnosis is returned by create analytic cohort
map2(
data_releases_create_cohort_ca_dx_index,
map(data_releases_create_cohort, "cohort_ca_dx"),
expect_equal
)
})
test_that("index_ca_seq", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# not really cohort specific, all cohorts will have index_ca_seq
# for now, only test on lung
# first and second index cancer is specified
# if patient only has 1 index cancer, it should be returned
# if patient has 2+ index cancers, the first two should be returned
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
index_ca_seq = c(1, 2),
return_summary = TRUE
)
test_1b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
arrange(cohort, record_id, ca_seq) %>%
mutate(index_ca_seq = 1:n()) %>%
ungroup() %>%
filter(index_ca_seq %in% c(1, 2)) %>%
select(-index_ca_seq)
expect_equal(test_1a$cohort_ca_dx, test_1b)
# an index cancer # that doesn't exist in the data is specified
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
index_ca_seq = 100
))
## index cancer #s in cohort_ngs match those in cohort_ca_dx
test2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`CRC_v2.0-public`$CRC_v2.0,
index_ca_seq = c(1, 2)
)
expect_equal(
test2a$cohort_ca_dx %>%
select(record_id, ca_seq) %>%
arrange(record_id, ca_seq),
test2a$cohort_ngs %>%
distinct(record_id, ca_seq) %>%
arrange(record_id, ca_seq)
)
})
test_that("institution", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# institution is specified and correct institution is returned
# institution will be available across data releases,
# don't need to test on each
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
institution = "dfci"
)
test_1b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(institution == "DFCI")
expect_equal(test_1a$cohort_ca_dx, test_1b)
# multiple institutions specified
test_2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1,
institution = c("dfci", "msk")
)
test_2b <- data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(institution %in% c("MSK", "DFCI"))
expect_equal(test_2a$cohort_ca_dx, test_2b)
# a non-existent institution is specified
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
institution = "uDFCI"
))
# UHN didn't participate in CRC
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`CRC_v2.0-public`$CRC_v2.0,
institution = "UHN"
))
})
test_that("stage_dx", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# stage dx is specified and correct stage is returned
# not cohort specific, all cohorts will have stage
# test only on one cohort for now
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
stage_dx = "stage ii"
)
test_1b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(stage_dx == "Stage II")
expect_equal(test_1a$cohort_ca_dx, test_1b)
# multiple stage values are specified
test_2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
stage_dx = c("Stage I", "stage ii")
)
test_2b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(stage_dx %in% c("Stage I", "Stage II"))
expect_equal(test_2a$cohort_ca_dx, test_2b)
# non-existent stage is specified
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
stage_dx = "3A"
))
})
test_that("histology", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# no histology is specified, all are returned
test0b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup()
expect_equal(
data_releases_create_cohort$`NSCLC_v2.0-public`$cohort_ca_dx,
test0b
)
# histology is specified and correct histology is returned
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
histology = "adenocarcinoma"
)
test_1b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(ca_hist_adeno_squamous == "Adenocarcinoma")
expect_equal(test_1a$cohort_ca_dx, test_1b)
# repeat for BrCa, which has specific histologies available
test_1c <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1,
histology = "invasive ductal carcinoma"
)
test_1d <- data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(ca_hist_brca == "Invasive ductal carcinoma")
expect_equal(test_1c$cohort_ca_dx, test_1d)
# multiple histologies are specified and returned
test_2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
histology = c("adenocarcinoma", "squamous cell")
)
test_2b <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(cohort, record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
filter(ca_hist_adeno_squamous %in% c("Adenocarcinoma", "Squamous cell"))
expect_equal(test_2a$cohort_ca_dx, test_2b)
# a non-existent histology is specified
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
histology = "squamous_adeno"
))
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.2-consortium`$BrCa_v1.2,
histology = "squamous_adeno"
))
})
test_that("no regimen specified", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# all regimens are returned
# should match all regimens given for a patients first index cancer
test_1b <- inner_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c("cohort", "record_id", "ca_seq")
)
expect_equal(
data_releases_create_cohort$`NSCLC_v2.0-public`$cohort_ca_drugs,
test_1b
)
})
test_that("drug regimen specified, order not specified", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# one drug regimen specified, but order not specified
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium")
)
# expect all times that drug was received (for the first index ca)
# to be returned
test_1b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
filter(regimen_drugs == c("Carboplatin, Pemetrexed Disodium"))
expect_equal(test_1a$cohort_ca_drugs, test_1b)
# one drug regimen specified with drugs out of ABC order and in mixed case
# regimen order not specified
test_2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Pemetrexed DISODIUM, carboplatin")
)
# expect all times that drug was received (for the first index ca)
# to be returned
# same as above
expect_equal(test_2a$cohort_ca_drugs, test_1b)
# multiple drug regimens specified, but order not specified
test_3a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium", "Nivolumab")
)
# expect all times that drug was received (for the first index ca)
# to be returned
test_3b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
filter(regimen_drugs %in% c(
"Carboplatin, Pemetrexed Disodium",
"Nivolumab"
))
expect_equal(test_3a$cohort_ca_drugs, test_3b)
# multiple drug regimens specified, regimen_type = containing
test_4a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin", "Nivolumab"),
regimen_type = "containING"
)
# expect all times that drug was received (for the first index ca)
# to be returned
test_4b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
filter(grepl("Carboplatin", regimen_drugs) |
grepl("Nivolumab", regimen_drugs))
expect_equal(test_4a$cohort_ca_drugs, test_4b)
})
test_that("drug regimen specified, order specified to be within cancer", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# regimen of a certain number but drug name not specified
# all patients whose first drug after diagnosis was carbo pem
test_0a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_order = 1,
regimen_order_type = "within cancer"
)
# compare to data
test_0b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c("cohort", "record_id", "ca_seq"),
multiple = "all"
) %>%
group_by(record_id) %>%
slice(which.min(regimen_number)) %>%
ungroup()
expect_equal(test_0a$cohort_ca_drugs, test_0b)
# all patients whose first drug after diagnosis was carbo pem
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium"),
regimen_type = "Exact",
regimen_order = 1,
regimen_order_type = "within cancer"
)
# compare to data
test_1b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c("cohort", "record_id", "ca_seq"),
multiple = "all"
) %>%
group_by(record_id) %>%
slice(which.min(regimen_number)) %>%
ungroup() %>%
filter(regimen_drugs == "Carboplatin, Pemetrexed Disodium")
expect_equal(test_1a$cohort_ca_drugs, test_1b)
# second regimen after diagnosis was carbo pem
test_2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium"),
regimen_type = "Exact",
regimen_order = 2,
regimen_order_type = "within cancer"
)
# compare to data
test_2b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
group_by(record_id) %>%
mutate(new_reg_number = 1:n()) %>%
ungroup() %>%
filter(regimen_drugs == "Carboplatin, Pemetrexed Disodium") %>%
filter(new_reg_number == 2) %>%
select(-new_reg_number)
expect_equal(test_2a$cohort_ca_drugs, test_2b)
# first AND/OR second regimen after diagnosis was carbo pem
test_3a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium"),
regimen_type = "Exact",
regimen_order = c(1, 2),
regimen_order_type = "within cancer"
)
# compare to data
test_3b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
group_by(record_id) %>%
mutate(new_reg_number = 1:n()) %>%
ungroup() %>%
filter(regimen_drugs == "Carboplatin, Pemetrexed Disodium") %>%
filter(new_reg_number %in% c(1, 2)) %>%
select(-new_reg_number)
expect_equal(test_3a$cohort_ca_drugs, test_3b)
# first AND/OR second regimen after diagnosis was carbo pem
# regimen_type = containing rather than default of exact
test_4a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium"),
regimen_type = "containing",
regimen_order = c(1, 2),
regimen_order_type = "within cancer"
)
test_4b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
group_by(record_id) %>%
mutate(new_reg_number = 1:n()) %>%
ungroup() %>%
filter(grepl("Carboplatin, Pemetrexed Disodium", regimen_drugs)) %>%
filter(new_reg_number %in% c(1, 2)) %>%
select(-new_reg_number)
expect_equal(test_4a$cohort_ca_drugs, test_4b)
})
test_that("exact drug regimen specified,
order specified to be within regimen", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# single regimen specified, want first time that regimen
# was given for all cancers
test_1a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium"),
regimen_order = c(1),
regimen_order_type = "within REGimen"
)
test_1b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
group_by(record_id, regimen_drugs) %>%
mutate(new_reg_number = 1:n()) %>%
ungroup() %>%
filter(regimen_drugs == "Carboplatin, Pemetrexed Disodium") %>%
filter(new_reg_number %in% c(1)) %>%
select(-new_reg_number)
expect_equal(test_1a$cohort_ca_drugs, test_1b)
# multiple regimens specified, want first time each given
test_2a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium", "Nivolumab"),
regimen_order = c(1),
regimen_order_type = "within REGimen"
)
test_2b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
group_by(record_id, regimen_drugs) %>%
mutate(new_reg_number = 1:n()) %>%
ungroup() %>%
filter(regimen_drugs %in% c(
"Carboplatin, Pemetrexed Disodium",
"Nivolumab"
)) %>%
filter(new_reg_number %in% c(1)) %>%
select(-new_reg_number)
expect_equal(test_2a$cohort_ca_drugs, test_2b)
# multiple regimens specified
# first and/or second time they were received
# multiple regimens specified, want first time each given
test_3a <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium", "Nivolumab"),
regimen_order = c(1, 2),
regimen_order_type = "within REGimen"
)
test_3b <- left_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
select(cohort, record_id, ca_seq),
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
by = c(
"cohort", "record_id", "ca_seq"
),
multiple = "all"
) %>%
group_by(record_id, regimen_drugs) %>%
mutate(new_reg_number = 1:n()) %>%
ungroup() %>%
filter(regimen_drugs %in% c(
"Carboplatin, Pemetrexed Disodium",
"Nivolumab"
)) %>%
filter(new_reg_number %in% c(1, 2)) %>%
select(-new_reg_number)
expect_equal(test_3a$cohort_ca_drugs, test_3b)
})
test_that("containing drug regimen specified,
order specified to be within regimen", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# specify regimen type to be containing (default is exact,
# which is what is implemented in the above)
test_1c <- create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = c("Carboplatin, Pemetrexed Disodium"),
regimen_type = "containing",
regimen_order = c(1),
regimen_order_type = "within REGimen"
)
# order containing
ordered_containing_regs <- data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs %>%
filter(grepl("Carboplatin, Pemetrexed Disodium", regimen_drugs)) %>%
distinct(cohort, record_id, regimen_number, regimen_drugs) %>%
group_by(cohort, record_id) %>%
mutate(order_within_containing_regimen = 1:n()) %>%
ungroup() %>%
filter(order_within_containing_regimen %in% c(1)) %>%
select(
cohort, record_id, regimen_number,
order_within_containing_regimen
)
# merge containing order onto the regimen data
# only keep regimens of interest
ca_drugs_with_containing_order <- inner_join(data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_drugs,
ordered_containing_regs,
by = c(
"cohort", "record_id",
"regimen_number"
),
multiple = "all"
)
# merge cohort with patients who received drug regimens of interest
# in order specified
test_1d <- inner_join(
data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0$ca_dx_index %>%
group_by(record_id) %>%
slice(which.min(ca_seq)) %>%
ungroup() %>%
select(cohort, record_id, ca_seq),
ca_drugs_with_containing_order,
by = c(
"cohort", "record_id", "ca_seq"
)
) %>%
arrange(cohort, record_id, ca_seq) %>%
select(
cohort, record_id, institution,
regimen_number, ca_seq, everything()
) %>%
as.data.frame()
expect_equal(
test_1c$cohort_ca_drugs %>%
arrange(cohort, record_id, ca_seq) %>%
select(cohort, record_id, ca_seq, regimen_number,
everything()),
test_1d %>%
arrange(cohort, record_id, ca_seq) %>%
select(cohort, record_id, ca_seq, regimen_number,
everything())
)
})
test_that("regimen_type", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# only testing on a single cancer cohort since not cohort-specific
# invalid value provided for regimen_type
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_type = "exact_containing"
))
# if regimen_type is specified, regimen_drugs must also be specified
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`CRC_v2.0-public`$CRC_v2.0,
regimen_type = "exact"
))
})
test_that("regimen_order", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# character value provided for regimen_order
# only testing on a single cancer cohort since not cohort-specific
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1,
regimen_order = "C"
))
})
test_that("regimen_order_type", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
# only testing on a single cancer cohort since not cohort-specific
# invalid value provided for regimen_order_type
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1,
regimen_order = 1,
regimen_order_type =
"within_btwn_cancer"
))
# regimen_order is specified but regimen_order_type is not
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1,
regimen_order = 1
))
# regimen_order_type is specified but regimen_order is not
expect_error(create_analytic_cohort(
data_synapse = data_releases_pull_data$`BrCa_v1.1-consortium`$BrCa_v1.1,
regimen_order_type =
"within cancer"
))
})
test_that("No patients met criteria", {
# exit if user doesn't have a synapse log in or access to data.
testthat::skip_if_not(.is_connected_to_genie())
expect_message(create_analytic_cohort(
data_synapse = data_releases_pull_data$`NSCLC_v2.0-public`$NSCLC_v2.0,
regimen_drugs = "Carboplatin, Pemetrexed",
regimen_order = 1000,
regimen_order_type = "within cancer"
))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.