Nothing
test_that("you can subset on drecno, age, meddra_id", {
wd_in <- tempdir()
wd_in <- paste0(wd_in, "/", "tb_subset_t1") # avoid windows #1224
# never rewrite on the same tempfile in ANY test.
dir.create(path = wd_in)
mini_data <- rlang::list2(
demo =
data.table(
UMCReportId = c(1, 2, 3, 4),
AgeGroup = c(1, 2, 7, 9)
),
drug =
data.table(
UMCReportId = c(1, 2, 3, 4),
Drug_Id = c("d1", "d2", "d3", "d4"),
DrecNo = c(133138448, 133138448, 111841511, 111841511),
MedicinalProd_Id = c(25027716, 97354576, 104264760, 37484408)
),
adr =
data.table(
UMCReportId = c(1, 2, 3, 4),
Adr_Id = c("a1", "a2", "a3", "a4"),
MedDRA_Id = c(110049083, 146319904, 146319904, 72535511)
),
link =
data.table(
Drug_Id = c("d1", "d2", "d3", "d4"),
Adr_Id = c("a1", "a2", "a3", "a4")
),
srce =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
ind =
data.table(
Drug_Id = c("d1", "d2", "d3", "d4")
),
out =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
followup =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
suspdup =
data.table(
UMCReportId = c(3),
SuspectedduplicateReportId = c(4)
)
)
purrr::iwalk(
mini_data,
function(dataset, name)
arrow::write_parquet(
dataset |>
arrow::as_arrow_table(),
sink = paste0(wd_in, "/", name, ".parquet")
)
)
# ---- drecno
sv_selection_drecno <-
list(ipi = c(133138448, 133138448))
expect_snapshot(
tb_subset(
wd_in = paste0(wd_in, "/"),
wd_out = paste0(wd_in, "/", "subset_drecno", "/"),
subset_var = "drecno",
sv_selection = sv_selection_drecno
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
)
)
drug_sub <-
dt_parquet(paste0(wd_in, "/", "subset_drecno", "/"), "drug")
demo_sub <-
dt_parquet(paste0(wd_in, "/", "subset_drecno", "/"), "demo")
expect_equal(
sort(unique(demo_sub$UMCReportId)),
sort(unique(drug_sub$UMCReportId))
)
para_test <-
drug_sub %>%
dplyr::group_by(UMCReportId) %>%
dplyr::summarise(
has_para = max(DrecNo %in% sv_selection_drecno$ipi)
)
expect_equal(
all(para_test$has_para == 1),
TRUE
)
# --- drecno several items
sv_selection_drecno_item <-
list(
ipi = c(133138448),
nivo = c(111841511)
)
expect_snapshot(tb_subset(
wd_in = paste0(wd_in, "/"),
wd_out = paste0(wd_in, "/", "subset_drecno_item", "/"),
subset_var = "drecno",
sv_selection = sv_selection_drecno_item
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
))
demo_sub_item <-
dt_parquet(paste0(wd_in, "/", "subset_drecno_item", "/"), "demo")
drug_sub_item <-
dt_parquet(paste0(wd_in, "/", "subset_drecno_item", "/"), "drug")
expect_equal(
nrow(demo_sub_item),
3 # one duplicate
)
# ---- age
expect_snapshot(tb_subset(
wd_in = paste0(wd_in, "/"),
wd_out = paste0(wd_in, "/", "subset_age", "/"),
subset_var = "age",
sv_selection = c(7, 8)
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
))
drug_sub <-
dt_parquet(paste0(wd_in, "/", "subset_age", "/"), "drug")
demo_sub <-
dt_parquet(paste0(wd_in, "/", "subset_age", "/"), "demo")
expect_equal(
sort(unique(demo_sub$UMCReportId)),
sort(unique(drug_sub$UMCReportId))
)
age_test <-
demo_sub %>%
dplyr::mutate(
good_age = AgeGroup %in% c(7, 8)
)
expect_equal(
all(age_test$good_age == 1),
TRUE
)
# ---- adr
sv_selection_mid <-
list(colitis = c(146319904, 72535511))
wd_out <- paste0(wd_in, "/", "subset_meddraid", "/")
expect_snapshot(tb_subset(
wd_in = paste0(wd_in, "/"),
wd_out = paste0(wd_in, "/", "subset_meddraid", "/"),
subset_var = "meddra_id",
sv_selection = sv_selection_mid
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
))
adr_sub <-
dt_parquet(paste0(wd_in, "/", "subset_meddraid", "/"), "adr")
demo_sub <-
dt_parquet(paste0(wd_in, "/", "subset_meddraid", "/"), "demo")
expect_equal(
sort(unique(demo_sub$UMCReportId)),
sort(unique(adr_sub$UMCReportId))
)
colitis_test <-
adr_sub %>%
dplyr::group_by(UMCReportId) %>%
dplyr::summarise(
has_colitis = max(MedDRA_Id %in% sv_selection_mid$colitis)
)
expect_equal(
all(colitis_test$has_colitis == 1),
TRUE
)
unlink(wd_in, recursive = TRUE)
# attempt to clean note on debian
if(dir.exists(wd_in) & Sys.info()[["sysname"]] != "Windows")
file.remove(wd_in)
})
test_that("you can keep suspdup", {
wd_in <- tempdir()
wd_in <- paste0(wd_in, "/", "sub") # avoid windows #1224
dir.create(path = wd_in)
mini_data <- rlang::list2(
demo =
data.table(
UMCReportId = c(1, 2, 3, 4),
AgeGroup = c(1, 7, 7, 8)
),
drug =
data.table(
UMCReportId = c(1, 2, 3, 4),
Drug_Id = c("d1", "d2", "d3", "d4"),
DrecNo = c(133138448, 133138448, 111841511, 111841511),
MedicinalProd_Id = c(25027716, 97354576, 104264760, 37484408)
),
adr =
data.table(
UMCReportId = c(1, 2, 3, 4),
Adr_Id = c("a1", "a2", "a3", "a4"),
MedDRA_Id = c(110049083, 31672047, 146319904, 72535511)
),
link =
data.table(
Drug_Id = c("d1", "d2", "d3", "d4"),
Adr_Id = c("a1", "a2", "a3", "a4")
),
srce =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
ind =
data.table(
Drug_Id = c("d1", "d2", "d3", "d4")
),
out =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
followup =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
suspdup =
data.table(
UMCReportId = c(3),
SuspectedduplicateReportId = c(4)
)
)
purrr::iwalk(
mini_data,
function(dataset, name)
arrow::write_parquet(
dataset |>
arrow::as_arrow_table(),
sink = paste0(wd_in, "/", name, ".parquet")
)
)
# ---- age
expect_snapshot(tb_subset(
wd_in = paste0(wd_in, "/"),
wd_out = paste0(wd_in, "/", "subset_age_suspdup", "/"),
subset_var = "age",
sv_selection = c(7, 8),
rm_suspdup = FALSE
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
))
expect_snapshot(tb_subset(
wd_in = paste0(wd_in, "/"),
wd_out = paste0(wd_in, "/", "subset_age", "/"),
subset_var = "age",
sv_selection = c(7, 8),
rm_suspdup = TRUE
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
))
drug_sub <-
dt_parquet(paste0(wd_in, "/", "subset_age_suspdup", "/"), "drug")
demo_sub <-
dt_parquet(paste0(wd_in, "/", "subset_age_suspdup", "/"), "demo")
demo_nosuspdup <-
dt_parquet(paste0(wd_in, "/", "subset_age", "/"), "demo")
expect_equal(
sort(unique(demo_sub$UMCReportId)),
sort(unique(drug_sub$UMCReportId))
)
age_test <-
demo_sub %>%
dplyr::mutate(
good_age = AgeGroup %in% c(7, 8)
)
expect_equal(
all(age_test$good_age == 1),
TRUE
)
expect_equal(
nrow(demo_sub),
nrow(demo_nosuspdup) + 1
)
unlink(wd_in, recursive = TRUE)
if(dir.exists(wd_in) & Sys.info()[["sysname"]] != "Windows")
file.remove(wd_in)
})
test_that("alternative syntaxes work", {
wd_in <- tempdir()
wd_in <- paste0(wd_in, "/", "sub2") # avoid windows #1224
dir.create(path = wd_in)
mini_data <- rlang::list2(
demo =
data.table(
UMCReportId = c(1, 2, 3, 4),
AgeGroup = c(1, 7, 7, 8)
),
drug =
data.table(
UMCReportId = c(1, 2, 3, 4),
Drug_Id = c("d1", "d2", "d3", "d4"),
DrecNo = c(133138448, 133138448, 111841511, 111841511),
MedicinalProd_Id = c(25027716, 97354576, 104264760, 37484408)
),
adr =
data.table(
UMCReportId = c(1, 2, 3, 4),
Adr_Id = c("a1", "a2", "a3", "a4"),
MedDRA_Id = c(110049083, 31672047, 146319904, 72535511)
),
link =
data.table(
Drug_Id = c("d1", "d2", "d3", "d4"),
Adr_Id = c("a1", "a2", "a3", "a4")
),
srce =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
ind =
data.table(
Drug_Id = c("d1", "d2", "d3", "d4")
),
out =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
followup =
data.table(
UMCReportId = c(1, 2, 3, 4)
),
suspdup =
data.table(
UMCReportId = c(3),
SuspectedduplicateReportId = c(4)
)
)
purrr::iwalk(
mini_data,
function(dataset, name)
arrow::write_parquet(
dataset |>
arrow::as_arrow_table(),
sink = paste0(wd_in, "/", name, ".parquet")
)
)
# no end slashes at wd_in and wd_out
expect_snapshot(
tb_subset(
wd_in = wd_in,
wd_out = paste0(wd_in, "/", "subset_age"),
subset_var = "age",
sv_selection = c(7, 8)
),
transform =
function(chr_line)
stringr::str_replace(
chr_line,
"(?>=\\d{1,3}\\%\\s| ).*(?= \\|)",
" percent, seconds"
))
drug_sub <-
dt_parquet(paste0(wd_in, "/", "subset_age", "/"), "drug")
demo_sub <-
dt_parquet(paste0(wd_in, "/", "subset_age", "/"), "demo")
expect_equal(
sort(unique(demo_sub$UMCReportId)),
sort(unique(drug_sub$UMCReportId))
)
age_test <-
demo_sub %>%
dplyr::mutate(
good_age = AgeGroup %in% c(7, 8)
)
expect_equal(
all(age_test$good_age == 1),
TRUE
)
unlink(wd_in, recursive = TRUE)
if(dir.exists(wd_in) & Sys.info()[["sysname"]] != "Windows")
file.remove(here::here(wd_in))
})
test_that("error if no wd_in", {
cnd <- rlang::catch_cnd(
error_dir_exists("wd_in", "/wrong_path/")
)
expect_s3_class(cnd, "no_dir")
expect_equal(cnd$dir, "wd_in")
expect_equal(cnd$wrong_dir, "/wrong_path/")
path_wrong <- "/wrong_path/"
expect_error(
check_dir_exists(path_wrong),
class = "no_dir",
regexp = path_wrong
)
# silent if dir exists
expect_silent(
check_dir_exists(tempdir())
)
expect_snapshot(error = TRUE,
tb_subset(
wd_in = "that_dir_doesnt_exist",
sv_selection = list(a = c(1, 2))
),
cnd_class = TRUE
)
})
cli::test_that_cli("printing error no dir", {
expect_snapshot(error = TRUE,
error_dir_exists("wd_in", "/wrong_path/")
)
})
unlink(tempdir(), recursive = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.