Nothing
context("Datastep Performance Tests")
base_path <- "c:\\packages\\libr\\tests\\testthat\\data"
base_path <- "./data"
DEV <- FALSE
test_that("add_autos() function works as expected", {
libname(dat, file.path(base_path, "SDTM"), "sas7bdat")
tm <- Sys.time()
dat3 <- add_autos(dat$lb, c("USUBJID", "LBCAT", "LBTESTCD"))
tmdiff <- Sys.time() - tm
tmdiff
expect_equal("first." %in% names(dat3), TRUE)
expect_equal("last." %in% names(dat3), TRUE)
})
test_that("sort check works as expected", {
dat3 <- add_autos(mtcars, c("am"), sort_check = FALSE)
expect_equal("first." %in% names(dat3), TRUE)
expect_equal("last." %in% names(dat3), TRUE)
expect_error( add_autos(mtcars, c("am"), sort_check = TRUE))
})
# Baseline of 10 sec on condition
test_that("datastep() performance is good", {
if (DEV) {
libname(dat, file.path(base_path, "SDTM"), "sas7bdat")
tm <- Sys.time()
res <- datastep(dat$lb,
{
if (is.na(LBBLFL))
blisna <- TRUE
else
blisna <- FALSE
})
tmdiff <- Sys.time() - tm
tmdiff
expect_equal(tmdiff < 10, TRUE)
} else
expect_equal(TRUE, TRUE)
})
# Jumps to 14 seconds when retain added
test_that("datastep() performance with retain is good", {
libname(dat, file.path(base_path, "SDTM"), "sas7bdat")
if (DEV) {
libname(dat, file.path(base_path, "SDTM"), "sas7bdat")
tm <- Sys.time()
res <- datastep(dat$lb, retain = list(rnum = 0),
{
if (is.na(LBBLFL))
subjstart <- TRUE
else
subjstart <- FALSE
rnum <- 2 + 1
})
tmdiff <- Sys.time() - tm
tmdiff
expect_equal(tmdiff < 14, TRUE)
} else
expect_equal(TRUE, TRUE)
})
# Still less than < 11 seconds when group by added
test_that("datastep() performance with by group is good", {
if (DEV) {
libname(dat, file.path(base_path, "SDTM"), "sas7bdat")
tm <- Sys.time()
res <- datastep(dat$lb,
by = c("USUBJID"),
{
if (first.)
subjstart <- TRUE
else
subjstart <- FALSE
# rnum <- rnum + 1
})
tmdiff <- Sys.time() - tm
tmdiff
expect_equal(tmdiff < 11, TRUE)
} else
expect_equal(TRUE, TRUE)
})
# Jumps to 14 seconds when retain and group by added
test_that("datastep() performance with retain is good", {
if (DEV) {
libname(dat, file.path(base_path, "SDTM"), "sas7bdat")
tm <- Sys.time()
res <- datastep(dat$lb, retain = list(rnum = 0),
by = c("USUBJID", "LBCAT", "LBTESTCD"),
{
if (first.)
subjstart <- TRUE
else
subjstart <- FALSE
rnum <- rnum + 1
rnum2 <- n.
}, sort_check = TRUE)
tmdiff <- Sys.time() - tm
tmdiff
expect_equal(tmdiff < 16, TRUE)
} else
expect_equal(TRUE, TRUE)
})
test_that("datastep() with group_by performance is good", {
if (DEV) {
library(dplyr)
scs <- specs(PE = import_spec(PESTAT = "character"))
libname(dat, file.path(base_path, "SDTM"), "csv", import_specs = scs)
tm <- Sys.time()
prep <- dat$DM %>%
left_join(dat$VS, by = c("USUBJID" = "USUBJID")) %>%
select(USUBJID, VSTESTCD, VISIT, VISITNUM, VSSTRESN, ARM, VSBLFL) %>%
filter(VSTESTCD %in% c("PULSE", "RESP", "TEMP", "DIABP", "SYSBP"),
!(VISIT == "SCREENING" & VSBLFL != "Y")) %>%
arrange(USUBJID, VSTESTCD, VISITNUM) %>%
group_by(USUBJID, VSTESTCD) %>%
#datastep(by = c("USUBJID", "VSTESTCD"), retain = list(BSTRESN = 0), {
datastep(retain = list(BSTRESN = 0), {
# Combine treatment groups
# And distingish baseline time points
if (ARM == "ARM A") {
if (VSBLFL %eq% "Y") {
GRP <- "A_BASE"
} else {
GRP <- "A_TRT"
}
} else {
if (VSBLFL %eq% "Y") {
GRP <- "O_BASE"
} else {
GRP <- "O_TRT"
}
}
# Populate baseline value
if (first.)
BSTRESN = VSSTRESN
})
tmdiff <- Sys.time() - tm
tmdiff
expect_equal(tmdiff < 3, TRUE)
} else
expect_equal(TRUE, TRUE)
})
test_that("100,000 row datastep on data.frame is good.", {
if (DEV) {
l <- 100000
df <- data.frame(C1 = seq_len(l), C2 = runif(l),
C3 = runif(l), C4 = runif(l), stringsAsFactors = FALSE)
tm <- Sys.time()
res <- datastep(df, attrib = list(C5 = 0, C6 = 0),
{
C5 <- C2 + C3 + C4
C6 <- max(C2, C3, C4)
})
tmdiff <- Sys.time() - tm
tmdiff
res[1:10, ]
expect_equal(tmdiff < 30, TRUE)
# 100,000 rows is 20 seconds
# 1,000,000 rows is 3.6 minutes
} else
expect_equal(TRUE, TRUE)
})
test_that("100,000 row datastep on tibble is good.", {
if (DEV) {
library(tibble)
l <- 100000
df <- tibble(C1 = seq_len(l), C2 = runif(l),
C3 = runif(l), C4 = runif(l))
tm <- Sys.time()
res <- datastep(df, attrib = list(C5 = 0, C6 = 0),
{
C5 <- C2 + C3 + C4
C6 <- max(C2, C3, C4)
})
tmdiff <- Sys.time() - tm
tmdiff
res[1:10, ]
expect_equal(tmdiff < 30, TRUE)
# 100,000 rows is 21.2 seconds
# 1,000,000 rows is 3.8 minutes
} else
expect_equal(TRUE, TRUE)
})
test_that("100,000 row datastep on data.table is good.", {
if (DEV) {
library(data.table)
l <- 100000
df <- data.table(C1 = seq_len(l), C2 = runif(l),
C3 = runif(l), C4 = runif(l))
tm <- Sys.time()
res <- datastep(df, attrib = list(C5 = 0, C6 = 0),
{
C5 <- C2 + C3 + C4
C6 <- max(C2, C3, C4)
})
tmdiff <- Sys.time() - tm
tmdiff
res[1:10, ]
expect_equal(tmdiff < 30, TRUE)
# 100,000 rows is 1.3 minutes with out modification to datastep
# 100,000 rows is 22.3 seconds after modification
# 1,000,000 rows not going to try
} else
expect_equal(TRUE, TRUE)
})
test_that("complex datastep() performance is good", {
if (DEV) {
library(dplyr)
scs <- specs(PE = import_spec(PESTAT = "character"))
libname(dat, file.path(base_path, "SDTM"), "csv", import_specs = scs,
quiet = TRUE)
tm <- Sys.time()
prep <- dat$DM %>%
left_join(dat$VS, by = c("USUBJID" = "USUBJID")) %>%
select(USUBJID, VSTESTCD, VISIT, VISITNUM, VSSTRESN, ARM, VSBLFL) %>%
filter(VSTESTCD %in% c("PULSE", "RESP", "TEMP", "DIABP", "SYSBP"),
!(VISIT == "SCREENING" & VSBLFL != "Y")) %>%
arrange(USUBJID, VSTESTCD, VISITNUM) %>%
group_by(USUBJID, VSTESTCD) %>%
datastep(by = c("USUBJID", "VSTESTCD"), retain = list(BSTRESN = 0), {
# Combine treatment groups
# And distingish baseline time points
if (ARM == "ARM A") {
if (VSBLFL %eq% "Y") {
GRP <- "A_BASE"
} else {
GRP <- "A_TRT"
}
} else {
if (VSBLFL %eq% "Y") {
GRP <- "O_BASE"
} else {
GRP <- "O_TRT"
}
}
# Populate baseline value
if (first.)
BSTRESN = VSSTRESN
})
tmdiff <- Sys.time() - tm
tmdiff
expect_equal(tmdiff < 3, TRUE)
} else
expect_equal(TRUE, TRUE)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.