Nothing
# Label for the tests of this file: summary statistics table with a column variable.
context("Compute summary statistics table with column variable")
# NOTE(review): no plyr function is visibly called in this file - presumably attached
# for the code under test; confirm before removing.
library(plyr)
# dplyr::anti_join is used in the tests on the combinations of column elements.
library(dplyr)
test_that("A summary table is correctly computed for a continuous variable by a column variable", {

  # 7 subjects with a continuous variable, split unevenly in two groups (3 in A, 4 in B)
  data <- data.frame(
    USUBJID = seq.int(7),
    AGE = seq(20, 62, length.out = 7),
    TRT = rep(c("A", "B"), times = c(3, 4)),
    stringsAsFactors = FALSE
  )

  expect_silent(
    sumTable <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = "TRT"
    )
  )

  # the column variable is returned as a factor column of the summary table
  expect_s3_class(sumTable, "summaryTable")
  expect_true("TRT" %in% colnames(sumTable))
  expect_identical(levels(sumTable$TRT), unique(data$TRT))

  # the table with 'colVar' should be the combination of the tables
  # created separately on the data of each element of 'colVar'
  # ('!!' relies on testthat's support for unquoting in expectations,
  # so that the current value of 'trt' is injected in the expression)
  for (trt in unique(data$TRT)) {
    expect_equal(
      object = subset(sumTable, TRT == !!trt, select = -TRT),
      expected = computeSummaryStatisticsTable(data = subset(data, TRT == !!trt), var = "AGE"),
      check.attributes = FALSE
    )
  }

  # counts: the percentages are all 100,
  # and the totals equal the number of subjects in each group
  expect_true(all(sumTable$statPercN %in% 100))
  expect_equal(subset(sumTable, TRT == "A" & isTotal)$statN, 3)
  expect_equal(subset(sumTable, TRT == "A")$statPercTotalN, c(3, 3))
  expect_equal(subset(sumTable, TRT == "B" & isTotal)$statN, 4)
  expect_equal(subset(sumTable, TRT == "B")$statPercTotalN, c(4, 4))

})
test_that("Column elements are ordered alphabetically if the column variable is a character", {

  # the column variable is kept as a character (no conversion to factor)
  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = rep(c("A", "B"), each = 3),
    stringsAsFactors = FALSE
  )

  sumTable <- computeSummaryStatisticsTable(data, var = "AGE", colVar = "TRT")

  # levels of the column variable in the summary table
  colLevels <- levels(sumTable$TRT)
  expect_equal(colLevels, c("A", "B"))

})
test_that("Column elements are ordered as specified if the column variable is a factor", {

  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = rep(c("A", "B"), each = 3),
    stringsAsFactors = FALSE
  )

  # convert the column variable (and not the entire data frame) to a factor
  # with levels in non-alphabetical order; converting 'data' itself
  # would set all elements of TRT to missing
  data$TRT <- factor(data$TRT, levels = c("B", "A"))

  summaryTable <- computeSummaryStatisticsTable(data, var = "AGE", colVar = "TRT")

  # the order of the levels of the input factor should be retained
  expect_equal(
    levels(summaryTable$TRT),
    c("B", "A")
  )

})
test_that("The column totals are correctly computed", {

  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = rep(c("A", "B"), each = 3),
    stringsAsFactors = FALSE
  )

  expect_silent(
    sumTableColTotal <- computeSummaryStatisticsTable(
      data,
      var = "AGE", colVar = "TRT",
      colTotalInclude = TRUE
    )
  )

  # a 'Total' element is added after the elements of the data
  levelsWithTotal <- c(unique(data$TRT), "Total")
  expect_identical(levels(sumTableColTotal$TRT), levelsWithTotal)

  # the total column should contain the statistics computed on the entire dataset
  sumTableTotalOnly <- subset(sumTableColTotal, TRT == "Total", select = -TRT)
  sumTableAllData <- computeSummaryStatisticsTable(data, var = "AGE")
  expect_equal(
    sumTableTotalOnly,
    sumTableAllData,
    check.attributes = FALSE # row names differ
  )

})
test_that("A label is correctly set to the column totals", {

  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = rep(c("A", "B"), each = 3),
    stringsAsFactors = FALSE
  )

  # reference: total column with the default label
  sumTableColTotal <- computeSummaryStatisticsTable(
    data,
    var = "AGE", colVar = "TRT",
    colTotalInclude = TRUE
  )
  totalDefaultLab <- subset(sumTableColTotal, TRT == "Total", select = -TRT)

  # total column with a custom label
  sumTableColTotalLab <- computeSummaryStatisticsTable(
    data,
    var = "AGE", colVar = "TRT",
    colTotalInclude = TRUE,
    colTotalLab = "All treatments"
  )
  totalCustomLab <- subset(sumTableColTotalLab, TRT == "All treatments", select = -TRT)

  # only the label of the total should differ, not its content
  expect_equal(object = totalCustomLab, expected = totalDefaultLab)

})
test_that("The column totals are correctly extracted from a different dataset", {

  data <- data.frame(
    USUBJID = c("1", "1", "2", "3", "2", "3", "3"),
    AEBODSYS = c("A", "A", "A", "A", "B", "B", "B"),
    AEDECOD = c("a1", "a2", "a1", "a1", "b1", "b1", "b2"),
    TRT = c("X1", "X1", "X1", "X2", "X1", "X2", "X2"),
    stringsAsFactors = FALSE
  )

  # dataset for the total: the data stacked twice,
  # with a distinct subject identifier for each record
  dataStacked <- replicate(2, data, simplify = FALSE)
  dataTotalCol <- do.call(rbind, dataStacked)
  dataTotalCol$USUBJID <- as.character(sample.int(nrow(dataTotalCol)))

  rowVars <- c("AEBODSYS", "AEDECOD")

  # summary table with the total column based on the separate dataset
  summaryTable <- computeSummaryStatisticsTable(
    data,
    rowVar = rowVars,
    colVar = "TRT",
    colTotalInclude = TRUE,
    dataTotalCol = dataTotalCol
  )

  # the counts in the total column should be the same
  # as the ones computed on the full data specified in 'dataTotalCol'
  sumTableTotalOnly <- subset(summaryTable, TRT == "Total", select = -TRT)
  sumTableDataTotalCol <- computeSummaryStatisticsTable(
    data = dataTotalCol,
    rowVar = rowVars
  )
  expect_equal(
    object = sumTableTotalOnly,
    expected = sumTableDataTotalCol,
    check.attributes = FALSE
  )

})
test_that("The column totals for row totals are correctly computed", {

  data <- data.frame(
    USUBJID = c("1", "1", "2", "3", "2", "3", "3"),
    AEBODSYS = c("A", "A", "A", "A", "B", "B", "B"),
    AEDECOD = c("a1", "a2", "a1", "a1", "b1", "b1", "b2"),
    TRT = c("X1", "X1", "X1", "X2", "X1", "X2", "X2"),
    stringsAsFactors = FALSE
  )

  # full summary table, with a total for each row variable and for the columns
  rowVar <- c("AEBODSYS", "AEDECOD")
  summaryTable <- computeSummaryStatisticsTable(
    data,
    rowVar = rowVar,
    rowVarTotalInclude = rowVar,
    colVar = "TRT",
    colTotalInclude = TRUE
  )

  # row total across AEDECOD:
  # should match the table computed by AEBODSYS only (without its header total)
  expect_equal(
    object = subset(summaryTable,
      subset = TRT == "Total" & AEDECOD == "Total" & AEBODSYS != "Total",
      select = -c(TRT, AEDECOD)
    ),
    expected = subset(
      computeSummaryStatisticsTable(data = data, rowVar = c("AEBODSYS")),
      subset = !isTotal
    ),
    check.attributes = FALSE
  )

  # row total across AEBODSYS:
  # should match the table computed without any row variable (without its header total)
  expect_equal(
    object = subset(summaryTable,
      subset = TRT == "Total" & AEDECOD == "Total" & AEBODSYS == "Total",
      select = -c(TRT, AEDECOD, AEBODSYS)
    ),
    expected = subset(
      computeSummaryStatisticsTable(data = data),
      subset = !isTotal
    ),
    check.attributes = FALSE
  )

})
test_that("The column totals for row totals are correctly computed from a different dataset", {

  data <- data.frame(
    USUBJID = c("1", "1", "2", "3", "2", "3", "3"),
    AEBODSYS = c("A", "A", "A", "A", "B", "B", "B"),
    AEDECOD = c("a1", "a2", "a1", "a1", "b1", "b1", "b2"),
    TRT = c("X1", "X1", "X1", "X2", "X1", "X2", "X2"),
    stringsAsFactors = FALSE
  )

  # base dataset for the totals: the data stacked twice,
  # with a distinct subject identifier for each record
  dataTotalColDummy <- do.call(rbind, replicate(2, data, simplify = FALSE))
  nRecTotal <- nrow(dataTotalColDummy)
  dataTotalColDummy$USUBJID <- as.character(sample.int(nRecTotal))

  # a different dataset for the general total and for each row variable
  # (random subsets of the records for the row variables)
  dataTotalCol <- list(
    total = dataTotalColDummy,
    AEBODSYS = dataTotalColDummy[sample(nRecTotal, 9), ],
    AEDECOD = dataTotalColDummy[sample(nRecTotal, 5), ]
  )

  # full summary table
  rowVar <- c("AEBODSYS", "AEDECOD")
  summaryTable <- computeSummaryStatisticsTable(
    data,
    rowVar = rowVar,
    rowVarTotalInclude = rowVar,
    colVar = "TRT",
    colTotalInclude = TRUE,
    dataTotalCol = dataTotalCol
  )

  summaryTableColTotal <- subset(summaryTable, TRT == "Total")
  nSubjTotalCol <- length(unique(dataTotalCol[["total"]]$USUBJID))

  # the total used for the percentages is based on the 'total' element of 'dataTotalCol'
  expect_setequal(summaryTableColTotal$statPercTotalN, nSubjTotalCol)

  # the total used for the column header is based on the 'total' element of 'dataTotalCol'
  statNHeader <- subset(summaryTableColTotal, isTotal)$statN
  expect_equal(statNHeader, nSubjTotalCol)

  ## the counts in the total column should be the same
  ## as the ones computed on the corresponding dataset of 'dataTotalCol'

  # column total for the general row total
  sumTableGeneralTotal <- subset(summaryTable,
    TRT == "Total" & AEDECOD == "Total" & AEBODSYS == "Total",
    select = c(-AEBODSYS, -AEDECOD, -TRT)
  )
  sumTableRefGeneralTotal <- subset(
    computeSummaryStatisticsTable(data = dataTotalCol[["total"]]),
    subset = !isTotal
  )
  expect_equal(
    object = sumTableGeneralTotal,
    expected = sumTableRefGeneralTotal,
    check.attributes = FALSE
  )

  # statistics compared for the row variables
  statsCount <- c("statN", "statm")

  # column total across AEDECOD
  sumTableTotalAEDECOD <- subset(summaryTable,
    subset = (TRT == "Total" & AEDECOD == "Total" & AEBODSYS != "Total"),
    select = statsCount
  )
  sumTableRefAEBODSYS <- subset(
    computeSummaryStatisticsTable(
      data = dataTotalCol[["AEBODSYS"]],
      rowVar = "AEBODSYS"
    ),
    subset = !isTotal,
    select = statsCount
  )
  expect_equal(
    object = sumTableTotalAEDECOD,
    expected = sumTableRefAEBODSYS,
    check.attributes = FALSE
  )

  # column total for the most nested row variable
  sumTableNested <- subset(summaryTable,
    subset = (TRT == "Total" & AEDECOD != "Total" & AEBODSYS != "Total"),
    select = statsCount
  )
  sumTableRefAEDECOD <- subset(
    computeSummaryStatisticsTable(
      data = dataTotalCol[["AEDECOD"]],
      rowVar = "AEDECOD"
    ),
    subset = !isTotal,
    select = statsCount
  )
  expect_equal(
    object = sumTableNested,
    expected = sumTableRefAEDECOD,
    check.attributes = FALSE
  )

})
test_that("Only column elements of the data are included by default", {

  # each treatment is associated with one single dose
  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = factor(rep(c("A", "B"), each = 3)),
    DOSE = factor(rep(c("100", "200"), each = 3)),
    stringsAsFactors = FALSE
  )

  expect_silent(
    sumTableBase <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = c("TRT", "DOSE")
    )
  )

  # by default, only the combinations of columns appearing in the data are included
  colCombInTable <- unique(sumTableBase[, c("TRT", "DOSE")])
  colCombInData <- data.frame(
    TRT = factor(c("A", "B")),
    DOSE = factor(c("100", "200"))
  )
  expect_equal(
    object = colCombInTable,
    expected = colCombInData,
    check.attributes = FALSE
  )

})
test_that("All combinations of column elements are correctly included when requested", {

  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = factor(rep(c("A", "B"), each = 3)),
    DOSE = factor(rep(c("100", "200"), each = 3)),
    stringsAsFactors = FALSE
  )

  # with 'colInclude0', all combinations of the columns are included, based on the levels
  expect_silent(
    sumTableInclude0 <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = c("TRT", "DOSE"),
      colInclude0 = TRUE
    )
  )

  # all combinations are available in the total rows, in the correct order
  colAllComb <- expand.grid(unique(data[, c("TRT", "DOSE")]))
  idxOrder <- with(colAllComb, order(TRT, DOSE))
  colAllComb <- colAllComb[idxOrder, ]
  sumTableInclude0Total <- subset(sumTableInclude0, isTotal)
  expect_equal(
    sumTableInclude0Total[, c("TRT", "DOSE")],
    colAllComb,
    check.attributes = FALSE
  )

  # for the combinations appearing in the data, the statistics are the same
  # as the ones obtained without including the empty combinations
  colVarInData <- unique(data[, c("DOSE", "TRT")])
  sumTableBase <- computeSummaryStatisticsTable(
    data,
    var = "AGE",
    colVar = c("TRT", "DOSE")
  )
  sumTableInData <- merge(colVarInData, sumTableInclude0)[, colnames(sumTableBase)]
  expect_equal(
    sumTableInData,
    sumTableBase,
    check.attributes = FALSE
  )

  # for the combinations NOT appearing in the data, the statistics are empty:
  # missing for the continuous statistics, 0 for the counts and not a number for the percentage
  sumTableNotInData <- dplyr::anti_join(sumTableInclude0, colVarInData)
  statsCont <- c("statMean", "statSD", "statSE", "statMedian", "statMin", "statMax")
  statsCount <- c("statN", "statm", "statPercTotalN")
  expect_true(all(is.na(sumTableNotInData[, statsCont])))
  expect_true(all(sumTableNotInData[, statsCount] == 0))
  expect_true(all(is.nan(sumTableNotInData[, "statPercN"])))

})
test_that("Specific combinations of column elements are included when requested", {

  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = factor(rep(c("A", "B"), each = 3)),
    DOSE = factor(rep(c("100", "200"), each = 3)),
    stringsAsFactors = FALSE
  )

  # reference: summary table with only the combinations appearing in the data
  sumTableBase <- computeSummaryStatisticsTable(
    data,
    var = "AGE",
    colVar = c("TRT", "DOSE")
  )

  ## extra groups are requested via 'colVarDataLevels':
  # different doses per treatment, and treatments in non-alphabetical order
  trtLevels <- factor(
    rep(c("A", "B", "C"), length.out = 5),
    levels = c("C", "A", "B")
  )
  doseLevels <- factor(c("100", "200", "600", "400", "100"))
  colVarDataLevels <- data.frame(TRT = trtLevels, DOSE = doseLevels)

  expect_silent(
    sumTable <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = c("TRT", "DOSE"),
      colVarDataLevels = colVarDataLevels
    )
  )

  # all requested combinations are available in the total rows, in the correct order
  idxOrder <- with(colVarDataLevels, order(TRT, DOSE))
  expect_equal(
    subset(sumTable, isTotal)[, c("TRT", "DOSE")],
    colVarDataLevels[idxOrder, ],
    check.attributes = FALSE
  )

  # for the combinations appearing in the data, the statistics are the same
  # as the ones obtained when 'colVarDataLevels' is not specified
  colVarInData <- unique(data[, c("DOSE", "TRT")])
  sumTableInData <- merge(colVarInData, sumTable)[, colnames(sumTableBase)]
  expect_equal(
    sumTableInData,
    sumTableBase,
    check.attributes = FALSE
  )

  # for the combinations NOT appearing in the data, the statistics are empty:
  # missing for the continuous statistics, 0 for the counts and not a number for the percentage
  sumTableNotInData <- dplyr::anti_join(sumTable, colVarInData)
  statsCont <- c("statMean", "statSD", "statSE", "statMedian", "statMin", "statMax")
  statsCount <- c("statN", "statm", "statPercTotalN")
  expect_true(all(is.na(sumTableNotInData[, statsCont])))
  expect_true(all(sumTableNotInData[, statsCount] == 0))
  expect_true(all(is.nan(sumTableNotInData[, "statPercN"])))

})
test_that("A warning is generated if specified combinations of column elements are not available in the data", {

  data <- data.frame(
    USUBJID = seq.int(6),
    AGE = seq(20, 62, length.out = 6),
    TRT = factor(rep(c("A", "B"), each = 3)),
    DOSE = factor(rep(c("100", "200"), each = 3)),
    stringsAsFactors = FALSE
  )

  # the requested levels do not contain the combination: A - 100 of the data
  colVarDataLevels <- data.frame(
    TRT = factor(c("A", "B")),
    DOSE = c("0", "200")
  )

  expect_warning(
    object = sumTable <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = c("TRT", "DOSE"),
      colVarDataLevels = colVarDataLevels
    ),
    regexp = "Some variable records are not present in the data used for variable levels."
  )

})
test_that("Columns in the row total data but not in the data are correctly included", {

  # 6 subjects with 2 records each: subjects 1-3 in group A, subjects 4-6 in group B
  dataAll <- data.frame(
    USUBJID = rep(1:6, each = 2),
    TRT = rep(c("A", "B"), each = 6),
    COD = rep(c("Term1", "Term2", "Term3"), each = 4),
    stringsAsFactors = FALSE
  )

  # the data to summarize only contains group A,
  # whereas the data for the row total contains both groups
  data <- subset(dataAll, TRT == "A")
  dataTotalRow <- list(COD = dataAll)

  expect_silent(
    summaryTable <- computeSummaryStatisticsTable(
      data = data,
      colVar = "TRT",
      rowVar = "COD",
      rowVarTotalInclude = "COD",
      stats = getStats("n (%)"),
      dataTotalRow = dataTotalRow
    )
  )
  expect_s3_class(summaryTable, "summaryTable")

  # the group only available in the data for the total is included as column
  expect_identical(levels(summaryTable$TRT), c("A", "B"))

  # ... with one single record: the row total, with the counts of this group
  summaryTableGroupOnlyInTotal <- subset(summaryTable, TRT == "B")
  expect_equal(nrow(summaryTableGroupOnlyInTotal), 1)
  codGroupOnlyInTotal <- as.character(summaryTableGroupOnlyInTotal$COD)
  expect_equal(codGroupOnlyInTotal, "Total")
  expect_equal(summaryTableGroupOnlyInTotal$statN, 3)
  expect_equal(summaryTableGroupOnlyInTotal$statm, 6)

})
test_that("The column totals are correctly computed for a subset of the columns", {

  # 3 subjects in treatment A and 2 in treatment B, with doses nested in treatment
  data <- data.frame(
    USUBJID = seq.int(5),
    AGE = seq(20, 62, length.out = 5),
    TRT = c("A", "A", "A", "B", "B"),
    DOSE = c("100", "100", "200", "300", "400"),
    stringsAsFactors = FALSE
  )

  expect_silent(
    sumTable <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = c("TRT", "DOSE"),
      colVarTotal = "TRT"
    )
  )

  # one total per treatment (and not per treatment and dose)
  nTotalRows <- sum(sumTable$isTotal)
  expect_equal(nTotalRows, 2)

  # treatment A
  sumTableA <- subset(sumTable, TRT == "A")
  expect_equal(subset(sumTable, isTotal & TRT == "A")$statN, 3)
  expect_true(all(sumTableA$statPercTotalN == 3))

  # treatment B
  sumTableB <- subset(sumTable, TRT == "B")
  expect_equal(subset(sumTable, isTotal & TRT == "B")$statN, 2)
  expect_true(all(sumTableB$statPercTotalN == 2))

  # the percentages are based on the total used for the percentage
  percNExpected <- sumTable$statN / sumTable$statPercTotalN * 100
  expect_equal(sumTable$statPercN, percNExpected)

})
test_that("The column totals are correctly computed by the variable to summarize", {

  # one of the two variables to summarize (SEX) contains a missing value
  data <- data.frame(
    USUBJID = seq.int(5),
    AGE = seq(20, 62, length.out = 5),
    SEX = c(NA_character_, c("F", "M", "F", "M")),
    TRT = c("A", "A", "A", "B", "B"),
    DOSE = c("100", "100", "200", "300", "400"),
    stringsAsFactors = FALSE
  )

  # total computed by variable to summarize
  expect_silent(
    sumTable <- computeSummaryStatisticsTable(
      data,
      var = c("AGE", "SEX"),
      colVar = c("TRT", "DOSE"),
      colVarTotal = "variable"
    )
  )

  # one total per variable
  nTotalRows <- sum(sumTable$isTotal)
  expect_equal(nTotalRows, 2)

  # AGE: 5 subjects
  sumTableAge <- subset(sumTable, variable == "AGE")
  expect_equal(subset(sumTable, isTotal & variable == "AGE")$statN, 5)
  expect_true(all(sumTableAge$statPercTotalN == 5))

  # SEX: 4 subjects
  sumTableSex <- subset(sumTable, variable == "SEX")
  expect_equal(subset(sumTable, isTotal & variable == "SEX")$statN, 4)
  expect_true(all(sumTableSex$statPercTotalN == 4))

  # the percentages are based on the total used for the percentage
  percNExpected <- sumTable$statN / sumTable$statPercTotalN * 100
  expect_equal(sumTable$statPercN, percNExpected)

})
test_that("A warning is generated if the column for the totals is not available in the data", {

  data <- data.frame(
    USUBJID = seq.int(5),
    AGE = seq(20, 62, length.out = 5),
    TRT = c("A", "A", "A", "B", "B"),
    stringsAsFactors = FALSE
  )

  # 'TRT2' is not a column of the data
  expect_warning(
    object = computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = "TRT",
      colVarTotal = "TRT2"
    ),
    regexp = "Variable.* in colVarTotal.* ignored because.*not available"
  )

})
test_that("Column percentages are correctly computed for a subset of the columns", {

  # 3 subjects in treatment A and 2 in treatment B, with doses nested in treatment
  data <- data.frame(
    USUBJID = seq.int(5),
    AGE = seq(20, 62, length.out = 5),
    TRT = c("A", "A", "A", "B", "B"),
    DOSE = c("100", "100", "200", "300", "400"),
    stringsAsFactors = FALSE
  )

  expect_silent(
    sumTable <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = c("TRT", "DOSE"),
      colVarTotalPerc = "TRT"
    )
  )

  # the column total is still computed per treatment and dose
  nTotalRows <- sum(sumTable$isTotal)
  expect_equal(nTotalRows, 4)
  sumTableTotal <- subset(sumTable, isTotal)
  idxDose <- match(c("100", "200", "300", "400"), sumTableTotal$DOSE)
  expect_equal(
    sumTableTotal[idxDose, "statN"],
    c(2, 1, 1, 1)
  )

  # whereas the percentages are computed by treatment
  sumTableA <- subset(sumTable, TRT == "A")
  expect_true(all(sumTableA$statPercTotalN == 3))
  sumTableB <- subset(sumTable, TRT == "B")
  expect_true(all(sumTableB$statPercTotalN == 2))

  percNExpected <- sumTable$statN / sumTable$statPercTotalN * 100
  expect_equal(sumTable$statPercN, percNExpected)

})
test_that("Column percentages are correctly computed by the variable to summarize", {

  # one of the two variables to summarize (SEX) contains a missing value
  data <- data.frame(
    USUBJID = seq.int(5),
    AGE = seq(20, 62, length.out = 5),
    SEX = c(NA_character_, c("F", "M", "F", "M")),
    TRT = c("A", "A", "A", "B", "B"),
    DOSE = c("100", "100", "200", "300", "400"),
    stringsAsFactors = FALSE
  )

  # percentages computed by variable to summarize
  expect_silent(
    sumTable <- computeSummaryStatisticsTable(
      data,
      var = c("AGE", "SEX"),
      colVar = c("TRT", "DOSE"),
      colVarTotalPerc = "variable"
    )
  )

  # the column total is still computed per treatment and dose
  nTotalRows <- sum(sumTable$isTotal)
  expect_equal(nTotalRows, 4)

  # whereas the percentages are computed by variable
  sumTableAge <- subset(sumTable, variable == "AGE")
  expect_true(all(sumTableAge$statPercTotalN == 5))
  sumTableSex <- subset(sumTable, variable == "SEX")
  expect_true(all(sumTableSex$statPercTotalN == 4))

  percNExpected <- sumTable$statN / sumTable$statPercTotalN * 100
  expect_equal(sumTable$statPercN, percNExpected)

})
test_that("A warning is generated if the column for the percentages is not available in the data", {

  data <- data.frame(
    USUBJID = seq.int(5),
    AGE = seq(20, 62, length.out = 5),
    TRT = c("A", "A", "A", "B", "B"),
    stringsAsFactors = FALSE
  )

  # wrong specification: 'TRT2' is not a column of the data
  expect_warning(
    object = sumTable <- computeSummaryStatisticsTable(
      data,
      var = "AGE",
      colVar = "TRT",
      colVarTotalPerc = "TRT2"
    ),
    regexp = "Variable.* in colVarTotalPerc.* ignored because.*not available"
  )

})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.