Nothing
context("Compute summary statistics")
test_that("The statistics are correctly computed for a continuous variable (including missing)", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
statsCont <- computeSummaryStatistics(data = dataCont, var = "x")
expect_s3_class(statsCont, "data.frame")
statsContNameInt <- c("statN", "statm")
statsContNameNum <- c("statMean", "statSD", "statSE", "statMedian", "statMin", "statMax")
expect_named(
object = statsCont,
expected = c(statsContNameInt, statsContNameNum),
ignore.order = TRUE
)
# check variable type
for(col in statsContNameInt){
expect_type(statsCont[, !!col], type = "integer")
}
for(col in statsContNameNum){
expect_type(statsCont[, !!col], type = "double")
}
# check if computations are correct
expect_equal(statsCont[, "statN"], 4)
expect_equal(statsCont[, "statm"], 4)
expect_equal(statsCont[, "statMean"], 5)
expect_equal(statsCont[, "statMedian"], 4.5)
expect_equal(statsCont[, "statSD"], sd(dataCont$x, na.rm = TRUE))
expect_equal(statsCont[, "statSE"], statsCont[, "statSD"]/sqrt(statsCont[, "statm"]))
expect_equal(statsCont[, "statMin"], 1)
expect_equal(statsCont[, "statMax"], 10)
})
test_that("The statistics are correctly computed for a categorical variable (including missing)", {
dataCat <- data.frame(
x = c(NA_character_, "B", "B", "B", "A"),
USUBJID = seq.int(5),
stringsAsFactors = TRUE
)
statsCat <- computeSummaryStatistics(data = dataCat, var = "x")
expect_s3_class(statsCat, "data.frame")
expect_named(
object = statsCat,
expected = c("variableGroup", "statN", "statm"),
ignore.order = TRUE
)
expect_s3_class(statsCat[, "variableGroup"], "factor")
expect_identical(statsCat[, "variableGroup"], factor(c("A", "B")))
expect_equal(statsCat[, "statm"], c(1, 3))
expect_equal(statsCat[, "statN"], c(1, 3))
})
test_that("Counts are computed on the entire dataset if the variable is not specified", {
data <- data.frame(
x = c("A", "B"),
USUBJID = c("1", "2")
)
expect_equal(
computeSummaryStatistics(data = data, var = NULL),
data.frame(statN = 2, statm = 2)
)
})
test_that("Counts are computed on the entire dataset if the variable is set to 'all'", {
data <- data.frame(
x = c("A", "B"),
USUBJID = c("1", "2")
)
expect_equal(
computeSummaryStatistics(data = data, var = "all"),
data.frame(statN = 2, statm = 2)
)
})
test_that("Counts are computed similarly for empty variable or variable set to 'all'", {
data <- data.frame(
x = c("A", "B"),
USUBJID = c("1", "2")
)
expect_equal(
computeSummaryStatistics(data = data, var = "all"),
computeSummaryStatistics(data = data, var = NULL)
)
})
test_that("An error is generated if a specified variable is not available in the data", {
data <- data.frame(
x = c("A", "B"),
USUBJID = c("1", "2")
)
expect_error(
computeSummaryStatistics(data = data, var = "y"),
"Variable to summarize.*not available in data."
)
})
test_that("An error is generated if the subject variable is not available in the data", {
expect_error(
computeSummaryStatistics(
data = data.frame(x = 1:10),
var = "x",
subjectVar = "blabla"
),
pattern = "Subject variable .* not available"
)
})
test_that("An error is generated if multiple records are available per subject", {
dataCont <- data.frame(x = seq.int(5), USUBJID = c(1, 1, 2, 2, 2))
expect_error(
computeSummaryStatistics(data = dataCont, var = "x"),
pattern = "Extraction of statistics failed .* because multiple different records are available"
)
})
test_that("A warning is generated if multiple records are available per subject when requested", {
dataCont <- data.frame(x = seq.int(5), USUBJID = c(1, 1, 2, 2, 2))
expect_warning(
computeSummaryStatistics(data = dataCont, var = "x", checkVarDiffBySubj = "warning"),
pattern = "multiple different records are available"
)
})
test_that("No check is run for multiple records available per subject if requested", {
dataCont <- data.frame(x = seq.int(5), USUBJID = c(1, 1, 2, 2, 2))
expect_silent(
computeSummaryStatistics(data = dataCont, var = "x", checkVarDiffBySubj = "none")
)
})
test_that("The variable total is correctly included when requested", {
dataCat <- data.frame(
x = c(NA_character_, "B", "B", "B", "A"),
USUBJID = seq.int(5)
)
statsCatWithTotal <- computeSummaryStatistics(
data = dataCat, var = "x", varTotalInclude = TRUE
)
statsCatWthtTotal <- computeSummaryStatistics(
data = dataCat, var = "x", varTotalInclude = FALSE
)
statsCatIncludeTotal <- rbind(
statsCatWthtTotal,
data.frame(variableGroup = "Total", statN = 4, statm = 4)
)
expect_equal(
object = statsCatWithTotal,
expected = statsCatIncludeTotal
)
})
test_that("An error is generated if the type of the extra statistic is not correct", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
expect_error(
computeSummaryStatistics(
data = dataCont,
var = "x",
statsExtra = function(x) x
),
"should be a list",
)
})
test_that("An error is generated if the parameter of the extra statistic function is not correct", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
expect_error(
computeSummaryStatistics(
data = dataCont,
var = "x",
statsExtra = list(A = function(y) length(y))
),
"'statsExtra' should contain a parameter named 'x' or 'data'"
)
})
test_that("An error is generated if no names are specified for the extra statistics", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
# create example of custom function (coefficient of variation)
statsFctX <- function(x) sd(x, na.rm = TRUE)/mean(x, na.rm = TRUE)*100
expect_error(
computeSummaryStatistics(data = dataCont, var = "x", statsExtra = list(statsFctX)),
"should be named"
)
expect_error(
computeSummaryStatistics(data = dataCont, var = "x", statsExtra = list(A = statsFctX, 2)),
"should be named"
)
})
test_that("An error is generated if the name of the extra statistic is set to a default statistic", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
# create example of custom function (coefficient of variation)
statsFctX <- function(x) sd(x, na.rm = TRUE)/mean(x, na.rm = TRUE)*100
expect_error(
computeSummaryStatistics(
data = dataCont,
var = "x",
statsExtra = list(statN = statsFctX)
),
"choose a different name"
)
})
test_that("Extra statistic is correctly included when specified as a function of a vector", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
# create example of custom function (coefficient of variation)
statsFctX <- function(x) sd(x, na.rm = TRUE)/mean(x, na.rm = TRUE)*100
statsExtraFctX <- computeSummaryStatistics(
data = dataCont,
var = "x",
statsExtra = list(A = statsFctX)
)
expect_true("A" %in% names(statsExtraFctX))
expect_equal(statsExtraFctX$A, statsFctX(dataCont$x))
})
test_that("Extra statistic is correctly included when specified as a function of the data", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
# create example of custom function (coefficient of variation)
statsFctData <- function(data) sd(data$x, na.rm = TRUE)/mean(data$x, na.rm = TRUE)*100
statsExtraFctData <- computeSummaryStatistics(
data = dataCont,
var = "x",
statsExtra = list(B = statsFctData)
)
expect_true("B" %in% names(statsExtraFctData))
expect_equal(statsExtraFctData$B, statsFctData(dataCont))
})
test_that("An error is generated if the type of table is continuous whereas the variable is categorical", {
dataCat <- data.frame(
x = c(NA_character_, "B", "B", "B", "A"),
USUBJID = seq.int(5)
)
expect_error(
computeSummaryStatistics(
data = dataCat,
var = "x",
type = "summaryTable"
),
"should be numeric"
)
})
test_that("The type of table is correct for a categorical variable by default (or auto)", {
dataCat <- data.frame(
x = c(NA_character_, "B", "B", "B", "A"),
USUBJID = seq.int(5)
)
expect_identical(
computeSummaryStatistics(data = dataCat, var = "x"),
computeSummaryStatistics(data = dataCat, var = "x", type = "countTable")
)
expect_identical(
computeSummaryStatistics(data = dataCat, var = "x", type = "auto"),
computeSummaryStatistics(data = dataCat, var = "x", type = "countTable")
)
})
test_that("The type of table is correct for a continuous variable by default (or auto)", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
expect_identical(
computeSummaryStatistics(data = dataCont, var = "x"),
computeSummaryStatistics(data = dataCont, var = "x", type = "summaryTable")
)
expect_identical(
computeSummaryStatistics(data = dataCont, var = "x", type = "auto"),
computeSummaryStatistics(data = dataCont, var = "x", type = "summaryTable")
)
})
test_that("A count table is successfully extracted for a continuous variable", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = seq.int(5)
)
expect_error(
computeSummaryStatistics(
data = dataCont,
var = "x",
type = "countTable"
),
NA
)
})
test_that("An error is generated if the variable is not specified for a continuous table", {
dataCont <- data.frame(
x = c(1, 3),
USUBJID = c("a", "b")
)
expect_error(
computeSummaryStatistics(
data = dataCont,
var = NULL,
type = "summaryTable"
),
"Variable.*should be specified.*for a summary table"
)
})
test_that("An error is generated if the table type is not count in case the variable is set to 'all'", {
dataCont <- data.frame(
x = c(1, 3),
USUBJID = c("a", "b")
)
expect_error(
computeSummaryStatistics(
data = dataCont,
var = "all",
type = "summaryTable"
),
"'type' should be set to 'countTable'"
)
})
test_that("A continuous variable with only missing values is correctly filtered by default", {
# variable is considered empty if all missing:
emptyDataNA <- data.frame(
x = rep(NA_real_, 5),
USUBJID = seq.int(5)
)
expect_null(
computeSummaryStatistics(data = emptyDataNA, var = "x")
)
})
test_that("A continuous variable with only missing values is correctly included when requested", {
# variable is considered empty if all missing:
emptyDataNA <- data.frame(
x = rep(NA_real_, 5),
USUBJID = seq.int(5)
)
statsEmptyDataNA <- computeSummaryStatistics(
data = emptyDataNA,
var = "x",
filterEmptyVar = FALSE
)
expect_equal(
statsEmptyDataNA,
data.frame(
statN = 0, statm = 0, statMean = NA_real_,
statSD = NA_real_, statSE = NA_real_, statMedian = NA_real_,
statMin = NA_real_, statMax = NA_real_
)
)
})
test_that("An empty continuous variable is correctly included when requested", {
emptyData <- data.frame(x = numeric(), USUBJID = character())
statsEmptyData <- computeSummaryStatistics(
data = emptyData,
var = "x",
filterEmptyVar = FALSE
)
expect_equal(
statsEmptyData,
data.frame(
statN = 0, statm = 0, statMean = NA_real_,
statSD = NA_real_, statSE = NA_real_, statMedian = NA_real_,
statMin = NA_real_, statMax = NA_real_
)
)
})
test_that("Missing values are correctly filtered from a categorical variable with only missing values by default", {
dataCat <- data.frame(
x = factor(
c(NA_character_, "B", "B", "B", "A"),
levels = c("A", "B", "C")
),
USUBJID = seq.int(5)
)
sumTable <- computeSummaryStatistics(
data = dataCat,
var = "x",
filterEmptyVar = TRUE
)
expect_equal(sumTable$variableGroup, factor(c("A", "B"), levels = c("A", "B", "C")))
})
test_that("Missing values are correctly included from a categorical variable when requested", {
dataCat <- data.frame(
x = factor(
c(NA_character_, "B", "B", "B", "A"),
levels = c("A", "B", "C")
),
USUBJID = seq.int(5)
)
statsCatNoFilter <- computeSummaryStatistics(data = dataCat, var = "x", filterEmptyVar = FALSE)
expect_equal(nrow(statsCatNoFilter), 3)
expect_true("C" %in% statsCatNoFilter$variableGroup)
expect_equal(subset(statsCatNoFilter, variableGroup == "C")$statN, 0)
expect_equal(subset(statsCatNoFilter, variableGroup == "C")$statm, 0)
})
test_that("An empty summary table is correctly returned by default if the input data is empty", {
emptyData <- do.call(
data.frame,
list(
x = factor(character(), levels = c("a", "b")),
USUBJID = character()
)
)
# by default, nothing is returned
expect_equal(nrow(computeSummaryStatistics(data = emptyData, var = "x")), 0)
})
test_that("A count table is correctly returned if the input data is empty when empty variable should be included", {
# if variable should not be filtered, 0 counts are returned
emptyData <- do.call(
data.frame,
list(
x = factor(character(), levels = c("a", "b")),
USUBJID = character()
)
)
expect_equal(
computeSummaryStatistics(data = emptyData, var = "x", filterEmptyVar = FALSE),
data.frame(variableGroup = c("a", "b"), statN = c(0, 0), statm = c(0, 0), stringsAsFactors = TRUE)
)
})
test_that("A count table is correctly returned if the input data is empty when the empty variable and total of the variable are requested", {
emptyData <- do.call(
data.frame,
list(
x = factor(character(), levels = c("a", "b")),
USUBJID = character()
)
)
expect_equal(
computeSummaryStatistics(
data = emptyData,
var = "x",
filterEmptyVar = FALSE,
varTotalInclude = TRUE
),
data.frame(
variableGroup = factor(
c("a", "b", "Total"),
levels = c(c("a", "b", "Total"))
),
statN = c(0, 0, 0),
statm = c(0, 0, 0)
)
)
})
test_that("A count table is correctly returned if the input data is empty when total of the variable is requested", {
emptyData <- do.call(
data.frame,
list(
x = factor(character(), levels = c("a", "b")),
USUBJID = character()
)
)
expect_equal(
computeSummaryStatistics(
data = emptyData,
var = "x",
varTotalInclude = TRUE
),
data.frame(variableGroup = factor("Total", c("a", "b", "Total")),statN = 0, statm = 0)
)
})
test_that("An error is generated if a continuous variable contains different values for the same subject", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
y = rep(2, 5),
USUBJID = c("A", "B", "B", "C", "D")
)
expect_error(
computeSummaryStatistics(data = dataCont, var = "x"),
"multiple different records of x are available for the same USUBJID"
)
})
test_that("An error is generated containing a specified variable if a continuous variable contains different values for the same subject", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
y = rep(2, 5),
USUBJID = c("A", "B", "B", "C", "D")
)
expect_error(
computeSummaryStatistics(data = dataCont, var = "x", msgVars = "y"),
"multiple different records of x are available for the same USUBJID.* USUBJID y x"
)
})
test_that("An error is generated containing a specified label if a continuous variable contains different values for the same subject", {
dataCont <- data.frame(
x = c(NA, 1, 3, 6, 10),
USUBJID = c("A", "B", "B", "C", "D")
)
expect_error(
computeSummaryStatistics(
data = dataCont,
var = "x",
msgLabel = "test dataset"
),
"multiple different records of x for the test dataset are available for the same USUBJID"
)
})
test_that("A message is generated if a continuous variable contains duplicated values for the same subject", {
dataCont <- data.frame(
x = c(NA, 1, 1, 6, 10),
USUBJID = c("A", "B", "B", "C", "D")
)
expect_message(
computeSummaryStatistics(data = dataCont, var = "x"),
"duplicated values for x"
)
})
test_that("A warning is generated if some subject IDs are missing and a variable is specified", {
data <- data.frame(USUBJID = c(NA_character_, 1), x = 1:2)
expect_warning(
summaryTable <- computeSummaryStatistics(data = data, var = "x"),
regexp = "Missing records.*are not considered for subject counts"
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.