tests/testthat/test_computeSummaryStatisticsTable-stats.R

context("Compute summary statistics table with custom statistics")

test_that("A unique statistic specified as an expression is correctly computed", {
		
	set.seed(123)
	data <- data.frame(AVAL = rnorm(5), USUBJID = seq.int(5))

	expect_silent(
		summaryTableExpr <- computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = expression(statMean)
		)
	)
	
	expect_identical(
		attr(summaryTableExpr, "summaryTable")$statsVar, 
		"Statistic"
	)
	expect_equal(
		subset(summaryTableExpr, !isTotal)$Statistic, 
		mean(data$AVAL)
	)
	
})

test_that("A unique statistic specified as a name is correctly compute", {
			
	set.seed(123)
	data <- data.frame(AVAL = rnorm(5), USUBJID = seq.int(5))
	
	# as a 'name' object
	expect_silent(
		summaryTableName <- computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = bquote(statMean)
		)
	)
	expect_identical(
		attr(summaryTableName, "summaryTable")$statsVar, 
		"Statistic"
	)
	expect_equal(
		subset(summaryTableName, !isTotal)$Statistic, 
		mean(data$AVAL)
	)

})

test_that("The statistic of interest is specified as a string from the default set", {
			
	set.seed(123)
	data <- data.frame(AVAL = rnorm(5), USUBJID = seq.int(5))
		
	expect_silent(
		summaryTableString <- computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = "default"
		)
	)
	
	# specification equivalent to:
	stats <- getStatsData("summary-default", data = data, var = "AVAL")$AVAL
	summaryTableGetStats <- computeSummaryStatisticsTable(
		var = "AVAL",
		data = data,
		stats = stats
	)
	expect_identical(summaryTableString, summaryTableGetStats)
	
})

test_that("An error is generated if the statistic of interest is not available in the default set", {
			
	set.seed(123)
	data <- data.frame(AVAL = rnorm(5), USUBJID = seq.int(5))	
			
	expect_error(
		computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = "myStat"
		),
		"should be one of .*summary.*, .*count.*"
	)
			
})

test_that("The statistic of interest as a copy of a default statistic is correctly computed	", {
			
	set.seed(123)
	data <- data.frame(AVAL = rnorm(5),	USUBJID = seq.int(5))	
	expect_silent(
		summaryTable <- computeSummaryStatisticsTable(
			data = data, var = "AVAL", 
			stats = list(statMean = expression(statMean))
		)
	)
	expect_true("statMean" %in% colnames(summaryTable))
	expect_equal(
		subset(summaryTable, !isTotal)$statMean,
		mean(data$AVAL)
	)
			
})

test_that("An error is generated if the statistics has the same name as a default statistic", {
			
	data <- data.frame(AVAL = rnorm(5),	USUBJID = seq.int(5))	
	expect_error(
		computeSummaryStatisticsTable(
			data = data, var = "AVAL", 
			stats = list(statMean = expression(statMean+statSD))
		),
		"statistic name.*is a default name used"
	)
			
})

test_that("A unique set of statistics is correctly computed by variable", {
		
	set.seed(123)
	data <- data.frame(
		AVAL = rnorm(10), 
		CHG = c(NA_real_, rnorm(9)), 
		USUBJID = seq.int(10)
	)	
	
	expect_silent(
		summaryTable <- computeSummaryStatisticsTable(
			var = c("AVAL", "CHG"),
			data = data,
			stats = list(
				AVAL = expression(statMean), 
				CHG = expression(statMedian)
			)
		)
	)
	expect_identical(
		attr(summaryTable, "summaryTable")$statsVar,
		"Statistic"
	)

	expect_equal(subset(summaryTable, variable == "AVAL")$Statistic, mean(data$AVAL))
	expect_equal(subset(summaryTable, variable == "CHG")$Statistic, median(data$CHG, na.rm = TRUE))	
			
})

test_that("Multiple sets of statistics are correctly computed by variable", {
		
	set.seed(123)
	data <- data.frame(
		AVAL = rnorm(10), 
		CHG = c(NA_real_, rnorm(9)), 
		USUBJID = seq.int(10)
	)		
		
	# variable to summarize
	stats <- list(
		AVAL = list(n = expression(statN), `Mean` = expression(statMean)), 
		CHG = list(Median = expression(round(statMedian, 3)))
	)
	expect_silent(
		summaryTable <- computeSummaryStatisticsTable(
			var = c("AVAL", "CHG"),
			data = data,
			stats = stats
		)
	)
	expect_identical(
		attr(summaryTable, "summaryTable")$statsVar,
		c("n", "Median", "Mean"),
		label = "Order of statistics is as specified by variable"
	)
	sumTableAVAL <- subset(summaryTable, variable == "AVAL")[, c("n", "Median", "Mean")]
	expect_equal(unlist(sumTableAVAL), c(n = 10, Median = NA_real_, Mean = mean(data$AVAL)))
	
	sumTableCHG <- subset(summaryTable, variable == "CHG")[, c("n", "Median", "Mean")]
	expect_equal(unlist(sumTableCHG), c(n = NA_integer_, Median = round(median(data$CHG, na.rm = TRUE), 3), Mean = NA_real_))
	
})

test_that("An error is generated if the statistics are not specified for all variables to summarize", {
		
	set.seed(123)
	data <- data.frame(
		AVAL = rnorm(10), 
		CHG = c(NA_real_, rnorm(9)), 
		USUBJID = seq.int(10)
	)	
				
	# in this case, stats should be specified for all variables
	expect_error(
		computeSummaryStatisticsTable(
			var = c("AVAL", "CHG"),
			data = data,
			stats = list(AVAL = list(Mean = expression(statMean)))
		),
		"'stats'.*should be specified for all variables specified in 'var'"
	)
	
})

test_that("An error is generated if the statistics are specified for a variable not available", {
	
	set.seed(123)
	data <- data.frame(
		AVAL = rnorm(10), 
		CHG = c(NA_real_, rnorm(9)), 
		USUBJID = seq.int(10)
	)	
	
	expect_error(
		computeSummaryStatisticsTable(
			var = c("AVAL", "CHG"),
			data = data,
			stats = list(AVAL1 = list(Mean = expression(statMean)))
		),
		"Statistics specified in 'stats' should be.*list named with variable.*"
	)
	
})

test_that("Statistics are correctly computed by each element of a row variable", {
			
	set.seed(123)
	data <- data.frame(
		PARAM = rep(c("ALB", "ALT"), length.out = 10),
		AVAL = rnorm(10), 
		USUBJID = seq.int(10)
	)
	
	# variable to summarize
	stats <- list(
		ALB = list(Mean = expression(statMean)), 
		ALT = list(Median = expression(statMedian))
	)
	
	expect_silent(
		summaryTable <- computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = stats,
			rowVar = "PARAM", statsVarBy = "PARAM"
		)
	)
	
	sumTableALT <- subset(summaryTable, PARAM == "ALT")[, c("Median", "Mean")]
	dataALT <- subset(data, PARAM == "ALT")
	expect_equal(
		unlist(sumTableALT), 
		c(Median = median(dataALT$AVAL), Mean = NA_real_)
	)
	
	sumTableALB <- subset(summaryTable, PARAM == "ALB")[, c("Median", "Mean")]
	dataALB <- subset(data, PARAM == "ALB")
	expect_equal(
		unlist(sumTableALB), 
		c(Median = NA_real_, Mean = mean(dataALB$AVAL))
	)
	
})

	
test_that("An error is generated if the variable to compute statistics by is not specified as row or column variable", {
				
	set.seed(123)
	data <- data.frame(
		PARAM = rep(c("ALB", "ALT"), length.out = 10),
		AVAL = rnorm(10), 
		USUBJID = seq.int(10)
	)
	
	stats <- list(
		ALB = list(Mean = expression(statMean)), 
		ALT = list(Median = expression(statMedian))
	)
			
	expect_error(
		computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = stats,
			statsVarBy = "PARAM"
		),
		".*not available in.*row or column variables.*"
	)
	
})

test_that("An error is generated if the variable to compute statistics by is not specified but the statistics are specified by group", {
		
	set.seed(123)
	data <- data.frame(
		PARAM = rep(c("ALB", "ALT"), length.out = 10),
		AVAL = rnorm(10), 
		USUBJID = seq.int(10)
	)
			
	stats <- list(
		ALB = list(Mean = expression(statMean)), 
		ALT = list(Median = expression(statMedian))
	)
	
	expect_error(
		computeSummaryStatisticsTable(
			var = "AVAL",
			data = data,
			stats = stats
		),
		"Statistics.*should be.*list named with.*statsVarBy elements.*"
	)
	
})

test_that("Statistics are correctly computed by a specified row/column variable and by each of its element", {
	
	set.seed(123)
	data <- data.frame(
		PARAM = rep(c("ALB", "ALT"), length.out = 10),
		AVAL = rnorm(10), 
		CHG = rnorm(10),
		USUBJID = seq.int(10)
	)
	
	# variable to summarize
	stats <- list(
		AVAL = list(ALB = list(Mean = expression(statMean))), 
		CHG = list(ALT = list(Median = expression(statMedian)))
	)
	
	expect_silent(
		summaryTable <- computeSummaryStatisticsTable(
			var = c("AVAL", "CHG"),
			data = data,
			stats = stats,
			rowVar = "PARAM", statsVarBy = "PARAM"
		)
	)
	
	expect_equal(
		unlist(subset(summaryTable, PARAM == "ALB" & variable == "AVAL", c("Mean", "Median"))), 
		c(Mean = mean(subset(data, PARAM == "ALB")$AVAL), Median = NA_real_)
	)
	expect_identical(
		unlist(subset(summaryTable, PARAM == "ALT" & variable == "AVAL", c("Mean", "Median"))), 
		c(Mean = NA_real_, Median = NA_real_)
	)
	expect_identical(
		unlist(subset(summaryTable, PARAM == "ALB" & variable == "CHG", c("Mean", "Median"))), 
		c(Mean = NA_real_, Median = NA_real_)
	)
	expect_identical(
		unlist(subset(summaryTable, PARAM == "ALT" & variable == "CHG", c("Mean", "Median"))), 
		c(Mean = NA_real_, Median = median(subset(data, PARAM == "ALT")$CHG))
	)
				
})
			

test_that("Custom statistics are correctly computed", {
			
	set.seed(123)
	data <- data.frame(
		AVAL = rnorm(10), 
		USUBJID = seq.int(10)
	)
	
	CV <- function(x) sd(x, na.rm = TRUE)/mean(x, na.rm = TRUE)*100
	
	summaryTable <- computeSummaryStatisticsTable(
		var = "AVAL",
		data = data,
		stats = list(CV = bquote(statCV)),
		statsExtra = list(statCV = CV)
	)
	expect_equal(subset(summaryTable, !isTotal)$CV, CV(data$AVAL))
	
	# format of statsExtra already checked in unit tests
	# for the 'computeSummaryStatistics' function
			
})

test_that("A general label is correctly set for the statistics", {

	set.seed(123)
	data <- data.frame(USUBJID = seq.int(10))	
	
	statLab <- "My custom statistic"
	
	summaryTable <- computeSummaryStatisticsTable(data = data, statsGeneralLab = statLab)
	# general label is stored in table attribute
	# (and set when the table is formatted)
	expect_identical(
		attr(summaryTable, "summaryTable")$rowVarLab,
		c(Statistic = statLab)
	)
			
})

test_that("Percentages are correctly computed on the number of subject by default", {
			
	set.seed(123)
	data <- data.frame(
		USUBJID = c("1", "1", "2", "3", "4"),
		AEDECOD = rep("A", 5)
	)
	
	expect_silent(
		summaryTableStatN <- computeSummaryStatisticsTable(
			var = "AEDECOD",
			data = data,
			statsPerc = "statN"
		)
	)
	summaryTableStatNGroup <- subset(summaryTableStatN, variableGroup == "A")
	expect_equal(summaryTableStatNGroup$statPercTotalN, 4)
	
})


test_that("Percentages are correctly computed on the number of records when requested", {
				
	set.seed(123)
	data <- data.frame(
		USUBJID = c("1", "1", "2", "3", "4"),
		AEDECOD = rep("A", 5)
	)
				
	expect_silent(
		summaryTableStatm <- computeSummaryStatisticsTable(
			var = "AEDECOD",
			data = data,
			statsPerc = "statm"
		)
	)
	summaryTableStatmGroup <- subset(summaryTableStatm , variableGroup == "A")
	expect_equal(summaryTableStatmGroup$statPercTotalm, 5)
	
})

test_that("An error is generated when the statistic specified for percentages is not available", {
			
	set.seed(123)
	data <- data.frame(
		USUBJID = c("1", "1", "2", "3", "4"),
		AEDECOD = rep("A", 5)
	)

	expect_error(
		computeSummaryStatisticsTable(
			var = "AEDECOD",
			data = data,
			statsPerc = "statTest"
		)
	)
			
})

Try the inTextSummaryTable package in your browser

Any scripts or data that you put into this service are public.

inTextSummaryTable documentation built on Sept. 12, 2023, 5:06 p.m.