R/DunnettGLM.R

Defines functions Dunnett.GLM

Documented in Dunnett.GLM

#' @title Dunnett.GLM
#' @description When conducting statistical tests with multiple treatments, such as a control group and
#' increasing concentrations of a test substance, ANOVA and parametric post-hoc tests (e.g. Dunnett's test)
#' are commonly used. However, these tests require the assumptions of homogeneous variances and normally
#' distributed data. For count data (e.g. counts of animals), these assumptions are typically violated,
#' as the data are usually Poisson-distributed. The Dunnett.GLM function is based on a GLM followed by a
#' Dunnett test to the model estimates. It was implemented to serve as an alternative approach to CPCAT
#' while using a Quasi-Poisson regression.
#' The basic approach from Hothorn and Kluxen (2020) was adjusted to overcome methodological issues (see
#' description of 'zero.treatment.action parameter').
#' For details on the structure of the input data, please refer to the dataset 'Daphnia.counts' provided
#' alongside this package.
#' @param groups Group vector
#' @param counts Vector with count data
#' @param control.name Character string with control group name (optional)
#' @param zero.treatment.action Method for dealing with treatments only containing zeros (use either
#' "identity.link" or "log(x+1)"). The method is only used if the data set contains dose/concentration
#' groups that exclusively contain zero values (since the basic method provides for a logarithmic
#' transformation of the data averages, it would lead to incorrect results). To deal with this
#' methodological shortcoming, two options were implemented. The 'identity.link' option: the 'identity'
#' link is used in the GLM instead of the 'log' link, i.e. the data are no longer transformed. The 'log(x+1)'
#' option: The 'log' link is retained and 1 is added to each count value at the start of the procedure so
#' that the subsequent log-transformation can be carried out without any problems. Note that both options
#' may slightly distort the results.
#' @param show.output Show/hide output
#' @return R object with results and information from Dunnett.GLM calculations
#' @references
#' Hothorn, L.; Kluxen, F. (2020): Statistical analysis of no observed effect concentrations or levels in eco-toxicological assays with overdispersed count endpoints. In: bioRxiv, 2020, https://doi.org/10.1101/2020.01.15.907881
#' @examples
#' Daphnia.counts	# example data provided alongside the package
#'
#' # Test Dunnett.GLM with 'identity.link' option
#' Dunnett.GLM(groups = Daphnia.counts$Concentration,
#' 			   counts = Daphnia.counts$Number_Young,
#' 			   control.name = NULL,
#' 			   zero.treatment.action = "identity.link",
#' 			   show.output = TRUE)
#'
#' # Test Dunnett.GLM with 'log(x+1)' option
#' Dunnett.GLM(groups = Daphnia.counts$Concentration,
#'			   counts = Daphnia.counts$Number_Young,
#'			   control.name = NULL,
#'			   zero.treatment.action = "log(x+1)",
#'			   show.output = TRUE)
#' @export
Dunnett.GLM = function(	groups,										# group vector
						counts,										# vector with count data
						control.name = NULL,						# character string with control group name
						zero.treatment.action = "identity.link",	# method for dealing with treatments only containing zeros (alternative: "log(x+1)")
						show.output = TRUE){						# show/hide output

	#library(multcomp)

	# do some error handling
	if (length(groups) != length(counts)) {
		stop("Lengths of groups and counts don't match!")
	}

	if (!is.numeric(counts) | min(counts < 0)) {	 # | !all(counts == floor(counts))
		stop("Counts must be non-negative numeric values!")
	}

	if (zero.treatment.action != "identity.link" & zero.treatment.action != "log(x+1)") {
		stop("Parameter zero.treatment.action must be either \"identity.link\" or \"log(x+1)\"!")
	}

	# setup information to be stored
	info = data.frame(matrix(nrow = 0, ncol = 1))

	# Re-structure the input to a data frame
	dat = data.frame(Counts = counts, Groups = groups)

	# Assign new order of levels if control.name was specified
	if (!is.null(control.name)) {
		if (!is.character(control.name)) {
			stop("Specified control must be provided as a character string!")
		}
		if (!is.element(control.name, unique(dat$Groups))) {
			stop("Specified control cannot be found!")
		}

		# Put desired control in the first place
		dat.temp.1 = dat[dat$Groups == control.name,]
		dat.temp.2 = dat[dat$Groups != control.name,]
		dat = rbind(dat.temp.1, dat.temp.2)
	}

	# Convert groups column to a factor, specifying the desired order of levels
	dat$Groups = factor(dat$Groups, levels = unique(dat$Groups))

	# Use treatments vector for convenience
	treatments = levels(dat$Groups)

	# Exit if not enough data left
	if (dim(stats::na.omit(dat))[1] < 2) {
		stop("Too few valid data!")
	}
	if (dim(dat)[1] != dim(stats::na.omit(dat))[1]) {
		info = rbind(info, paste0(dim(dat)[1] != dim(stats::na.omit(dat))[1], " rows with NA values were excluded!"))
	}
	dat = stats::na.omit(dat)

	agg = stats::aggregate(dat$Counts, by=list(dat$Groups), mean)

	if (min(agg$x) > 0) {
		# Dunnett GLM with quasi-Poisson link-function
		mod = stats::glm(Counts~Groups, data=dat, family=stats::quasipoisson(link = "log"))
	} else {
		info = rbind(info, paste0("A treatment contained only zeros, hence, the zero.treatment.action \"", zero.treatment.action, "\" was applied."))

		# Use either the identity link or a data transformation if one treatment mean is zero (standard log link would fail)
		if (zero.treatment.action == "identity.link") {
			# use another link function (link="identity", i.e. E(Y) = b0 + b1 * X)
			mod = try(stats::glm(Counts~Groups, data=dat, family=stats::quasipoisson(link = "identity")), silent = T)
			if ("try-error" %in% class(mod)){
				stop("Error in GLM calculation. Consider to change argument zero.treatment.action to \"log(x+1)\".")
			}
		} else {
			# modify data to always get positive estimates using log link (link="log", i.e. log(E(Y)) = b0 + b1 * X)
			dat$Counts = dat$Counts + 1
			mod = try(stats::glm(dat$Counts~Groups, data=dat, family=stats::quasipoisson(link = "log")), silent = T)
			if ("try-error" %in% class(mod)){
				stop("Error in GLM calculation. Consider to change argument zero.treatment.action to \"identity.link\".")
			}
		}
	}

	results = summary(multcomp::glht(mod, linfct = multcomp::mcp(Groups = "Dunnett"), alternative="two.sided"))

	# Set header for information object
	colnames(info) = "Information and warnings:"

	# Provide output
	if (show.output) {
		print(structure(list('Results' = results, Info=info)), row.names = F, quote = F, right = F)
	} else {
		invisible(structure(list('Results' = results, Info=info)))
	}
}

Try the qountstat package in your browser

Any scripts or data that you put into this service are public.

qountstat documentation built on April 4, 2025, 12:18 a.m.