R/getFit.R

Defines functions getFit

Documented in getFit

#' Fit Infectivity Results with Binomial GLM Model
#'
#' Fit the results in a \code{data.frame} produced by either \code{\link{tally}} or
#' \code{\link{score}}.
#'
#' @param obj a \code{data.frame} produced by \code{\link{tally}}
#'    or by \code{\link{score}}.
#' @param by the name of the variable in the results \code{data.frame}
#'   that will be used to split the results before fitting
#'   (see details.)
#'
#' @details
#' This function will fit a GLM binomial model using a two-column response
#' where the fit is weighted by the total numbers of cases. Data are fit by
#' the \code{\link{glm}} function using the \code{binomial} \code{\link{family}}
#' with either the default \code{"logit"} \emph{or} the complementary log-log
#' \code{"cloglog"} link function. The \code{"logit"} link is
#' appropriate for symmetrical data. The \code{"cloglog"} link is better for
#' asymmetrical data. The best fit will be chosen by Akaike's criteria.
#'
#' If the parameter \code{by} is specified, the data frame will first be split
#' by this variable after coercion to a \code{factor}. In previous versions,
#' this option only accepted "columns" or "rows" as a potential variable. 
#'
#' The argument \code{obj} must be produced either by \code{\link{tally}} or by
#' \code{\link{score}}. If a variable named \code{"positive"} is present, data
#' are assumed to have been produced by \code{\link{score}} and the function
#' \code{\link{tally}} will be called before proceeding. If variables named 
#' \code{"pos"} and \code{"neg"} are present, the data are assumed to be the
#' product of \code{\link{tally}}. In the case of a data frame produced by
#' \code{\link{score}}, the data must have a variable named \code{"moi"} or
#' \code{"x"} representing the multiplicity of infection. If \bold{both}
#' \code{"x"} and \code{"moi"} are present, an attempt will be made to choose
#' the more informative value and a warning will be issued. Data produced by
#' \code{\link{tally}} will always have a variable named \code{"x"} as the
#' independent variable representing the multiplicity in the \code{glm} model.
#'
#' @return
#'
#' A fitted \code{glm} model \emph{or} list of such models. Each fitted model
#' has the unit of measure attached as an attribute with the text value from
#' \code{unit}. The \code{link} function will either be "logit" or "cloglog" 
#' and can be identified with the \code{\link{family}} function. Note that
#' the fitted model uses the \code{log} of the moi values for fitting.
#'
#' @export
#'
getFit <- function(obj, by)
{
	if (missing(obj))
		stop("getFit requires a data.frame from tally() or score()")
	if (all(c("pos", "neg") %in% names(obj)))	# data.frame produced by tally()
		res <- obj
	else if ("positive" %in% names(obj))	# data.frame from score()
		res <- tally(obj)
	else
		stop(deparse(substitute(obj)), " has not been generated by tally() or score()")

# extract object name
	objName <- deparse(substitute(obj))
	
# assign the multiplicity to either "moi" or "x" 
# "moi" will be changed to "x" internally
	sel <- grep("^x$|^moi$", names(res),ignore.case = TRUE)
	if (length(sel) > 2)
		stop(objName, ' has too many variables named "x" and/or "moi"')
	else if (length(sel) == 2) {
		n1 <- length(unique(res[[sel[1]]]))
		n2 <- length(unique(res[[sel[2]]]))
		if (n1 == n2)
			stop('unable to choose between "x" or "moi" in ', objName)
		else if (n1 > n2)
			names(res)[sel[1]] <- "x"
		else
			names(res)[sel[2]] <- "x"
	}
	else if (length(sel) == 1)
		names(res)[sel] <- "x"
	else
		stop(objName, " requires either 'moi' or 'x' as the multiplicity")
		
# check for unit string
	if ("unit" %in% names(res))
		unit <- as.character(res$unit[1])
	else
		unit <- "none"

# split data with 'by'
	if (!missing(by))
		spl <- split(res, as.factor(res[[by]]))
	else
		spl <- list(res)

# return value is list
	fmList <- lapply(spl, function(dat) {
		fm1 <- tryCatch(glm(cbind(pos, neg) ~ log(x), data = dat, subset = x > 0,
			family = binomial("logit")), error = function(e) NA)
		fm2 <- tryCatch(glm(cbind(pos, neg) ~ log(x), data = dat, subset = x > 0,
			family = binomial("cloglog")), error = function(e) NA)
		if (!"try-error" %in% class(fm1) & "try-error" %in% class(fm2))
			return (fm1)
		else if ("try-error" %in% class(fm1) & !"try-error" %in% class(fm2))
			return (fm2)
		else if ("try-error" %in% class(fm1) && "try-error" %in% class(fm2))
			return (NULL)
		else if (AIC(fm1) < AIC(fm2) - 5)
			return(fm1)
		else
			return(fm2)
	})

# assign unit attribute
	for (i in seq_along(fmList))
		attr(fmList[[i]], "unit") <- unit 
	
# return fit or list of fits
	if (length(fmList) == 1)
		return(fmList[[1]])
	else 
		return(fmList)
}
ornelles/virustiter documentation built on Oct. 13, 2024, 7:58 a.m.