Nothing
#' Conditional count by ordinal tests for association.
#'
#' \code{countbot} tests for independence between an ordered categorical
#' variable, \var{X}, and a count variable, \var{Y}, conditional on other variables,
#' \var{Z}. The basic approach involves fitting an ordinal model of \var{X} on
#' \var{Z}, a Poisson or Negative Binomial model of \var{Y} on \var{Z}, and then determining whether there is any
#' residual information between \var{X} and \var{Y}. This is done by
#' computing residuals for both models, calculating their correlation, and
#' testing the null of no residual correlation. This procedure is analogous to test statistic
#' \code{T2} in \code{cobot}. Two test statistics (correlations) are currently output. The first
#' is the correlation between probability-scale residuals. The second is the correlation between
#' the Pearson residual for the count outcome model and a latent variable residual
#' for the ordinal model (Li C and Shepherd BE, 2012).
#'
#' Formula is specified as \code{\var{X} | \var{Y} ~ \var{Z}}.
#' This indicates that models of \code{\var{X} ~ \var{Z}} and
#' \code{\var{Y} ~ \var{Z}} will be fit. The null hypothesis to be
#' tested is \eqn{H_0 : X}{H0 : X} independent of \var{Y} conditional
#' on \var{Z}. The ordinal variable, \code{\var{X}}, must precede the \code{|} and be a factor variable, and \code{\var{Y}} must be an integer.
#' @references Li C and Shepherd BE (2012)
#' A new residual for ordinal outcomes.
#' \emph{Biometrika}. \bold{99}: 473--480.
#' @references Shepherd BE, Li C, Liu Q (2016)
#' Probability-scale residuals for continuous, discrete, and censored data.
#' \emph{The Canadian Journal of Statistics}. \bold{44}: 463--479.
#'
#' @param formula an object of class \code{\link{Formula}} (or one
#' that can be coerced to that class): a symbolic description of the
#' model to be fitted. The details of model specification are given
#' under \sQuote{Details}.
#'
#' @param data an optional data frame, list or environment (or object
#' coercible by \code{\link{as.data.frame}} to a data frame)
#' containing the variables in the model. If not found in
#' \code{data}, the variables are taken from
#' \code{environment(formula)}, typically the environment from which
#' \code{countbot} is called.
#' @param link.x The link family to be used for the ordinal model of
#' \var{X} on \var{Z}. Defaults to \samp{logit}. Other options are
#' \samp{probit}, \samp{cloglog},\samp{loglog}, and \samp{cauchit}.
#'
#' @param fit.y The error distribution for the count model of \var{Y} on \var{Z}.
#' Defaults to \samp{poisson}. The other option is \samp{negative binomial}.
#' If \samp{negative binomial} is specified, \code{\link[MASS]{glm.nb}} is called to fit the count model.
#' @param subset an optional vector specifying a subset of
#' observations to be used in the fitting process.
#'
#' @param na.action action to take when \code{NA} present in data.
#'
#' @param fisher logical indicating whether to apply fisher transformation to compute confidence intervals and p-values for the correlation.
#'
#' @param conf.int numeric specifying confidence interval coverage.
#'
#' @return object of \samp{cocobot} class.
#' @export
#' @importFrom stats qlogis qnorm qcauchy integrate
#' @examples
#' data(PResidData)
#' countbot(x|c ~z, fit.y="poisson",data=PResidData)
#' countbot(x|c ~z, fit.y="negative binomial",data=PResidData)
countbot <- function(formula, data, link.x=c("logit", "probit","loglog", "cloglog", "cauchit"),
fit.y=c("poisson", "negative binomial"),
subset, na.action=getOption('na.action'),
fisher=TRUE, conf.int=0.95) {
# Construct the model frames for x ~ z and y ~ z
F1 <- Formula(formula)
Fx <- formula(F1, lhs=1)
Fy <- formula(F1, lhs=2)
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action",
"offset"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf$na.action <- na.action
# We set xlev to a benign non-value in the call so that it won't get partially matched
# to any variable in the formula. For instance a variable named 'x' could possibly get
# bound to xlev, which is not what we want.
mf$xlev <- integer(0)
mf[[1L]] <- as.name("model.frame")
mx <- my <- mf
# NOTE: we add the opposite variable to each model frame call so that
# subsetting occurs correctly. Later we strip them off.
mx[["formula"]] <- Fx
yName <- all.vars(Fy[[2]])[1]
mx[[yName]] <- Fy[[2]]
my[["formula"]] <- Fy
xName <- all.vars(Fx[[2]])[1]
my[[xName]] <- Fx[[2]]
mx <- eval(mx, parent.frame())
mx[[paste('(',yName,')',sep='')]] <- NULL
my <- eval(my, parent.frame())
my[[paste('(',xName,')',sep='')]] <- NULL
data.points <- nrow(mx)
if (!is.factor(mx[[1]])){
warning("Coercing ",names(mx)[1]," to factor. Check the ordering of categories.")
mx[[1]] <- as.factor(mx[[1]])
}
if (is.factor(my[[1]])){
stop(names(my)[1]," cannot be a factor.")
}
# Construct the model matrix z
mxz <- model.matrix(attr(mx,'terms'),mx)
zzint <- match("(Intercept)", colnames(mxz), nomatch = 0L)
if(zzint > 0L) {
mxz <- mxz[, -zzint, drop = FALSE]
}
myz <- model.matrix(attr(my,'terms'),my)
zzint <- match("(Intercept)", colnames(myz), nomatch = 0L)
if(zzint > 0L) {
myz <- myz[, -zzint, drop = FALSE]
}
score.xz <- ordinal.scores(mx, mxz,method=link.x[1])
if (fit.y[1]=="poisson")
score.yz <- poisson.scores(y=model.response(my), X=myz)
else if (fit.y[1]=="negative binomial")
score.yz <- nb.scores(y=model.response(my), X=myz)
else stop("fit.y has to be 'poisson' or 'negative binomial'")
npar.xz = dim(score.xz$dl.dtheta)[2]
npar.yz = dim(score.yz$dl.dtheta)[2]
xx = as.integer(model.response(mx))
nx = length(table(xx))
N = length(xx)
low.x = cbind(0, score.xz$Gamma)[cbind(1:N, xx)]
hi.x = cbind(1-score.xz$Gamma, 0)[cbind(1:N, xx)]
xz.presid <- low.x - hi.x
xz.dpresid.dtheta <- score.xz$dlow.dtheta - score.xz$dhi.dtheta
## return value
ans <- list(
TS=list(),
fisher=fisher,
conf.int=conf.int,
data.points=data.points
)
tb = corTS(xz.presid, score.yz$presid,
score.xz$dl.dtheta, score.yz$dl.dtheta,
score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta,
xz.dpresid.dtheta, score.yz$dpresid.dtheta,fisher)
tb.label = "PResid vs. PResid"
ans$TS$TB <-
list(
ts=tb$TS, var=tb$var.TS, pval=tb$pval.TS,
label = tb.label
)
rij <- cbind(score.xz$Gamma, 1)[cbind(1:N, xx)]
rij_1 <- cbind(0,score.xz$Gamma)[cbind(1:N, xx)]
pij <- rij-rij_1
G.inverse <- switch(link.x[1], logit = qlogis, probit = qnorm,
cloglog = qgumbel, cauchit = qcauchy)
xz.latent.resid <- rep(NA, N)
inverse_fail <- FALSE
for (i in 1:N){
tmp <- try(integrate(G.inverse, rij_1[i], rij[i])$value/pij[i],silent=TRUE)
if (inherits(tmp,'try-error')){
if (link.x[1] != 'cauchit')
warning("Cannot compute latent variable residual.")
else
warning("Cannot compute latent variable residual with link function cauchit.")
inverse_fail <- TRUE
break
} else {
xz.latent.resid[i] <- tmp
}
}
if (!inverse_fail){
### To compute dlatent.dtheta (need dgamma.dtheta and dp0.dtheta from ordinal scores)
xz.dlatent.dtheta = dpij.dtheta = matrix(, npar.xz, N)
drij_1.dtheta <- score.xz$dlow.dtheta
drij.dtheta <- -score.xz$dhi.dtheta
for(i in 1:N) {
dpij.dtheta[,i] <- score.xz$dp0.dtheta[i, xx[i],]
if (xx[i] == 1) {
xz.dlatent.dtheta[,i] <- -xz.latent.resid[i]/pij[i]*dpij.dtheta[,i] + 1/pij[i]*(
G.inverse(rij[i])*drij.dtheta[,i] - 0 )
} else if(xx[i] == nx){
xz.dlatent.dtheta[,i] <- -xz.latent.resid[i]/pij[i]*dpij.dtheta[,i] + 1/pij[i]*(
0 - G.inverse(rij_1[i])*drij_1.dtheta[,i] )
} else
xz.dlatent.dtheta[,i] <- -xz.latent.resid[i]/pij[i]*dpij.dtheta[,i] + 1/pij[i]*(
G.inverse(rij[i])*drij.dtheta[,i] - G.inverse(rij_1[i])*drij_1.dtheta[,i])
}
### latent.resid vs pearson resid
tc <- corTS(xz.latent.resid, score.yz$pearson.resid,
score.xz$dl.dtheta, score.yz$dl.dtheta,
score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta,
xz.dlatent.dtheta, score.yz$dpearson.resid.dtheta, fisher)
ans$TS$TC <-
list(
ts=tc$TS, var=tc$var.TS, pval=tc$pval.TS,
label = 'Latent.resid vs. Pearson.resid'
)
}
ans <- structure(ans, class="cocobot")
# Apply confidence intervals
for (i in seq_len(length(ans$TS))){
ts_ci <- getCI(ans$TS[[i]]$ts,ans$TS[[i]]$var,ans$fisher,conf.int)
ans$TS[[i]]$lower <- ts_ci[,1]
ans$TS[[i]]$upper <- ts_ci[,2]
}
return(ans)
}
#### example
## generate count by ordinal data
## generate.data3 = function(alphax, betax, alphay, betay, eta, N) {
## z = rnorm(N,0,1)
## x = y = numeric(N)
## ## px is an N x length(alphax) matrix.
## ## Each row has the TRUE cummulative probabilities for each subject.
## px = (1 + exp(- outer(alphax, betax*z, "+"))) ^ (-1)
## aa = runif(N)
## for(i in 1:N)
## x[i] = sum(aa[i] > px[,i])
## x = as.numeric(as.factor(x))
## y = rpois(N, exp(outer(alphay, betay*z+eta[x], "+")))
## return(list(x=as.factor(x), y=y, z=z))
## }
## set.seed(13)
## alphax = c(-1, 0, 1, 2)
## betax = 1
## alphay = 1
## betay = -.5
## #eta = rep(0, 5)
## eta = c(1:5)/20
## N = 200
## data <- generate.data3(alphax, betax, alphay, betay, eta, N)
## #### check for cocobot
## cocobot(x|y~z, data=data)
## countbot(x|y~z, data=data, fisher=TRUE)
## countbot(x|y~z, data=data, family="negative binomial")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.