#' @title The \code{probit} class and some basic methods
#'
#' @name probit-class
#' @description
#' An object of class \code{probit}, as generated by \code{\link{probit}}, is a list with the entries specified below.
#' \describe{
#' \item{\code{fixed}}{Model for the fixed effects.}
#' \item{\code{response.name}}{Name of response in internal representation.}
#' \item{\code{weight.name}}{Name of weight in internal representation.}
#' \item{\code{item.name}}{Name of item identifier in the model formula.}
#' \item{\code{items.interval}}{Character vector with names of interval responses.}
#' \item{\code{items.ordinal}}{Character vector with names of ordinal responses.}
#' \item{\code{ordinal.levels}}{List of character vectors with the level names of the ordinal variables.}
#' \item{\code{subject.name}}{Name of subject identifier.}
#' \item{\code{random}}{List of mean models for the random effects.}
#' \item{\code{dependence}}{Text string (\code{marginal} or \code{joint}) specifying the correlation structure used.}
#' \item{\code{m.fixed}}{\code{\link[biglm]{biglm}}-object with the joint fit of the fixed effects. Note that tests and confidence intervals based directly on this object do not make sense.}
#' \item{\code{sigma2}}{List with estimated variances for the interval responses.}
#' \item{\code{eta}}{List with estimated threshold parameters for the ordinal responses.}
#' \item{\code{m.random}}{List of \code{\link[stats]{lm}}-objects for mean models on the random effects.}
#' \item{\code{Gamma}}{Cholesky factor of estimated inverse variance of random effects.}
#' \item{\code{mu}}{Matrix (subjects, q) of estimated approximate conditional means for the random effects, where q = number of random effects.}
#' \item{\code{psi}}{Matrix (subjects, q*(q+1)/2) of estimated Cholesky factors of approximate inverse conditional variances for the random effects.}
#' \item{\code{B}}{Number of replications for each subject in Monte-Carlo computation in minimization step.}
#' \item{\code{BB}}{Number of replications for each subject in Monte-Carlo computation in maximization step.}
#' \item{\code{logL}}{Guess at \code{log(likelihood)}. Should be compared to \code{sum(F1)}.}
#' \item{\code{F1}}{Vector with subject-wise \code{F1}-values from last minimization step.}
#' \item{\code{pvalue}}{p-value from the latest t-test used in the stopping criterion for the stochastic optimization.}
#' \item{\code{iter}}{Number of minimization-maximization steps.}
#' \item{\code{code}}{Iteration status: 0 = iterations halted due to the p-value stopping criterion, 1 = maximum number of iterations reached.}
#' \item{\code{data}}{Tibble with dataset analyzed.}
#' }
#'
#' @param x Object of class \code{probit}.
#' @param object Object of class \code{probit}.
#' @param level Approximate coverage of the confidence interval. Defaults to \code{level=0.95}.
#' @param BB Number of simulations per subject in \code{anova} or \code{update}. Defaults to \code{BB=NULL}, which corresponds to \code{BB} taken from the call object.
#' @param digits Number of digits in print of \code{probit_confint} and \code{probit_anova} objects. Defaults to \code{digits=4}.
#' @param data Data frame with data in wide format. Defaults to \code{data=NULL}, which corresponds to \code{data} taken from the call object.
#' @param B Number of simulations in minimization step. Defaults to \code{B=NULL}, which corresponds to \code{B} taken from the call object.
#' @param maxit Maximal number of minimization-maximization steps. Defaults to \code{maxit=20}.
#' @param sig.level Significance level at which the iterative stochastic optimization is stopped. Defaults to \code{sig.level=0.60}.
#' @param verbose Numeric controlling the amount of convergence diagnostics printed. Defaults to \code{verbose=0}, corresponding to no output.
#' @param re.form Logical. If \code{TRUE} (default) the estimated conditional means \code{mu} of the random effects are used in \code{predict}; otherwise the random effects are predicted from their mean models \code{m.random}.
#'
#' @note \code{anova} re-simulates the underlying responses and random
#' effects for the fixed effects model. Hence the top part of the anova
#' table is stochastic.
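#'
#' @examples
#' \dontrun{
#' ## A minimal usage sketch of the methods below. It assumes 'fit' is an object
#' ## returned by probit(); the data set and model are not part of this file, so
#' ## the calls are purely illustrative.
#' print(fit)
#' summary(fit)
#' confint(fit, level = 0.90)
#' anova(fit)                          # top part is re-simulated, hence stochastic
#' predict(fit)                        # predictions including estimated random effects
#' predict(fit, re.form = FALSE)       # random effects taken from their mean models
#' solve(t(fit$Gamma) %*% fit$Gamma)   # estimated variance of the random effects
#' }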
#'
#' @seealso
#' \code{\link{probit}}
#'
#' @rdname probit-class
#' @export
print.probit <- function(x) {
cat("Probit regression with",length(x$items.interval),"interval items and",length(x$items.ordinal),"ordinal items\n")
cat("Total approximative log(likelihood)=",-sum(x$F1),"\n")
}
#' @rdname probit-class
#' @export
summary.probit <- function(object) {
cat("Probit regression with",length(object$items.interval),"interval items,",length(object$items.ordinal),"ordinal items, and",length(object$random),"random effects.\n")
cat("\n")
cat("Total approximative log(likelihood)=",-sum(object$F1),"via",paste0("(",object$B,",",object$BB,")"),"replications in minimization-maximization steps.\n")
cat("\n")
if (object$dependence=="marginal") {
cat("Variance of random effects (fitted as jointly independent):\n")
} else {
cat("Variance of random effects (fitted as jointly dependent):\n")
}
print(solve(t(object$Gamma)%*%(object$Gamma)))
cat("\n")
if (length(object$items.interval)>0) {
cat("Variances on interval items:\n")
print(unlist(object$sigma2))
cat("\n")
}
if (length(object$items.ordinal)>0) {
cat("Threshold parameters for ordinal items:\n")
print(object$eta)
cat("\n")
}
}
#' @rdname probit-class
#' @export
confint.probit <- function(object,level=0.95) {
bhat <- biglm:::coef.biglm(object$m.fixed)
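# The fixed effect fit m.fixed is based on BB simulated replications per subject,
# so its naive variance is rescaled by BB and its residual degrees of freedom are
# deflated accordingly before the confidence intervals are computed.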
SE <- sqrt(diag(biglm:::vcov.biglm(object$m.fixed)) * object$BB)
mydf <- (object$m.fixed$df.resid + sum(!is.na(bhat)))/object$BB - sum(!is.na(bhat))
# Coefficient table for fixed effects
my.confint <- data.frame(variable=object$item.name,
estimate=bhat,
CI.lower=bhat+qt((1-level)/2,df=mydf)*SE,
CI.upper=bhat+qt(1-(1-level)/2,df=mydf)*SE,
SE=SE,
t.statistic=bhat/SE,
p.value=2*(1-pt(abs(bhat/SE),df=mydf)))
# Coefficient table for random effects
random.eff <- unlist(lapply(object$random,function(x){all.vars(x[[2]])}))
for (i in random.eff) {
if (length(coef(object$m.random[[i]]))>0) {
tmp <- as.data.frame(cbind(summary(object$m.random[[i]])$coefficients,confint(object$m.random[[i]])))[,c(1,5,6,2,3,4)]
tmp <- cbind(variable=i,tmp)
colnames(tmp) <- colnames(my.confint)
rownames(tmp) <- paste0(rownames(tmp),".",i)
my.confint <- rbind(my.confint,tmp)
}
}
# return result
return(structure(my.confint,class=c("probit_confint","data.frame")))
}
#' @rdname probit-class
#' @export
anova.probit <- function(object,BB=NULL) {
# fit lm-object in order to be able to extract ANOVA table
# take parameters from object
subjects <- unique(object$data[[object$subject.name]])
q <- length(object$random)
items <- c(object$items.interval,object$items.ordinal)
random.eff <- unlist(lapply(object$random,function(x){all.vars(x[[2]])}))
if (is.null(BB)) BB <- object$BB
# design matrix for Cholesky factor of inverse covariance in normal approximation
Q <- matrix(0,q*q,q*(q+1)/2)
Q[matrix(1:(q*q),q,q)[upper.tri(matrix(0,q,q),diag=TRUE)],] <- diag(nrow=q*(q+1)/2)
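# Q maps the packed upper triangle psi[s,] (length q*(q+1)/2) into the full q x q
# upper triangular Cholesky factor used in the simulation below.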
# set-up data matrix with random input
U <- as_tibble(cbind(rep(subjects,each=BB),matrix(0,length(subjects)*BB,q)),
.name_repair = "minimal")
names(U) <- c(object$subject.name,random.eff)
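# For each subject, draw BB samples of the random effects from the normal approximation
# with mean mu[s,] and inverse variance t(Gamma_s) %*% Gamma_s, where
# Gamma_s = matrix(Q %*% psi[s,], q, q) is the Cholesky factor stored in psi.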
for (s in 1:length(subjects)) {
U[U[[object$subject.name]]==subjects[s],-1] <- t(object$mu[s,] + solve(matrix(Q%*%object$psi[s,],q,q),matrix(rnorm(BB*q),q,BB)))
}
mydata <- full_join(as_tibble(object$data),U,by=object$subject.name)
# simulate responses for ordinal variables
for (i in object$items.ordinal) {
ii <- !is.na(mydata[[i]]); NN <- sum(ii)
tmp <- tibble(factor(rep(i,NN),levels=items)); names(tmp) <- object$item.name
my.offset <- predict_slim(object$m.fixed,bind_cols(mydata[ii,],tmp))
# simulate underlying normal responses and insert into mydata
tmp <- as.numeric(mydata[[i]][ii])
mydata[,i] <- rep(as.numeric(NA),nrow(mydata))
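# inverse-CDF draw from the normal distribution truncated to the interval between
# the thresholds of the observed category, shifted back by the fixed effect offset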
mydata[ii,i] <- my.offset + qnorm(
pnorm(c(-Inf,object$eta[[i]])[tmp]-my.offset) +
(pnorm(c(object$eta[[i]],Inf)[tmp]-my.offset) - pnorm(c(-Inf,object$eta[[i]])[tmp]-my.offset))*runif(NN)
)
}
# linear regression
mydata <- pivot_longer(mydata,all_of(items),names_to = object$item.name, values_to = object$response.name)
m.lm <- lm(eval(substitute(update(formula(object$fixed),y~.),list(y=as.name(object$response.name)))),
data=mydata)
# extract and correct anova from fixed effect model
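# each subject contributed BB simulated replications, so the residual degrees of freedom
# are deflated and the sums of squares divided by BB before the F tests are recomputed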
my.anova <- as.data.frame(anova(m.lm))
my.anova["Residuals","Df"] <- (m.lm$df.residual + sum(!is.na(m.lm$coefficients)))/BB - sum(!is.na(m.lm$coefficients))
my.anova[,"Sum Sq"] <- my.anova[,"Sum Sq"]/BB
my.anova[,"Mean Sq"] <- my.anova[,"Sum Sq"]/my.anova[,"Df"]
my.anova[-nrow(my.anova),"F value"] <- my.anova[-nrow(my.anova),"Mean Sq"] / my.anova[nrow(my.anova),"Mean Sq"]
my.anova[-nrow(my.anova),"Pr(>F)"] <- 1-pf(my.anova[-nrow(my.anova),"F value"],df1=my.anova[-nrow(my.anova),"Df"],df2=my.anova[nrow(my.anova),"Df"])
my.anova <- cbind(variable=object$item.name,my.anova)
# extract anova's for random effect models
for (i in random.eff) {
tmp <- cbind(variable=i,as.data.frame(anova(object$m.random[[i]])))
rownames(tmp)[nrow(tmp)] <- paste0("Residuals.",i)
my.anova <- rbind(my.anova,tmp)
}
# return
return(structure(my.anova,class=c("probit_anova","data.frame")))
}
#' @rdname probit-class
#' @export
print.probit_confint <- function(x,digits=4) {
if (!is.null(digits)) x[,-1] <- round(x[,-1],digits=digits)
tmp <- rownames(x)
x <- as.data.frame(apply(x,2,function(y){ifelse(is.na(y),".",as.character(y))})); rownames(x) <- tmp
skip <- data.frame(V1=paste(rep("-",max(8,nchar(x$variable))),collapse=""),
V2=paste(rep("-",max(8,nchar(x$estimate))),collapse=""),
V3=paste(rep("-",max(8,nchar(x$CI.lower))),collapse=""),
V4=paste(rep("-",max(8,nchar(x$CI.upper))),collapse=""),
V5=paste(rep("-",max(2,nchar(x$SE))),collapse=""),
V6=paste(rep("-",max(11,nchar(x$t.statistic))),collapse=""),
V6=paste(rep("-",max(7,nchar(x$p.value))),collapse=""))
rownames(skip) <- ""; colnames(skip) <- colnames(x)
tmp <- which(c("",x$variable)!=c(x$variable,""))
y <- skip; for (i in 2:length(tmp)) {
rownames(skip) <- paste(c(" ",rownames(skip)),collapse="")
y <- rbind(y,x[tmp[i-1]:(tmp[i]-1),],skip)
}
print(y)
}
#' @rdname probit-class
#' @export
print.probit_anova <- function(x,digits=4) {
if (!is.null(digits)) x[,-1] <- round(x[,-1],digits=digits)
tmp <- rownames(x)
x <- as.data.frame(apply(x,2,function(y){ifelse(is.na(y),".",as.character(y))})); rownames(x) <- tmp
skip <- data.frame(V1=paste(rep("-",max(8,nchar(x$variable))),collapse=""),
V2=paste(rep("-",max(2,nchar(x$Df))),collapse=""),
V3=paste(rep("-",max(6,nchar(x$'Sum Sq'))),collapse=""),
V4=paste(rep("-",max(7,nchar(x$'Mean Sq'))),collapse=""),
V5=paste(rep("-",max(7,nchar(x$'F value'))),collapse=""),
V6=paste(rep("-",max(6,nchar(x$'Pr(>F)'))),collapse=""))
rownames(skip) <- ""; colnames(skip) <- colnames(x)
tmp <- which(c("",x$variable)!=c(x$variable,""))
y <- skip; for (i in 2:length(tmp)) {
rownames(skip) <- paste(c(" ",rownames(skip)),collapse="")
y <- rbind(y,x[tmp[i-1]:(tmp[i]-1),],skip)
}
print(y)
}
# #' @rdname probit-class
# #' @export
# update.probit <- function(object,fixed=NULL,random=NULL,dependence=NULL,data=NULL,B=NULL,BB=NULL,maxit=20,sig.level=0.6,verbose=0) {
# # update fixed effects?
# if (is.null(fixed)) {fixed <- formula(object$fixed)} else {
# fixed <- update(formula(object$fixed),fixed)
# }
#
# # update random effects?
# if (is.null(random)) {random <- object$random} else {
# new_random <- object$random
# ranef_update <- sapply(random,function(x){all.vars(x[[2]])})
# random_ii <- match(ranef_update,sapply(new_random,function(x){all.vars(x[[2]])}))
# if (any(is.na(random_ii))) stop("Random effects to be updated must already be present")
# for (i in 1:length(ranef_update)) {
# new_random[[random_ii[i]]] <- update(new_random[[random_ii[i]]],random[[i]])
# }
# random <- new_random
# }
#
# # take present values of dependence, B, BB, data?
# if (is.null(dependence)) {dependence <- object$dependence}
# if (is.null(B)) {B <- object$B}
# if (is.null(BB)) {BB <- object$BB}
# if (is.null(data)) {data <- object$data}
#
# # fix dependence
# if (dependence!="marginal") dependence <- "joint"
#
# # refit and return
# return(MM_probit(maxit,sig.level,verbose,
# fixed,object$response.name,object$item.name,object$items.interval,object$items.ordinal,
# object$subject.name,random,dependence,
# object$m.fixed,object$eta,
# object$mu,object$psi,
# B,BB,
# data))
# }
#' @rdname probit-class
#' @export
predict.probit <- function(object,re.form=TRUE) {
# find subjects and items
subjects <- unique(object$data[[object$subject.name]])
items <- c(object$items.interval,object$items.ordinal)
# find random effects
random.eff <- sapply(object$random,function(x){all.vars(x[[2]])})
# if re.form=TRUE, then include predicted random effects
if (re.form) {
# take prediction random effects
mu <- object$mu
} else {
# use m.random to predict random effects
mu <- matrix(0,length(subjects),ncol(object$mu))
data.short <- object$data %>% group_by(!!as.name(object$subject.name)) %>% slice_head(n=1)
for (i in 1:ncol(object$mu)) {
mu[,i] <- predict(object$m.random[[i]],newdata=data.short)
}
}
# set-up data matrix with predicted random input
U <- as_tibble(cbind(subjects,mu),.name_repair = "minimal")
names(U) <- c(object$subject.name,random.eff)
mydata <- full_join(object$data,U,by=object$subject.name)
for (i in object$items.ordinal) mydata[,i] <- as.numeric(as.matrix(mydata[,i]))
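# predictions are computed on the long format and reshaped back to wide format;
# the items vary fastest within each original row, hence byrow = TRUE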
mydata[,items] <- matrix(
predict_slim(object$m.fixed,
pivot_longer(mydata, all_of(items),
names_to = object$item.name, values_to = object$response.name)),
nrow(mydata),length(items), byrow = TRUE)
# return prediction
return(mydata)
}
# Hack: Define generic functions by stealing from emmeans-package.
# Needed to make things work together with future::plan().
setGeneric("recover_data",emmeans:::recover_data)
setGeneric("emm_basis",emmeans:::emm_basis)
#' @rdname probit-class
#' @export
recover_data.probit <- function(object, ...) {
# grab parameters
subjects <- unique(object$data[[object$subject.name]])
random.eff <- sapply(object$random,function(x){all.vars(x[[2]])})
q <- length(random.eff)
# grab predicted random effects
U <- as_tibble(cbind(subjects,object$mu),.name_repair = "minimal")
names(U) <- c(object$subject.name,random.eff)
mydata <- full_join(object$data,U,by=object$subject.name)
# recode ordinal items as numeric
for (i in object$items.ordinal) mydata[[i]] <- as.numeric(mydata[[i]])
# make into long format
mydata <- pivot_longer(mydata,all_of(c(object$items.interval,object$items.ordinal)),
names_to = object$item.name,values_to = object$response.name)
# find data without NA's
tbl <- get_all_vars(delete.response(terms(object$m.fixed)),
mydata[!is.na(mydata[[object$response.name]]),])
attr(tbl, "terms") <- delete.response(terms(object$m.fixed))
attr(tbl, "predictors") <- .all.vars(delete.response(terms(object$m.fixed)))
#attr(tbl, "responses") <- object$response.name
tbl
}
#' @rdname probit-class
#' @export
emm_basis.probit <- function(object, trms, xlev, grid, ...) {
m <- model.frame(trms, grid, na.action = na.pass, xlev = xlev)
X <- model.matrix(trms, m)
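# As in confint.probit: rescale the biglm variance by BB and deflate the residual
# degrees of freedom to correct for the BB simulated replications per subject.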
bhat <- biglm:::coef.biglm(object$m.fixed)
V <- biglm:::vcov.biglm(object$m.fixed) * object$BB
# NOTE: the estimability basis (nbasis) presumably needs to be updated; have a look at that.
nbasis <- matrix(NA)
dfargs <- list(df = (object$m.fixed$df.resid + sum(!is.na(bhat)))/object$BB - sum(!is.na(bhat)))
dffun <- function(k, dfargs) dfargs$df
# Return result
list(X = X, bhat = bhat, nbasis = nbasis, V = V, dffun = dffun, dfargs = dfargs)
}
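# A hedged usage sketch of the emmeans hook defined above. It assumes 'fit' is a fitted
# probit object whose fixed effect formula contains a factor named 'treatment'; both
# names are hypothetical and only illustrate the intended use:
#   library(emmeans)
#   emmeans(fit, ~ treatment)          # estimated marginal means of the linear predictor
#   pairs(emmeans(fit, ~ treatment))   # pairwise comparisons using the corrected df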