probit: Item response analysis with random effects.

Documented in simulate.probit

#' update method for probit-class
#'
#' @description
#' simulate method for \code{probit}-object as generated by \code{\link{probit}}.
#'
#'
#' @param object Object of class \code{probit}.
#' @param nsim Number of response vectors to simulate. Defaults to \code{1}.
#' @param seed An object specifying if and how the random number generator should be initialized (‘seeded’). Defaults to \code{NULL}. See \code{\link{simulate}} for more information.
#' @param re.form (\code{formula}, \code{NULL}, or \code{NA}) specify which random effects to include in simulation. Defaults to \code{NULL} in which case all random effects are included. If \code{NA} or \code{~0} no random effects are included.
#' @param re.keep Keep realizations of random effects in the model? Defaults to \code{TRUE}. If \code{FALSE} and also \code{nsim > 1}, then simulations will be stored as lists of vectors.
#' @param make.ordinal Should ordinal responses be categorized from their numerical value? Defaults to \code{TRUE}.
#' @param data Date frame with data on the wide format. Defaults to \code{data=NULL}, which corresponds to \code{data} taken from the call object.
#'
#' @return \code{\link{probit-class}} object.
#'
#' @export
simulate.probit <- function(object,nsim=1,seed=NULL,re.form=NULL,re.keep=TRUE,make.ordinal=TRUE,data=NULL) {
  # setting-up code for random generator is copied from stats:::simulate.lm
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
  if (is.null(seed)) {
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  } else {
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }

  # my own code follows below

  # data with explanatory variables for the simulations
  if (is.null(data)) {data <- object$data} else {data <- as_tibble(data)}

#  # simulation.name may not appear in data
#  if ((!is.null(simulation.name)) && is.element(simulation.name,names(data))) stop("simulation.name may not appear as a variable in data")

  # grab parameters
  random.eff   <- sapply(object$random,function(x){all.vars(x[[2]])})
  q            <- length(random.eff)
  new_subjects <- unique(data[[object$subject.name]])
  old_subjects <- unique(object$data[[object$subject.name]])
  items        <- c(object$items.interval,object$items.ordinal)

  # linear parametrization matrix Q such that
  # 1) diagonal elements in parameter indexes = cumsum(1:q)
  # 2) Q Psi = matrix(Q%*%Psi,q,q)
  # 3) Q.mu  = matrix(tildeQ%*%mu,q,q*(q+1)/2)
  Q <- matrix(0,q*q,q*(q+1)/2)
  Q[matrix(1:(q*q),q,q)[upper.tri(matrix(0,q,q),diag=TRUE)],] <- diag(nrow=q*(q+1)/2)
  tildeQ <- matrix(aperm(array(Q,dim=c(q,q,q*(q+1)/2)),c(1,3,2)),q*q*(q+1)/2,q)

  # set-up data matrix with random input
  U <- as_tibble(cbind(rep(new_subjects,each=nsim),matrix(0,length(new_subjects)*nsim,q)),
                 .name_repair = "minimal")
  names(U) <- c(object$subject.name,random.eff)

  # simulate random effects as specified by re.form
  if (is.null(re.form)) re.form <- as.formula(paste("~",paste(random.eff,collapse="+")))
  if ((length(re.form)==1) && (is.na(re.form))) re.form <- ~0
  A <- diag(as.numeric(is.element(random.eff,all.vars(re.form))),nrow=q)
  # simulate know subjects from conditional distribution
  for (s in which(is.element(new_subjects,old_subjects))) {
    U[U[[object$subject.name]]==new_subjects[s],2:(q+1)] <- t(object$mu[s,] + solve(matrix(Q%*%object$psi[s,],q,q),matrix(rnorm(nsim*q),q,nsim)))%*%A
  }
  # simulate unknown subjects from marginal distribution
  for (s in which(!is.element(new_subjects,old_subjects))) {
    U[U[[object$subject.name]]==new_subjects[s],2:(q+1)] <- t(solve(object$Gamma,matrix(rnorm(nsim*q),q,nsim)))%*%A
  }

  # set-up tibble to contain result
  res <- full_join(data,U,by=object$subject.name)
  take.names <- names(res)
  if (!re.keep) take.names <- setdiff(take.names,random.eff)
  res[object$items.ordinal] <- as.numeric(NA)
  res <- pivot_longer(res,all_of(items),names_to = object$item.name, values_to = object$response.name)
  res[[object$item.name]] <- factor(res[[object$item.name]],levels=items)

  # prediction by fixed effects with inserted simulated random effects
  res[object$response.name] <- predict_slim(object$m.fixed,newdata = res)

  # add white noise term
  sd <- sqrt(as.numeric(object$sigma2))
  names(sd) <- items
  res[object$response.name] <- res[object$response.name] + sd[res[[object$item.name]]]*rnorm(nrow(res))

  # pivot_wider
  if (!re.keep) res <- select(res,-all_of(random.eff))
  if ((!re.keep) && (nsim>1)) {
    res <- pivot_wider(res, names_from = object$item.name, values_from = object$response.name,
                       values_fn = list)
  } else {
    res <- pivot_wider(res, names_from = object$item.name, values_from = object$response.name)
  }

  # threshold ordinal variables?
  if (make.ordinal) {
    for (i in object$items.ordinal) {
      my.levels <- object$ordinal.levels[[i]]
      if (is.list(res[[i]])) {
        res[[i]] <- lapply(res[[i]],function(x) {
          ordered(my.levels[1+sapply(x,function(y){sum(y>object$eta[[i]])})],
                            levels=my.levels)
        })
      } else {
        res[[i]] <- ordered(my.levels[1+sapply(res[[i]],function(y){sum(y>object$eta[[i]])})],
                            levels=my.levels)
      }
    }
  }

#  # simplify lists when nsim=1?
#  if ((nsim==1) && simplify) {
#    for (i in object$items.interval) res[[i]] <- unlist(res[[i]])
#    for (i in object$items.ordinal) res[[i]] <- ordered(unlist(res[[i]]))
#  }

  # return result with original order of the variables
  res[,take.names]
}