R/R6Model.R
In glmmrBase: Generalised Linear Mixed Models in R

#' A GLMM Model
#'
#' A generalised linear mixed model 
#' @details
#' See \link[glmmrBase]{glmmrBase-package} for a more in-depth guide.
#'
#' The generalised linear mixed model is:
#'
#' \deqn{Y \sim F(\mu,\sigma)}
#' \deqn{\mu = h^-1(X\beta + Zu)}
#' \deqn{u \sim MVN(0,D)}
#'
#' where F is a distribution with scale parameter \deqn{\sigma}, h is a link function, X are the fixed effects with parameters \deqn{\beta}, Z is the random effect design matrix with multivariate Gaussian distributed effects u. 
#' The class provides access to all of the elements of the model above and associated calculations and functions including model fitting, power analysis,
#' and various relevant matrices, including information matrices and related corrections. The object is an R6 class and so can serve as a parent class for extended functionality.
#'
#' The currently supported families (links) are Gaussian (identity, log), Binomial (logit, log, probit, identity), Poisson (log, identity), Gamma (logit, identity, inverse), and Beta (logit).
#' 
#' This class provides model fitting functionality with a variety of stochastic maximum likelihood algorithms with and without restricted maximum likelihood corrections. A fast Laplace approximation is also included. 
#' Small sample corrections are also provided including Kenward-Roger and Satterthwaite corrections. 
#'
#' Many calculations use the covariance matrix of the observations, such as the information matrix, which is used in power calculations and
#' other functions. For non-Gaussian models, the class uses the first-order approximation proposed by Breslow and Clayton (1993) based on the
#' marginal quasilikelihood:
#'
#' \deqn{\Sigma = W^{-1} + ZDZ^T}
#'
#' where _W_ is a diagonal matrix with the GLM iterated weights for each observation equal
#' to, for individual _i_ \eqn{\left( \frac{(\partial h^{-1}(\eta_i))}{\partial \eta_i}\right) ^2 Var(y|u)}
#' (see Table 2.1 in McCullagh and Nelder (1989)). The modification proposed by Zegers et al to the linear predictor to
#' improve the accuracy of approximations based on the marginal quasilikelihood is also available, see `use_attenuation()`.
#'
#' See \href{https://github.com/samuel-watson/glmmrBase/blob/master/README.md}{glmmrBase} for a
#' detailed guide on model specification. A detailed vingette for this package is also available online<doi:10.48550/arXiv.2303.12657>.
#' @references
#' Breslow, N. E., Clayton, D. G. (1993). Approximate Inference in Generalized Linear Mixed Models.
#' Journal of the American Statistical Association<, 88(421), 9–25. <doi:10.1080/01621459.1993.10594284>
#' 
#' McCullagh P, Nelder JA (1989). Generalized linear models, 2nd Edition. Routledge.
#' 
#' McCulloch CE (1997). “Maximum Likelihood Algorithms for Generalized Linear Mixed Models.” 
#' Journal of the American statistical Association, 92(437), 162–170.<doi:10.2307/2291460>
#' 
#' Zeger, S. L., Liang, K.-Y., Albert, P. S. (1988). Models for Longitudinal Data: A Generalized Estimating Equation Approach.
#' Biometrics, 44(4), 1049.<doi:10.2307/2531734>
#' @importFrom Matrix Matrix
#' @export
Model <- R6::R6Class("Model",
                     public = list(
                       #' @field covariance A \link[glmmrBase]{Covariance} object defining the random effects covariance.
                       covariance = NULL,
                       #' @field mean A \link[glmmrBase]{MeanFunction} object, defining the mean function for the model, including the data and covariate design matrix X.
                       mean = NULL,
                       #' @field family One of the family function used in R's glm functions. See \link[stats]{family} for details
                       family = NULL,
                       #' @field weights A vector indicting the weights for the observations.
                       weights = NULL,
                       #' @field trials For binomial family models, the number of trials for each observation. The default is 1 (bernoulli).
                       trials = NULL,
                       #' @field formula The formula for the model. May be empty if separate formulae are specified for the mean and covariance components.
                       formula = NULL,
                       #' @field var_par Scale parameter required for some distributions (Gaussian, Gamma, Beta).
                       var_par = NULL,
                       #' @description
                       #' Sets the model to use or not use "attenuation" when calculating the first-order approximation to
                       #' the covariance matrix.
                       #' @details
                       #' **Attenuation**
                       #' For calculations such as the information matrix, the first-order approximation to the covariance matrix
                       #' proposed by Breslow and Clayton (1993), described above, is used. The approximation is based on the
                       #' marginal quasilikelihood. Zegers, Liang, and Albert (1988) suggest that a better approximation to the
                       #' marginal mean is achieved by "attenuating" the linear predictor. Setting `use` equal to TRUE uses this
                       #' adjustment for calculations using the covariance matrix for non-linear models.
                       #' @param use Logical indicating whether to use "attenuation".
                       #' @return None. Used for effects.
                       use_attenuation = function(use){
                         curr_state <- private$attenuate_parameters
                         if(use){
                           private$attenuate_parameters <- TRUE
                           Model__use_attenuation(private$ptr,TRUE,private$model_type())
                         } else {
                           private$attenuate_parameters <- FALSE
                           Model__use_attenuation(private$ptr,FALSE,private$model_type())
                         }
                         if(private$attenuate_parameters != curr_state){
                           private$genW()
                         }
                       },
                       #' @description
                       #' Return fitted values. Does not account for the random effects. For simulated values based
                       #' on resampling random effects, see also `sim_data()`. To predict the values including random effects at a new location see also
                       #' `predict()`.
                       #' @param type One of either "`link`" for values on the scale of the link function, or "`response`"
                       #' for values on the scale of the response
                       #' @param X (Optional) Fixed effects matrix to generate fitted values
                       #' @param u (Optional) Random effects values at which to generate fitted values
                       #' @param sample Logical. If TRUE then the parameters will be re-sampled from their sampling distribution. Currently only works 
                       #' with existing X matrix and not user supplied matrix X and this will also ignore any provided random effects.
                       #' @param sample_n Integer. If sample is TRUE, then this is the number of samples.
                       #' @return Fitted values as either a vector or matrix depending on the number of samples
                       fitted = function(type="link", X, u, sample= FALSE, sample_n = 100){
                         if(missing(X)){
                           if(!sample){
                             Xb <- self$mean$linear_predictor()
                           } else {
                             Xb <- matrix(NA,nrow=self$n(),ncol=sample_n)
                             b_curr <- Model__get_beta(private$ptr,private$model_type())
                             M <- self$information_matrix()
                             ML <- chol(solve(M))
                             for(i in 1:sample_n){
                               u <- rnorm(length(b_curr))
                               Model__update_beta(private$ptr,b_curr + ML%*%u,private$model_type())
                               Xb[,i] <- drop(Model__xb(private$ptr,private$model_type()))
                             }
                             Model__update_beta(private$ptr,b_curr,private$model_type())
                           }
                         } else {
                           Xb <- X%*%self$mean$parameters 
                         }
                         if(!missing(u) & !sample){
                           Xb <- Xb + self$covariance$Z%*%u
                         }
                         if(type=="response"){
                           Xb <- self$family$linkinv(Xb)
                         }
                         return(Xb)
                       },
                       #' @description
                       #' Generates the residuals for the model
                       #' 
                       #' Generates one of several types of residual for the model. If conditional = TRUE then 
                       #' the residuals include the random effects, otherwise only the fixed effects are included. For type,
                       #' there are raw, pearson, and standardized residuals. For conditional residuals a matrix is returned 
                       #' with each column corresponding to a sample of the random effects.
                       #' @param type Either "standardized", "raw" or "pearson"
                       #' @param conditional Logical indicating whether to condition on the random effects (TRUE) or not (FALSE)
                       #' @return A matrix with either one column is conditional is false, or with number of columns corresponding 
                       #' to the number of MCMC samples.
                       residuals = function(type = "standardized",
                                            conditional = TRUE){
                         if(!private$y_has_been_updated)stop("No data y has been provided")
                         if(!type%in%c("standardized","raw","pearson"))stop("type must be one of standardized, raw, or pearson")
                         rtype_int <- match(type, c("raw","pearson","standardized")) - 1
                         R <- Model__residuals(private$ptr,rtype_int,conditional,private$model_type())
                         return(R)
                       },
                       #' @description 
                       #' Generate predictions at new values
                       #' 
                       #' Generates predicted values using a new data set to specify covariance 
                       #' values and values for the variables that define the covariance function.
                       #' The function will return a list with the linear predictor, conditional 
                       #' distribution of the new random effects term conditional on the current estimates
                       #' of the random effects, and some simulated values of the random effects if requested.
                       #' @param newdata A data frame specifying the new data at which to generate predictions
                       #' @param m Number of samples of the random effects to draw
                       #' @param offset Optional vector of offset values for the new data
                       #' @return A list with the linear predictor, parameters (mean and covariance matrices) for
                       #' the conditional distribution of the random effects, and any random effect samples.
                       predict = function(newdata,
                                          offset = rep(0,nrow(newdata)),
                                          m=0
                       ){
                         preddata <- private$model_data(newdata)
                         out <- Model__predict(private$ptr,as.matrix(preddata),offset,m,private$model_type())
                         return(out)
                       },
                       #' @description
                       #' Create a new Model object. Typically, a model is generated from a formula and data. However, it can also be 
                       #' generated from a previous model fit. 
                       #' @param formula A model formula containing fixed and random effect terms. The formula can be one way (e.g. `~ x + (1|gr(cl))`) or
                       #' two-way (e.g. `y ~ x + (1|gr(cl))`). One way formulae will generate a valid model enabling data simulation, matrix calculation, 
                       #' and power, etc. Outcome data can be passed directly to model fitting functions, or updated later using member function `update_y()`.
                       #' For binomial models, either the syntax `cbind(y, n-y)` can be used for outcomes, or just `y` and the number of trials passed to the argument
                       #' `trials` described below.
                       #' @param covariance (Optional) Either a \link[glmmrBase]{Covariance} object, an equivalent list of arguments
                       #' that can be passed to `Covariance` to create a new object, or a vector of parameter values. At a minimum the list must specify a formula.
                       #' If parameters are not included then they are initialised to 0.5. 
                       #' @param mean (Optional) Either a \link[glmmrBase]{MeanFunction} object, an equivalent list of arguments
                       #' that can be passed to `MeanFunction` to create a new object, or a vector of parameter values. At a minimum the list must specify a formula.
                       #' If parameters are not included then they are initialised to 0.
                       #' @param data A data frame with the data required for the mean function and covariance objects. This argument
                       #' can be ignored if data are provided to the covariance or mean arguments either via `Covariance` and `MeanFunction`
                       #' object, or as a member of the list of arguments to both `covariance` and `mean`.
                       #' @param family A family object expressing the distribution and link function of the model, see \link[stats]{family}. Currently accepts \link[stats]{binomial},
                       #' \link[stats]{gaussian}, \link[stats]{Gamma}, \link[stats]{poisson}, \link[glmmrBase]{Beta}, and \link[glmmrBase]{Quantile}.
                       #' @param var_par (Optional) Scale parameter required for some distributions, including Gaussian. Default is NULL.
                       #' @param offset (Optional) A vector of offset values. Optional - could be provided to the argument to mean instead.
                       #' @param trials (Optional) For binomial family models, the number of trials for each observation. If it is not set, then it will
                       #' default to 1 (a bernoulli model).
                       #' @param weights (Optional) A vector of weights. 
                       #' @param model_fit (optional) A `mcml` model fit resulting from a call to `MCML` or `LA`
                       #' @return A new Model class object
                       #' @seealso \link[glmmrBase]{nelder}, \link[glmmrBase]{MeanFunction}, \link[glmmrBase]{Covariance}
                       #' @examples
                       #' \dontshow{
                       #' setParallel(FALSE) 
                       #' }
                       #' # For more examples, see the examples for MCML.
                       #' 
                       #' #create a data frame describing a cross-sectional parallel cluster
                       #' #randomised trial
                       #' df <- nelder(~(cl(10)*t(5)) > ind(10))
                       #' df$int <- 0
                       #' df[df$cl > 5, 'int'] <- 1
                       #' mod <- Model$new(
                       #'   formula = ~ factor(t) + int - 1 + (1|gr(cl)) + (1|gr(cl,t)),
                       #'   data = df,
                       #'   family = stats::gaussian()
                       #' )
                       #' 
                       #' # We can also include the outcome data in the model initialisation. 
                       #' # For example, simulating data and creating a new object:
                       #' df$y <- mod$sim_data()
                       #'
                       #' mod <- Model$new(
                       #'   formula = y ~ factor(t) + int - 1 + (1|gr(cl)) + (1|gr(cl,t)),
                       #'   data = df,
                       #'   family = stats::gaussian()
                       #' )
                       #'
                       #' # Here we will specify a cohort study
                       #' df <- nelder(~ind(20) * t(6))
                       #' df$int <- 0
                       #' df[df$t > 3, 'int'] <- 1
                       #' 
                       #' des <- Model$new(
                       #'   formula = ~ int + (1|gr(ind)),
                       #'   data = df,
                       #'   family = stats::poisson()
                       #' )
                       #'   
                       #' # or with parameter values specified
                       #'   
                       #' des <- Model$new(
                       #'   formula = ~ int + (1|gr(ind)),
                       #'   covariance = c(0.05),
                       #'   mean = c(1,0.5),
                       #'   data = df,
                       #'   family = stats::poisson()
                       #' )
                       #'
                       #' #an example of a spatial grid with two time points
                       #'
                       #' df <- nelder(~ (x(10)*y(10))*t(2))
                       #' spt_design <- Model$new(formula = ~ 1 + (1|ar0(t)*fexp(x,y)),
                       #'                         data = df,
                       #'                         family = stats::gaussian())
                       initialize = function(formula,
                                             covariance,
                                             mean,
                                             data = NULL,
                                             family = NULL,
                                             var_par = NULL,
                                             offset = NULL,
                                             weights = NULL,
                                             trials = NULL,
                                             model_fit = NULL){
                         if(!is.null(model_fit)){
                           if(!missing(formula) | !missing(covariance) | !missing(mean)) message("Previous model fit has been provided, all other arguments are ignored")
                           self$family <- do.call(model_fit$family, list(link = model_fit$link))
                           if(model_fit$var_par_family){
                             self$var_par <- model_fit$coefficients$est[model_fit$P + model_fit$Q + 1]
                           } else {
                             self$var_par <- 1
                           }
                           self$covariance <- Covariance$new(
                             formula = model_fit$cov_form,
                             parameters = model_fit$coefficients$est[(model_fit$P + 1):(model_fit$P + model_fit$Q)]
                           )
                           self$mean <- MeanFunction$new(
                             formula = model_fit$mean_form,
                             data = model_fit$model_data$data,
                             parameters = model_fit$coefficients$est[1:(model_fit$P)]
                           )
                           self$mean$offset <- model_fit$model_data$offset
                           self$weights <- model_fit$model_data$weights
                           if(self$family[[1]]=="binomial") self$trials <- model_fit$model_data$trials
                         } else {
                           if(is.null(family)){
                             stop("No family specified.")
                           } else {
                             self$family <- family
                           }
                           
                           if(!is.null(var_par)){
                             self$var_par <- var_par
                           } else {
                             self$var_par <- 1
                           }
                           
                           if(!missing(formula)){
                             
                             if(!missing(covariance) && is(covariance,"R6"))stop("Do not specify both formula and Covariance class object")
                             if(!missing(mean) && is(mean,"R6"))stop("Do not specify both formula and MeanFunction class object")
                             form_1 <- private$check_y_formula(formula,data,self$family)
                             processed_data_all <- private$process_data(form_1,data,TRUE,TRUE)
                             self$formula <- Reduce(paste,as.character(processed_data_all$form))
                             if(is.null(data)){
                               stop("Data must be specified with a formula")
                             } else {
                               processed_data_cov <- private$process_data(form_1,data,TRUE,FALSE)
                               if(missing(covariance) || (!all(is(covariance,"numeric")) & !"parameters"%in%names(covariance))){
                                 self$covariance <- Covariance$new(
                                   formula = processed_data_cov$formula
                                 )
                               } else {
                                 if("parameters"%in%names(covariance)){
                                   self$covariance <- Covariance$new(
                                     formula = processed_data_cov$formula,
                                     parameters = covariance$parameters
                                   )
                                 } else if(all(is(covariance,"numeric"))){
                                   self$covariance <- Covariance$new(
                                     formula = processed_data_cov$formula,
                                     parameters = covariance
                                   )
                                 } else {
                                   stop("Cannot interpret covariance argument")
                                 }
                               }
                               if(missing(mean) || (!"parameters"%in%names(mean) & !all(is(mean,"numeric")))){
                                 processed_data <- private$process_data(form_1,data,FALSE,TRUE)
                                 self$mean <- MeanFunction$new(
                                   formula = processed_data$formula,
                                   data = processed_data$data
                                 )
                               } else {
                                 if("parameters"%in%names(mean)){
                                   processed_data <- private$process_data(form_1,data,FALSE,TRUE)
                                   self$mean <- MeanFunction$new(
                                     formula = processed_data$formula,
                                     data = processed_data$data,
                                     parameters = mean$parameters
                                   )
                                 } else if(all(is(mean,"numeric"))){
                                   processed_data <- private$process_data(form_1,data,FALSE,TRUE)
                                   self$mean <- MeanFunction$new(
                                     formula = processed_data$formula,
                                     data = processed_data$data,
                                     parameters = mean
                                   )
                                 } else {
                                   stop("Cannot interpret mean argument")
                                 }
                               }
                             }
                           } else {
                             if(is(covariance,"R6")){
                               if(is(covariance,"Covariance")){
                                 self$covariance <- covariance
                                 if(is.null(covariance$data)){
                                   if(is.null(data)){
                                     stop("No data specified in covariance object or call to function.")
                                   } else {
                                     self$covariance$data <- private$process_data(covariance$formula,data,TRUE,FALSE)$data
                                   }
                                 }
                               } else {
                                 stop("covariance should be Covariance class or parameter vector")
                               }
                             } else {
                               stop("covariance should be Covariance class or parameter vector")
                             } 
                             # if(is(covariance,"list")){
                             #   if(is.null(covariance$formula))stop("A formula must be specified for the covariance")
                             #   if(is.null(covariance$data) & is.null(data))stop("No data specified in covariance list or call to function.")
                             #   self$covariance <- Covariance$new(
                             #     formula= covariance$formula
                             #   )
                             #   if(is.null(covariance$data)){
                             #     self$covariance$data <- private$process_data(self$covariance$formula,data,TRUE,FALSE)$data
                             #   } else {
                             #     self$covariance$data <- covariance$data
                             #   }
                             #   if(!is.null(covariance$parameters))self$covariance$update_parameters(covariance$parameters)
                             # }
                             if(is(mean,"R6")){
                               if(is(mean,"MeanFunction")){
                                 self$mean <- mean
                               } else {
                                 stop("mean should be MeanFunction class or parameter vector")
                               }
                             } else {
                               stop("mean should be MeanFunction class or parameter vector")
                             }
                             
                             #   if(is(mean,"list")){
                             #   if(is.null(mean$formula))stop("A formula must be specified for the mean function.")
                             #   if(is.null(mean$data) & is.null(data))stop("No data specified in mean list or call to function.")
                             #   if(is.null(mean$data)){
                             #     processed_data <- private$process_data(form_1,data,TRUE,FALSE)
                             #     self$mean <- MeanFunction$new(
                             #       formula = processed_data$formula,
                             #       data = processed_data$data
                             #     )
                             #   } else {
                             #     self$mean <- MeanFunction$new(
                             #       formula = mean$formula,
                             #       data = mean$data
                             #     )
                             #   }
                             #   if(!is.null(mean$parameters))self$mean$update_parameters(mean$parameters)
                             # }
                           }
                           if(is.null(offset)){
                             self$mean$offset <- rep(0,nrow(self$mean$data))
                           } else {
                             self$mean$offset <- offset
                           }
                           if(is.null(weights)){
                             self$weights <- rep(1,nrow(self$mean$data))
                           } else {
                             self$weights <- weights
                           }
                           if(self$family[[1]]=="binomial"){
                             if((is.null(trials) || all(trials == 1)) & is.null(self$trials)){
                               self$trials <- rep(1,nrow(self$mean$data))
                               self$family[[1]] <- "bernoulli"
                             } else {
                               if(is.null(self$trials))self$trials <- trials
                             }
                           }
                         }
                         private$session_id <- Sys.getpid()
                         
                         if(!is.null(model_fit)){
                           self$covariance$data = model_fit$model_data$data
                         } else {
                           self$covariance$data = private$process_data(form_1,data,TRUE,FALSE)$data
                         }
                         private$update_ptr()
                         
                         if(private$y_in_formula){
                           if(! private$y_name %in% colnames(data)) stop(paste0(private$y_name," not in data"))
                           self$update_y(data[,private$y_name])
                         }
                       },
                       #' @description
                       #' Print method for `Model` class
                       #' @details
                       #' Calls the respective print methods of the linked covariance and mean function objects.
                       #' @param ... ignored
                       print = function(){
                         cat("\U2BC8 GLMM Model")
                         cat("\n   \U2BA1 Family :",self$family[[1]])
                         cat("\n   \U2BA1 Link :",self$family[[2]])
                         cat("\n   \U2BA1 Linear predictor")
                         cat("\n   \U2223     \U2BA1 Formula: ~",self$mean$formula)
                         cat("\n   \U2223     \U2BA1 Parameters: ",self$mean$parameters)
                         cat("\n   \U2BA1 Covariance")
                         cat("\n   \U2223     \U2BA1 Terms: ",re_names(self$covariance$formula))
                         if(private$model_type() == 1)cat(" (NNGP)")
                         if(private$model_type() == 2)cat(" (HSGP)")
                         cat("\n   \U2223     \U2BA1 Parameters: ",self$covariance$parameters)
                         cat("\n   \U2223     \U2BA1 N random effects: ",ncol(self$covariance$Z))
                         cat("\n   \U2BA1 N:",self$n())
                         cat("\n")
                       },
                       #' @description
                       #' Returns the number of observations in the model
                       #' @details
                       #' The matrices X and Z both have n rows, where n is the number of observations in the model/design.
                       #' @param ... ignored
                       n = function(...){
                         self$mean$n()
                       },
                       #' @description
                       #' Subsets the design keeping specified observations only
                       #'
                       #' Given a vector of row indices, the corresponding rows will be kept and the
                       #' other rows will be removed from the mean function and covariance
                       #' @param index Integer or vector integers listing the rows to keep
                       #' @return The function updates the object and nothing is returned.
                       subset_rows = function(index){
                         self$mean$subset_rows(index)
                         self$covariance$subset(index)
                         private$update_ptr(TRUE)
                       },
                       #'@description
                       #'Generates a realisation of the design
                       #'
                       #'Generates a single vector of outcome data based upon the
                       #'specified GLMM design. 
                       #'@param type Either 'y' to return just the outcome data, 'data'
                       #' to return a data frame with the simulated outcome data alongside the model data,
                       #' or 'all', which will return a list with simulated outcomes y, matrices X and Z,
                       #' parameters beta, and the values of the simulated random effects.
                       #' @return Either a vector, a data frame, or a list
                       #' @examples
                       #' df <- nelder(~(cl(10)*t(5)) > ind(10))
                       #' df$int <- 0
                       #' df[df$cl > 5, 'int'] <- 1
                       #' \dontshow{
                       #' setParallel(FALSE) # for the CRAN check
                       #' }
                       #' des <- Model$new(
                       #'   formula = ~ factor(t) + int - 1 + (1|gr(cl)*ar0(t)),
                       #'   covariance = c(0.05,0.8),
                       #'   mean = c(rep(0,5),0.6),
                       #'   data = df,
                       #'   family = stats::binomial()
                       #' )
                       #' ysim <- des$sim_data()
                       sim_data = function(type = "y"){
                         re <- self$covariance$simulate_re()
                         xb <- self$mean$linear_predictor()
                         mu <- xb + drop(as.matrix(self$covariance$Z)%*%re)
                         f <- self$family
                         if(f[1]=="poisson"){
                           if(f[2]=="log"){
                             y <- rpois(self$n(),exp(mu))
                           }
                           if(f[2]=="identity"){
                             y <- rpois(self$n(),mu)
                           }
                         }
                         if(f[1]%in%c("binomial","bernoulli")){
                           if(f[2]=="logit"){
                             y <- rbinom(self$n(),self$trials,exp(mu)/(1+exp(mu)))
                           }
                           if(f[2]=="log"){
                             y <- rbinom(self$n(),self$trials,exp(mu))
                           }
                           if(f[2]=="identity"){
                             y <- rbinom(self$n(),self$trials,mu)
                           }
                           if(f[2]=="probit"){
                             y <- rbinom(self$n(),self$trials,pnorm(mu))
                           }
                         }
                         if(f[1]=="gaussian"){
                           if(f[2]=="identity"){
                             if(is.null(self$var_par))stop("For gaussian(link='identity') provide var_par")
                             y <- rnorm(self$n(),mu,self$var_par/self$weights)
                           }
                           if(f[2]=="log"){
                             if(is.null(self$var_par))stop("For gaussian(link='log') provide var_par")
                             #CHECK THIS IS RIGHT
                             y <- rnorm(self$n(),exp(mu),self$var_par/self$weights)
                           }
                         }
                         if(f[1]=="Gamma"){
                           if(f[2]=="inverse"){
                             if(is.null(self$var_par))stop("For gamma(link='inverse') provide var_par")
                             y <- rgamma(self$n(),shape = self$var_par,rate = self$var_par*mu)
                           }
                           if(f[2]=="log"){
                             if(is.null(self$var_par))stop("For gamma(link='log') provide var_par")
                             y <- rgamma(self$n(),shape = self$var_par,rate = self$var_par/exp(mu))
                           }
                           if(f[2]=="identity"){
                             if(is.null(self$var_par))stop("For gamma(link='identity') provide var_par")
                             y <- rgamma(self$n(),shape = self$var_par,rate = self$var_par/mu)
                           }
                         }
                         if(f[1]=="beta"){
                           if(f[2]=="logit"){
                             if(is.null(self$var_par))stop("For beta(link='logit') provide var_par")
                             logitxb <- exp(mu)/(1+exp(mu))
                             y <- rbeta(self$n(),logitxb*self$var_par,(1-logitxb)*self$var_par)
                           }
                         }
                         if(f[1]%in%c("quantile","quantile_scaled")){
                           message("Quantile based methods are currently EXPERIMENTAL")
                           message("Simulation from quantile family produces random draws from an asymmetric Laplace distribution")
                           if(f[2]=="logit"){
                             mu <- exp(mu)/(1+exp(mu))
                           } else if(f[2]=="log"){
                             mu <- exp(mu)
                           } else if(f[2] == "inverse"){
                             mu <- 1/mu
                           } else if(f[2] == "probit"){
                             mu <- pnorm(mu)
                           }
                           rand_u <- runif(length(mu))
                           y <- rep(NA,length(mu))
                           for(i in 1:length(y)){
                             if(rand_u[i] <= self$family$q){
                               y[i] <- (self$var_par/(1-self$family$q))*log(rand_u[i]/self$family$q) + mu[i]
                             } else {
                               y[i] <- (self$var_par/self$family$q)*log((1-rand_u[i])/(1-self$family$q)) + mu[i]
                             }
                           }
                         }
                         if(type=="data.frame"|type=="data")y <- cbind(y,self$mean$data)
                         if(type=="all")y <- list(y = y, X = self$mean$X, beta = self$mean$parameters,
                                                  Z = self$covariance$Z, u = re)
                         return(y)
                       },
                       #' @description
                       #' Updates the parameters of the mean function and/or the covariance function
                       #'
                       #' @details
                       #' Using `update_parameters()` is the preferred way of updating the parameters of the
                       #' mean or covariance objects as opposed to direct assignment, e.g. `self$covariance$parameters <- c(...)`.
                       #' The function calls check functions to automatically update linked matrices with the new parameters.
                       #'
                       #' @param mean.pars (Optional) Vector of new mean function parameters
                       #' @param cov.pars (Optional) Vector of new covariance function(s) parameters
                       #' @param var.par (Optional) A scalar value for var_par
                       #' @examples
                       #' \dontshow{
                       #' setParallel(FALSE) # for the CRAN check
                       #' }
                       #' df <- nelder(~(cl(10)*t(5)) > ind(10))
                       #' df$int <- 0
                       #' df[df$cl > 5, 'int'] <- 1
                       #' des <- Model$new(
                       #'   formula = ~ factor(t) + int - 1 + (1|gr(cl)*ar0(t)),
                       #'   data = df,
                       #'   family = stats::binomial()
                       #' )
                       #' des$update_parameters(cov.pars = c(0.1,0.9))
                       update_parameters = function(mean.pars = NULL,
                                                    cov.pars = NULL,
                                                    var.par = NULL){
                         if(!is.null(mean.pars)){
                           self$mean$update_parameters(mean.pars)
                           if(!is.null(private$ptr)){
                             Model__update_beta(private$ptr,mean.pars,private$model_type())
                           }
                         }
                         if(!is.null(cov.pars)){
                           self$covariance$update_parameters(cov.pars)
                           if(!is.null(private$ptr)){
                             Model__update_theta(private$ptr,cov.pars,private$model_type())
                           }
                         }
                         if(!is.null(var.par)){
                           self$var_par <- var.par
                           if(!is.null(private$ptr)){
                             Model__set_var_par(private$ptr,var.par,private$model_type())
                           }
                         }
                       },
                       #' @description
                       #' Generates the information matrix of the mixed model GLS estimator (X'S^-1X). The inverse of this matrix is an 
                       #' estimator for the variance-covariance matrix of the fixed effect parameters. For various small sample corrections
                       #' see `small_sample_correction()` and `box()`. For models with non-linear functions of fixed effect parameters,
                       #' a correction to the Hessian matrix is required, which is automatically calculated or optionally returned or disabled. 
                       #' @param include.re logical indicating whether to return the information matrix including the random effects components (TRUE), 
                       #' or the mixed model information matrix for beta only (FALSE).
                       #' @param theta Logical. If TRUE the function will return the variance-coviariance matrix for the covariance parameters and ignore the first argument. Otherwise, the fixed effect
                       #' parameter information matrix is returned.
                       #' @param oim Logical. If TRUE, returns the observed information matrix for both beta and theta, disregarding other arguments to the function.
                       #' @return A matrix
                       information_matrix = function(include.re = FALSE, theta = FALSE, oim = FALSE){
                         if(oim & !private$y_has_been_updated) stop("No y data has been added")
                         private$update_ptr()
                         nonlin <- Model__any_nonlinear(private$ptr,private$model_type())
                         # if(nonlin){
                         #   if(!private$y_has_been_updated) stop("No y data has been added")
                         #   # if(!hessian.corr %in% c("add","return","none"))stop("hessian.corr must be add, return, or none")
                         # }
                         if(oim){
                           M <- Model__observed_information_matrix(private$ptr,private$model_type())
                         } else {
                           if(theta){
                             M <- Model__infomat_theta(private$ptr,private$model_type())
                           } else {
                             if(include.re & !private$model_type()>0){
                               M <- Model__obs_information_matrix(private$ptr,private$model_type())
                             } else {
                               if(private$model_type()==0){
                                 M <- Model__information_matrix(private$ptr,private$model_type())
                               } else {
                                 M <- Model__information_matrix_crude(private$ptr,private$model_type())
                               }
                             }
                             # if((nonlin & hessian.corr%in%c("add","none")) | !nonlin){
                             #   
                             # }
                             # if(Model__any_nonlinear(private$ptr,private$model_type()) & hessian.corr %in% c("add","return")){
                             #   A <- Model__hessian_correction(private$ptr,private$model_type())
                             #   if(any(eigen(A)$values < 0) & adj.nonspd){
                             #     if(adj.nonspd)message("Hessian correction for non-linear parameters is not positive semi-definite and will be adjusted. To disable this feature set adj.nonspd = FALSE")
                             #     A <- near_semi_pd(A)
                             #   }
                             #   if(hessian.corr == "add"){
                             #     M <- M + A
                             #   } else {
                             #     M <- A
                             #   }
                             # }
                           }
                         }
                         
                         return(M)
                       },
                       #' @description 
                       #' Returns the robust sandwich variance-covariance matrix for the fixed effect parameters
                       #' @return A PxP matrix
                       sandwich = function(){
                         private$update_ptr()
                         return(Model__sandwich(private$ptr,private$model_type()))
                       },
                       #' @description 
                       #' Returns a small sample correction. The option "KR" returns the Kenward-Roger bias-corrected variance-covariance matrix 
                       #' for the fixed effect parameters and degrees of freedom. Option "KR2"  returns an improved correction given 
                       #' in Kenward & Roger (2009) <doi:j.csda.2008.12.013>. Note, that the corrected/improved version is invariant 
                       #' under reparameterisation of the covariance, and it will also make no difference if the covariance is linear 
                       #' in parameters. Exchangeable covariance structures in this package (i.e. `gr()`) are parameterised in terms of 
                       #' the variance rather than standard deviation, so the results will be unaffected. Option "sat" returns the "Satterthwaite"
                       #' correction, which only includes corrected degrees of freedom, along with the GLS standard errors.
                       #' @param type Either "KR", "KR2", or "sat", see description.
                       #' @param oim Logical. If TRUE use the observed information matrix, otherwise use the expected information matrix
                       #' @return A PxP matrix
                       small_sample_correction = function(type, oim = FALSE){
                         if(oim & !private$y_has_been_updated) stop("No y data has been added")
                         private$update_ptr()
                         if(!type %in% c("KR","KR2","sat"))stop("type must be either KR, KR2, or sat")
                         ss_type <- ifelse(type == "KR",1,ifelse(type == "KR2",4,5))
                         return(Model__small_sample_correction(private$ptr,ss_type,oim,private$model_type()))
                       },
                       #' @description 
                       #' Returns the inferential statistics (F-stat, p-value) for a modified Box correction <doi:10.1002/sim.4072> for
                       #' Gaussian-identity models.
                       #' @param y Optional. If provided, will update the vector of outcome data. Otherwise it will use the data from 
                       #' the previous model fit.
                       #' @return A data frame.
                       box = function(y){
                         if(!(self$family[[1]]=="gaussian"&self$family[[2]]=="identity"))stop("Box only available for linear models")
                         private$update_ptr()
                         if(!missing(y))private$set_y(y)
                         results <- Model__box(private$ptr,private$model_type())
                         results_out <- data.frame(parameter = Model__beta_parameter_names(private$ptr,private$model_type()),
                                                   "F value" = results$test_stat,
                                                   DoF = results$dof,
                                                   scale = results$scale,
                                                   "p value" = results$p_value)
                         return(results_out)
                       },
                       #' @description
                       #' Estimates the power of the design described by the model using the square root
                       #' of the relevant element of the GLS variance matrix:
                       #'
                       #'  \deqn{(X^T\Sigma^{-1}X)^{-1}}
                       #'
                       #' Note that this is equivalent to using the "design effect" for many
                       #' models.
                       #' @param alpha Numeric between zero and one indicating the type I error rate.
                       #' Default of 0.05.
                       #' @param two.sided Logical indicating whether to use a two sided test
                       #' @param alternative For a one-sided test whether the alternative hypothesis is that the
                       #' parameter is positive "pos" or negative "neg"
                       #' @return A data frame describing the parameters, their values, expected standard
                       #' errors and estimated power.
                       #' @examples
                       #' \dontshow{
                       #' setParallel(FALSE) # for the CRAN check
                       #' }
                       #' df <- nelder(~(cl(10)*t(5)) > ind(10))
                       #' df$int <- 0
                       #' df[df$cl > 5, 'int'] <- 1
                       #' des <- Model$new(
                       #'   formula = ~ factor(t) + int - 1 + (1|gr(cl)) + (1|gr(cl,t)),
                       #'   covariance = c(0.05,0.1),
                       #'   mean = c(rep(0,5),0.6),
                       #'   data = df,
                       #'   family = stats::gaussian(),
                       #'   var_par = 1
                       #' )
                       #' des$power() #power of 0.90 for the int parameter
                       power = function(alpha=0.05,two.sided=TRUE,alternative = "pos"){
                         M <- self$information_matrix()
                         v0 <- solve(M)
                         v0 <- as.vector(sqrt(diag(v0)))
                         if(two.sided){
                           pwr <- pnorm(abs(self$mean$parameters/v0) - qnorm(1-alpha/2))
                         } else {
                           if(alternative == "pos"){
                             pwr <- pnorm(self$mean$parameters/v0 - qnorm(1-alpha))
                           } else {
                             pwr <- pnorm(-self$mean$parameters/v0 - qnorm(1-alpha))
                           }
                         }
                         
                         res <- data.frame(Value = self$mean$parameters,
                                           SE = v0,
                                           Power = pwr)
                         return(res)
                       },
                       #' @description
                       #' Returns the diagonal of the matrix W used to calculate the covariance matrix approximation
                       #' @return A vector with values of the glm iterated weights
                       w_matrix = function(){
                         private$update_ptr()
                         private$genW()
                         return(private$W)
                       },
                       #' @description
                       #' Returns the derivative of the link function with respect to the linear preditor
                       #' @return A vector
                       dh_deta = function(){
                         Q = dlinkdeta(self$fitted(),self$family[[2]])
                         return(Q)
                       },
                       #' @description
                       #' Returns the (approximate) covariance matrix of y
                       #'
                       #' Returns the covariance matrix Sigma. For non-linear models this is an approximation. See Details.
                       #' @param inverse Logical indicating whether to provide the covariance matrix or its inverse
                       #' @return A matrix.
                       Sigma = function(inverse = FALSE){
                         private$update_ptr()
                         return(Model__Sigma(private$ptr,inverse,private$model_type()))
                       },
                       #'@description
                       #'Stochastic Maximum Likelihood model fitting
                       #'
                       #'@details
                       #'**Monte Carlo maximum likelihood**
                       #'Fits generalised linear mixed models using one of several algorithms: Markov Chain Newton
                       #'Raphson (MCNR), Markov Chain Expectation Maximisation (MCEM), or stochastic approximation expectation 
                       #'maximisation (SAEM) with or without Polyak-Ruppert averaging. MCNR and MCEM are described by McCulloch (1997)
                       #'<doi:10.1080/01621459.1997.10473613>. For each iteration
                       #'of the algorithms the unobserved random effect terms (\eqn{\gamma}) are simulated
                       #'using Markov Chain Monte Carlo (MCMC) methods,
                       #'and then these values are conditioned on in the subsequent steps to estimate the covariance
                       #'parameters and the mean function parameters (\eqn{\beta}). SAEM uses a Robbins-Munroe approach to approximating 
                       #'the likelihood and requires fewer MCMC samples and may have lower Monte Carlo error, see Jank (2006)<doi:10.1198/106186006X157469>. 
                       #'The option `alpha` determines the rate at which succesive iterations "forget" the past and must be between 0.5 and 1. Higher values
                       #'will result in lower Monte Carlo error but slower convergence. The options `mcem.adapt` and `mcnr.adapt` will modify the number of MCMC samples during each step of model fitting 
                       #'using the suggested values in Caffo, Jank, and Jones (2006)<doi:10.1111/j.1467-9868.2005.00499.x> 
                       #'as the estimates converge. 
                       #'
                       #'The accuracy of the algorithm depends on the user specified tolerance. For higher levels of
                       #'tolerance, larger numbers of MCMC samples are likely need to sufficiently reduce Monte Carlo error. However,
                       #'the SAEM approach does overcome reduce the required samples, especially with R-P averaging. As such a lower number (20-50)
                       #'samples per iteration is normally sufficient to get convergence. 
                       #'
                       #'There are several stopping rules for the algorithm. Either the algorithm will terminate when succesive parameter estimates are
                       #'all within a specified tolerance of each other (`conv.criterion = 1`), or when there is a high probability that the estimated 
                       #'log-likelihood has not been improved. This latter criterion can be applied to either the overall log-likelihood (`conv.criterion = 2`),
                       #'the likelihood just for the fixed effects (`conv.criterion = 3`), or both the likelihoods for the fixed effects and covariance parameters 
                       #'(`conv.criterion = 4`; default).
                       #'
                       #' Options for the MCMC sampler are set by changing the values in `self$mcmc_options`. The information printed to the console
                       #' during model fitting can be controlled with the `self$set_trace()` function.
                       #' 
                       #' To provide weights for the model fitting, store them in self$weights. To set the number of 
                       #' trials for binomial models, set self$trials.
                       #' 
                       #'@param y Optional. A numeric vector of outcome data. If this is not provided then either the outcome must have been specified when 
                       #' initialising the Model object, or the outcome data has been updated using member function `update_y()`
                       #'@param method The MCML algorithm to use, either `mcem` or `mcnr`, or `saem` see Details. Default is `saem`. `mcem.adapt` and `mcnr.adapt` will use adaptive 
                       #'MCMC sample sizes starting small and increasing to the the maximum value specified in `mcmc_options$sampling`, which results in faster convergence. `saem` uses a
                       #'stochastic approximation expectation maximisation algorithm. MCMC samples are kept from all iterations and so a smaller number of samples are needed per iteration. 
                       #'@param tol Numeric value, tolerance of the MCML algorithm, the maximum difference in parameter estimates
                       #'between iterations at which to stop the algorithm. If two values are provided then different tolerances will be 
                       #'applied to the fixed effect and covariance parameters.
                       #'@param max.iter Integer. The maximum number of iterations of the MCML algorithm.
                       #'@param se String. Type of standard error and/or inferential statistics to return. Options are "gls" for GLS standard errors (the default),
                       #' "robust" for robust standard errors, "kr" for original Kenward-Roger bias corrected standard errors, 
                       #' "kr2" for the improved Kenward-Roger correction, "sat" for Satterthwaite degrees of freedom correction (this is the same 
                       #' degrees of freedom correction as Kenward-Roger, but with GLS standard errors), "box" to use a modified Box correction (does not return confidence intervals),
                       #' "bw" to use GLS standard errors with a between-within correction to the degrees of freedom, "bwrobust" to use robust 
                       #' standard errors with between-within correction to the degrees of freedom.
                       #'@param oim Logical. If TRUE use the observed information matrix, otherwise use the expected information matrix for standard error and related calculations.
                       #'@param reml Logical. Whether to use a restricted maximum likelihood correction for fitting the covariance parameters
                       #'@param mcmc.pkg String. Either `cmdstan` for cmdstan (requires the package `cmdstanr`), `rstan` to use rstan sampler, or
                       #'`hmc` to use a cruder Hamiltonian Monte Carlo sampler. cmdstan is recommended as it has by far the best number 
                       #' of effective samples per unit time. cmdstanr will compile the MCMC programs to the library folder the first time they are run, 
                       #' so may not currently be an option for some users.
                       #'@param se.theta Logical. Whether to calculate the standard errors for the covariance parameters. This step is a slow part
                       #' of the calculation, so can be disabled if required in larger models. Has no effect for Kenward-Roger standard errors.
                       #'@param algo Integer. 1 = L-BFGS for beta and BOBYQA for theta, 2 = BOBYQA for both, 3 = L-BFGS for both (default). The L-BFGS algorithm 
                       #'may perform poorly with some covariance structures, in this case select 1 or 2, or apply an upper bound.
                       #'@param lower.bound Optional. Vector of lower bounds for the fixed effect parameters. To apply bounds use MCEM.
                       #'@param upper.bound Optional. Vector of upper bounds for the fixed effect parameters. To apply bounds use MCEM.
                       #'@param lower.bound.theta Optional. Vector of lower bounds for the covariance parameters (default is 0; negative values will cause an error)
                       #'@param upper.bound.theta Optional. Vector of upper bounds for the covariance parameters. 
                       #'@param alpha If using SAEM then this parameter controls the step size. On each iteration i the step size is (1/alpha)^i, default is 0.8. Values around 0.5 
                       #'will result in lower bias but slower convergence, values closer to 1 will result in higher convergence but potentially higher error.
                       #'@param convergence.prob Numeric value in (0,1) indicating the probability of convergence if using convergence criteria 2, 3, or 4.
                       #'@param pr.average Logical indicating whether to use Polyak-Ruppert averaging if using the SAEM algorithm (default is TRUE)
                       #'@param conv.criterion Integer. The convergence criterion for the algorithm. 1 = the maximum difference between parameter estimates between iterations as defined by `tol`,
                       #'2 = The probability of improvement in the overall log-likelihood is less than 1 - `convergence.prob`
                       #'3 = The probability of improvement in the log-likelihood for the fixed effects is less than 1 - `convergence.prob`
                       #'4 = The probabilities of improvement in the log-likelihood the fixed effects and covariance parameters are both less than 1 - `convergence.prob`
                       #'@param skip.theta Logical. If TRUE then the covariance parameter estimation step is skipped. This option is mainly used for testing, but may be useful
                       #'if covariance parameters are known.
                       #'@return A `mcml` object
                       #'@seealso \link[glmmrBase]{Model}, \link[glmmrBase]{Covariance}, \link[glmmrBase]{MeanFunction}
                       #'@examples
                       #'\dontrun{
                       #' # Simulated trial data example
                       #'data(SimTrial,package = "glmmrBase")
                       #' model <- Model$new(
                       #'   formula = y ~ int + factor(t) - 1 + (1|gr(cl)*ar1(t)),
                       #'   data = SimTrial,
                       #'   family = gaussian()
                       #' )
                       #' glm3 <- model$MCML()
                       #' 
                       #' # Salamanders data example
                       #' data(Salamanders,package="glmmrBase")
                       #' model <- Model$new(
                       #'   mating~fpop:mpop-1+(1|gr(mnum))+(1|gr(fnum)),
                       #'   data = Salamanders,
                       #'   family = binomial()
                       #' )
                       #' 
                       #' # we will try MCEM with 500 MCMC iterations
                       #' model$mcmc_options$samps <- 500
                       #' # view the grouping structure 
                       #' glm2 <- model$MCML(method = "mcem")
                       #'
                       #' # Example using simulated data
                       #' #create example data with six clusters, five time periods, and five people per cluster-period
                       #' df <- nelder(~(cl(6)*t(5)) > ind(5))
                       #' # parallel trial design intervention indicator
                       #' df$int <- 0
                       #' df[df$cl > 3, 'int'] <- 1 
                       #' # specify parameter values in the call for the data simulation below
                       #' des <- Model$new(
                       #'   formula= ~ factor(t) + int - 1 +(1|gr(cl)*ar0(t)),
                       #'   covariance = c(0.05,0.7),
                       #'   mean = c(rep(0,5),0.2),
                       #'   data = df,
                       #'   family = gaussian()
                       #' )
                       #' ysim <- des$sim_data() # simulate some data from the model
                       #' fit1 <- des$MCML(y = ysim) # Default model fitting with SAEM-PR
                       #' # use MCEM instead and stop when parameter values are within 1e-2 on successive iterations
                       #' fit2 <- des$MCML(y = ysim, method="mcem",tol=1e-2,conv.criterion = 1)
                       #' 
                       #' # Non-linear model fitting example using the example provided by nlmer in lme4
                       #' data(Orange, package = "lme4")
                       #' 
                       #' # the lme4 example:
                       #' startvec <- c(Asym = 200, xmid = 725, scal = 350)
                       #' (nm1 <- lme4::nlmer(circumference ~ SSlogis(age, Asym, xmid, scal) ~ Asym|Tree,
                       #'               Orange, start = startvec))
                       #' 
                       #' Orange <- as.data.frame(Orange)
                       #' Orange$Tree <- as.numeric(Orange$Tree)
                       #' 
                       #' # Here we can specify the model as a function. 
                       #' 
                       #' model <- Model$new(
                       #'   circumference ~ Asym/(1 + exp((xmid - (age))/scal)) - 1 + (Asym|gr(Tree)),
                       #'   data = Orange,
                       #'   family = gaussian(),
                       #'   mean = c(200,725,350),
                       #'   covariance = c(500),
                       #'   var_par = 50
                       #' )
                       #' 
                       #' # for this example, we will use MCEM with adaptive MCMC sample sizes
                       #' 
                       #' model$mcmc_options$samps <- 1000
                       #' nfit <- model$MCML(method = "mcem.adapt")
                       #' 
                       #' summary(nfit)
                       #' summary(nm1)
                       #' 
                       #' 
                       #'}
                       #'@md
                       MCML = function(y = NULL,
                                       method = "saem",
                                       tol = 1e-2,
                                       max.iter = 50,
                                       se = "gls",
                                       oim = FALSE,
                                       reml = TRUE,
                                       mcmc.pkg = "rstan",
                                       se.theta = TRUE,
                                       algo = 2,
                                       lower.bound = NULL,
                                       upper.bound = NULL,
                                       lower.bound.theta = NULL,
                                       upper.bound.theta = NULL,
                                       alpha = 0.8,
                                       convergence.prob = 0.95,
                                       pr.average = FALSE,
                                       conv.criterion = 2,
                                       skip.theta = FALSE){
                         # Checks on options and data
                         if(is.null(y)){
                           if(!private$y_has_been_updated)stop("y not specified and not updated in Model object")
                         } else {
                           private$verify_data(y)
                           private$set_y(y)
                         }
                         if(private$model_type() > 0 & reml == TRUE) stop("REML not available with HSGP/NNGP approximations, please set reml=FALSE")
                         Model__use_attenuation(private$ptr,private$attenuate_parameters,private$model_type())
                         if(!se %in% c("gls","kr","kr2","bw","sat","bwrobust","box"))stop("Option se not recognised")
                         if(self$family[[1]]%in%c("Gamma","beta") & se %in% c("kr","kr2","sat"))stop("KR standard errors are not currently available with gamma or beta families")
                         if(se != "gls" & private$model_type() != 0)stop("Only GLS standard errors supported for GP approximations.")
                         if(se == "box" & !(self$family[[1]]=="gaussian"&self$family[[2]]=="identity"))stop("Box only available for linear models")
                         if(!mcmc.pkg %in% c("cmdstan","rstan","hmc"))stop("mcmc.pkg must be one of cmdstan, rstan, or hmc")
                         if(!method %in% c("mcem", "mcnr", "saem", "mcem.adapt", "mcnr.adapt"))stop("method must be either mcem, mcnr, saem, mcem.adapt, mcnr.adapt")
                         if(self$family[[1]]%in%c("quantile","quantile_scaled") & method == "mcnr")stop("MCNR with quantile currently disabled, please use SAEM or MCEM with MCML")
                         append_u <- FALSE
                         if(mcmc.pkg == "hmc" & method == "saem")stop("saem and hmc options not currently compatible")
                         adaptive <- method %in% c("mcnr.adapt","mcem.adapt")
                         if(!conv.criterion %in% 1:4)stop("Convergence criterion must be 1, 2, or 3")
                         if(alpha < 0.5 | alpha >= 1)stop("alpha must be in [0.5, 1)")
                         if(convergence.prob <= 0 | convergence.prob >= 1)stop("Convergence probability must be in (0, 1)")
                         if(self$family[[1]]%in%c("quantile","quantile_scaled")){
                           Model__set_quantile(private$ptr,self$family$q,private$model_type())
                           message("Quantile regression is EXPERIMENTAL.")
                         }
                         if(!mcmc.pkg == "hmc"){
                           Model__mcmc_set_lambda(private$ptr,self$mcmc_options$lambda,private$model_type())
                           Model__mcmc_set_max_steps(private$ptr,self$mcmc_options$maxsteps,private$model_type())
                           Model__mcmc_set_refresh(private$ptr,self$mcmc_options$refresh,private$model_type())
                         }
                         if(!is.null(lower.bound)){
                           Model__set_bound(private$ptr,lower.bound,TRUE,TRUE,private$model_type())
                         }
                         if(!is.null(upper.bound)){
                           Model__set_bound(private$ptr,upper.bound,TRUE,FALSE,private$model_type())
                         }
                         if(!is.null(lower.bound.theta)){
                           if(any(lower.bound.theta < 0))stop("Theta lower bound cannot be negative")
                           Model__set_bound(private$ptr,lower.bound.theta,FALSE,TRUE,private$model_type())
                         }
                         if(!is.null(upper.bound.theta)){
                           Model__set_bound(private$ptr,upper.bound.theta,FALSE,FALSE,private$model_type())
                         }
                         if(length(tol) == 1){
                           algo_tol <- rep(tol,2)
                         } else {
                           algo_tol <- tol[1:2]
                         }
                         Model__use_reml(private$ptr,reml,private$model_type())
                         Model__reset_fn_counter(private$ptr,private$model_type())
                         # set up all the required vectors and data to monitor the algorithm
                         balgo <- ifelse(algo %in% c(1,3) ,2,0) # & !self$mean$any_nonlinear()
                         if(method == "saem") balgo <- 0
                         beta <- self$mean$parameters
                         theta <- self$covariance$parameters
                         var_par <- self$var_par
                         var_par_family <- I(self$family[[1]]%in%c("gaussian","Gamma","beta","quantile_scaled"))
                         ncovpar <- ifelse(var_par_family,length(theta)+1,length(theta))
                         all_pars <- c(beta,theta)
                         if(var_par_family)all_pars <- c(all_pars,var_par)
                         all_pars_new <- rep(1,length(all_pars))
                         var_par_new <- var_par
                         if(private$trace >= 1)message(paste0("using method: ",method))
                         if(private$trace >= 1)cat("\nStart: ",all_pars,"\n")
                         niter <- self$mcmc_options$samps
                         invfunc <- self$family$linkinv
                         L <- Matrix::Matrix(Model__L(private$ptr,private$model_type()))
                         #parse family
                         file_type <- mcnr_family(self$family,mcmc.pkg == "cmdstan")                         
                         ## set up sampler
                         if(mcmc.pkg == "cmdstan"){
                           if(!requireNamespace("cmdstanr")){
                             stop("cmdstanr is required to use Stan for sampling. See https://mc-stan.org/cmdstanr/ for details on how to install.
                                    Set option usestan=FALSE to use the in-built MCMC sampler.")
                           } else {
                             if(private$trace >= 2)message("If this is the first time running this model, it will be compiled by cmdstan.")
                             model_file <- system.file("cmdstan",
                                                       file_type$file,
                                                       package = "glmmrBase",
                                                       mustWork = TRUE)
                             mod <- suppressMessages(cmdstanr::cmdstan_model(model_file))
                           }
                         }
                         # SET UP MCMC DATA STRUCTURE
                         data <- list(
                           Q = Model__Q(private$ptr,private$model_type()),
                           Xb = Model__xb(private$ptr,private$model_type()),
                           Z = Model__ZL(private$ptr,private$model_type()),
                           type=as.numeric(file_type$type)
                         )
                         if(self$family[[1]]%in%c("gaussian","beta","Gamma","quantile","quantile_scaled"))data <- append(data,list(N_cont = self$n(),
                                                                                  N_int = 1,
                                                                                  N_binom = 1,
                                                                                  sigma = rep(self$var_par/self$weights, self$n()),
                                                                                  ycont = Model__y(private$ptr,private$model_type()),
                                                                                  yint = array(0,dim = 1),
                                                                                  q = 0,
                                                                                  n = array(0,dim = 1)))
                         if(self$family[[1]]%in%c("binomial","bernoulli","poisson"))data <- append(data,list(N_int = self$n(),
                                                                                                      N_cont = 1,
                                                                                                      N_binom = 1,
                                                                                                      sigma = array(0,dim = 1),
                                                                                                      yint = Model__y(private$ptr,private$model_type()),
                                                                                                      ycont = array(0,dim = 1),
                                                                                                      q = 0,
                                                                                                      n = array(0,dim = 1)))
                         if(self$family[[1]]=="binomial"){
                           data$N_binom = self$n()
                           data$n = self$trials
                         }
                         if(self$family[[1]]%in%c("beta","Gamma","quantile","quantile_scaled"))data$sigma = rep(self$var_par,self$n())
                         if(self$family[[1]]%in%c("quantile","quantile_scaled"))data$q = self$family$q
                         iter <- 0
                         n_mcmc_sampling <- ifelse(adaptive, 20, self$mcmc_options$samps)
                         beta_diff <- 1
                         theta_diff <- 1
                         converged <- FALSE
                         # run one iteration of fitting beta without re (i.e. glm) to get reasonable starting values
                         # otherwise the algorithm can struggle to converge
                         Model__update_u(private$ptr,matrix(0,nrow = Model__Q(private$ptr,private$model_type()),ncol=1),FALSE,private$model_type())
                         if(private$trace >= 1)cat("\nIter: 0\n")
                         Model__set_sml_parameters(private$ptr, FALSE, self$mcmc_options$samps, alpha, pr.average, private$model_type())
                         Model__ml_beta(private$ptr,0,private$model_type())
                         beta <- Model__get_beta(private$ptr,private$model_type())
                         var_par <- Model__get_var_par(private$ptr,private$model_type())
                         all_pars <- c(beta,theta)
                         if(var_par_family) all_pars <- c(all_pars,var_par)
                         if(private$trace >= 1){
                           cat("\nStarting Beta (GLM): ", round(beta,5))
                           cat("\n",Reduce(paste0,rep("-",40)),"\n")
                         }
                         Model__set_sml_parameters(private$ptr, method == "saem", self$mcmc_options$samps, alpha, pr.average, private$model_type())
                         # START THE MAIN ALGORITHM
                         while(!converged & iter < max.iter){
                           if(iter > 0){
                             all_pars <- all_pars_new
                             beta <- beta_new
                             theta <- theta_new
                           }
                           iter <- iter + 1
                           append_u <- I(method=="saem" & iter > 1)
                           if(private$trace >= 1)cat("\nIter: ",iter,"\n",Reduce(paste0,rep("-",40)),"\n")
                           if(private$trace == 2)t1 <- Sys.time()
                           if(mcmc.pkg == "cmdstan" | mcmc.pkg == "rstan"){
                             data$Xb <-  Model__xb(private$ptr,private$model_type())
                             data$Z <- Model__ZL(private$ptr,private$model_type())
                             if(self$family[[1]]=="gaussian")data$sigma = var_par_new/self$weights
                             if(self$family[[1]]%in%c("beta","Gamma"))data$var_par = var_par_new
                             if(private$trace <= 1){
                               if(mcmc.pkg == "cmdstan"){
                                 capture.output(fit <- mod$sample(data = data,
                                                                  chains = self$mcmc_options$chains,
                                                                  iter_warmup = self$mcmc_options$warmup,
                                                                  iter_sampling = n_mcmc_sampling,
                                                                  refresh = 0),
                                                file=tempfile())
                               } else {
                                 capture.output(suppressWarnings(fit <- rstan::sampling(stanmodels[[file_type$file]],
                                                                                        data=data,
                                                                                        chains=self$mcmc_options$chains,
                                                                                        iter = self$mcmc_options$warmup+n_mcmc_sampling,
                                                                                        warmup = self$mcmc_options$warmup,
                                                                                        refresh = 0)),
                                                file=tempfile())
                               }
                             } else {
                               # warnings have been suppressed below as it warns about R-hat etc, which is not reliable with a single chain.
                               if(mcmc.pkg == "cmdstan"){
                                 suppressWarnings(fit <- mod$sample(data = data,
                                                                    chains = self$mcmc_options$chains,
                                                                    iter_warmup = self$mcmc_options$warmup,
                                                                    iter_sampling = n_mcmc_sampling,
                                                                    refresh = 50))
                               } else {
                                 
                                 suppressWarnings(fit <- rstan::sampling(stanmodels[[file_type$file]],
                                                                         data=data,
                                                                         chains=self$mcmc_options$chains,
                                                                         iter = self$mcmc_options$warmup+n_mcmc_sampling,
                                                                         warmup = self$mcmc_options$warmup,
                                                                         refresh = 50))
                               }
                             }
                             if(mcmc.pkg == "cmdstan"){
                               dsamps <- fit$draws("gamma",format = "matrix")
                               class(dsamps) <- "matrix"
                             } else {
                               dsamps <- rstan::extract(fit,pars = "gamma",permuted = FALSE)
                               dsamps <- as.matrix(dsamps[,1,])
                             }
                             Model__update_u(private$ptr,t(dsamps),append_u,private$model_type())
                           } else {
                             Model__mcmc_sample(private$ptr, self$mcmc_options$warmup, n_mcmc_sampling, self$mcmc_options$adapt, private$model_type())
                           }
                           if(private$trace==2)t2 <- Sys.time()
                           if(private$trace==2)cat("\nMCMC sampling took: ",t2-t1,"s")
                           if(method=="mcem" | method=="saem"){
                             Model__ml_beta(private$ptr,balgo,private$model_type())
                           } else {
                             Model__nr_beta(private$ptr,private$model_type())
                           }
                           if(!skip.theta){
                             if(algo == 3){ #& !self$mean$any_nonlinear()
                               tryCatch(Model__ml_theta(private$ptr,2,private$model_type()),
                                        error = function(e) {
                                          if(private$trace >= 1)cat("\nL-BFGS failed for theta, switching to BOBYQA");
                                          Model__ml_theta(private$ptr,0,private$model_type());
                                        })
                             } else {
                               Model__ml_theta(private$ptr,0,private$model_type())
                             }
                           }
                           # set up the vectors needed 
                           beta_new <- Model__get_beta(private$ptr,private$model_type())
                           theta_new <- Model__get_theta(private$ptr,private$model_type())
                           var_par_new <- Model__get_var_par(private$ptr,private$model_type())
                           all_pars_new <- c(beta_new,theta_new)
                           llvals <- Model__get_log_likelihood_values(private$ptr,private$model_type())
                           beta_diff <- max(abs(beta-beta_new))
                           theta_diff <- max(abs(theta-theta_new))
                           fn_counter <- Model__get_fn_counter(private$ptr,private$model_type())
                           if(conv.criterion == 1){
                             converged <- !(beta_diff > algo_tol[1]) & !(theta_diff > algo_tol[2])
                           } 
                           if(iter > 1){
                             udiagnostic <- Model__u_diagnostic(private$ptr,private$model_type())
                             uval <- ifelse(conv.criterion == 2, Reduce(sum,udiagnostic), udiagnostic$first)
                             llvar <- Model__ll_diff_variance(private$ptr, TRUE, conv.criterion==2, private$model_type())
                             if(adaptive) n_mcmc_sampling <- max(n_mcmc_sampling, min(self$mcmc_options$samps, ceiling(llvar * (qnorm(convergence.prob) + qnorm(0.8))^2)/uval^2))
                             if(conv.criterion %in% c(2,3)){
                               conv.criterion.value <- uval + qnorm(convergence.prob)*sqrt(llvar/n_mcmc_sampling)
                               prob.converged <- pnorm(-uval/sqrt(llvar/n_mcmc_sampling))
                               converged <- conv.criterion.value < 0
                             } 
                             if(conv.criterion == 4){
                               llvart <- Model__ll_diff_variance(private$ptr, FALSE, TRUE, private$model_type())
                               conv.criterion.value <- udiagnostic$first + qnorm(convergence.prob)*sqrt(llvar/n_mcmc_sampling)
                               prob.converged <- pnorm(-udiagnostic$first/sqrt(llvar/n_mcmc_sampling))
                               conv.criterion.valuet <- udiagnostic$second + qnorm(convergence.prob)*sqrt(llvart/n_mcmc_sampling)
                               prob.convergedt <- pnorm(-udiagnostic$second/sqrt(llvart/n_mcmc_sampling))
                               converged <- conv.criterion.value < 0 & conv.criterion.valuet < 0
                             }
                           }
                           if(var_par_family)all_pars_new <- c(all_pars_new,var_par_new)
                           if(private$trace==2)t3 <- Sys.time()
                           if(private$trace==2)cat("\nModel fitting took: ",t3-t2,"s")
                           if(private$trace >= 1){
                             cat("\nBeta: ", round(beta_new,5))
                             cat("\nMax beta difference: ", round(beta_diff,5))
                             cat("\nTheta: ", round(theta_new,5))
                             cat("\nMax theta difference: ", round(theta_diff,5))
                             if(var_par_family)cat("\nSigma: ",round(var_par_new,5))
                             cat("\nMax. difference : ", round(max(abs(all_pars-all_pars_new)),5))
                             cat("\nLog-likelihoods: beta ", round(llvals$first,5)," theta ",round(llvals$second,5))
                             cat("\nFn evaluations: beta ",fn_counter$first," theta ",fn_counter$second)
                             if(iter>1){
                               if(adaptive)cat("\nMCMC sample size (adaptive): ",n_mcmc_sampling)
                               cat("\nLog-lik diff values: ", round(udiagnostic$first,5),", ", round(udiagnostic$second,5)," overall: ", round(Reduce(sum,udiagnostic), 5))
                               cat("\nLog-lik variance: ", round(llvar,5))
                               if(conv.criterion >= 2)cat(" convergence criterion:", ifelse(conv.criterion == 4, " (beta) "," "), round(conv.criterion.value,5)," Prob.: ",round(prob.converged,3))
                               if(conv.criterion == 4)cat(" (theta) ", round(conv.criterion.valuet,5)," Prob.: ",round(prob.convergedt,3))
                             }
                             cat("\n",Reduce(paste0,rep("-",40)),"\n")
                           }
                         }
                         if(!converged)message(paste0("Algorithm not converged. Max. difference between iterations :",round(max(abs(all_pars-all_pars_new)),4)))
                         self$update_parameters(mean.pars = beta_new,
                                                cov.pars = theta_new)
                         if(private$trace >= 1)cat("\n\nCalculating standard errors...\n")
                         self$var_par <- var_par_new
                         u <- Model__u(private$ptr,TRUE,private$model_type())
                         if(private$model_type()==0){
                           if(se == "gls" || se == "bw" || se == "box"){
                             M <- self$information_matrix() #Matrix::solve(Model__obs_information_matrix(private$ptr,private$model_type()))[1:length(beta),1:length(beta)]
                             M <- solve(M)
                             if(se.theta){
                               SE_theta <- tryCatch(sqrt(diag(solve(Model__infomat_theta(private$ptr,private$model_type())))), error = rep(NA, ncovpar))
                             } else {
                               SE_theta <- rep(NA, ncovpar)
                             }
                           } else if(se == "robust" || se == "bwrobust"){
                             M <- Model__sandwich(private$ptr,private$model_type())
                             if(se.theta){
                               SE_theta <- tryCatch(sqrt(diag(Model__infomat_theta(private$ptr,private$model_type()))), error = rep(NA, ncovpar))
                             } else {
                               SE_theta <- rep(NA, ncovpar)
                             }
                           } else if(se == "kr" || se == "kr2" || se == "sat"){
                             ss_type <- ifelse(se=="kr",1,ifelse(se=="kr2",4,5))
                             Mout <- Model__small_sample_correction(private$ptr,ss_type,oim,private$model_type())
                             M <- Mout[[1]]
                             SE_theta <- sqrt(diag(Mout[[2]]))
                           } 
                         } else {
                           # crudely calculate the information matrix for GP approximations - this will be integrated into the main
                           # library in future versions, but can cause error/crash with the above methods
                           M <- self$information_matrix()#Model__information_matrix_crude(private$ptr,private$model_type())
                           nB <- nrow(M)
                           M <- tryCatch(solve(M), error = matrix(NA,nrow = nB,ncol=nB))
                           SE_theta <- rep(NA, ncovpar)
                         }
                         SE <- sqrt(diag(M))
                         repar_table <- self$covariance$parameter_table()
                         beta_names <- Model__beta_parameter_names(private$ptr,private$model_type())
                         theta_names <- repar_table$term
                         if(self$family[[1]]%in%c("Gamma","beta","quantile_scaled")){
                           mf_pars_names <- c(beta_names,theta_names,"sigma")
                           SE <- c(SE,rep(NA,length(theta_new)+1))
                         } else {
                           mf_pars_names <- c(beta_names,theta_names)
                           if(self$family[[1]]%in%c("gaussian")) mf_pars_names <- c(mf_pars_names,"sigma")
                           SE <- c(SE,SE_theta)
                         }
                         res <- data.frame(par = c(mf_pars_names,paste0("d",1:nrow(u))),
                                           est = c(all_pars_new,rowMeans(u)),
                                           SE=c(SE,rep(NA,nrow(u))),
                                           t = NA,
                                           p = NA,
                                           lower = NA,
                                           upper = NA)
                         dof <- rep(self$n(),length(beta))
                         if(se == "kr" || se == "kr2" || se == "sat"){
                           for(i in 1:length(beta)){
                             if(!is.na(res$SE[i])){
                               res$t[i] <- (res$est[i]/res$SE[i])#*sqrt(lambda)
                               res$p[i] <- 2*(1-stats::pt(abs(res$t[i]),Mout$dof[i],lower.tail=TRUE))
                               res$lower[i] <- res$est[i] - stats::qt(0.975,Mout$dof[i],lower.tail=TRUE)*res$SE[i]
                               res$upper[i] <- res$est[i] + stats::qt(0.975,Mout$dof[i],lower.tail=TRUE)*res$SE[i]
                             }
                             dof[i] <- Mout$dof[i]
                           }
                         } else if(se=="bw" || se == "bwrobust" ){
                           res$t <- res$est/res$SE
                           bwdof <- sum(repar_table$count) - length(beta)
                           res$p <- 2*(1-stats::pt(abs(res$t),bwdof,lower.tail=TRUE))
                           res$lower <- res$est - qt(1-0.05/2,bwdof,lower.tail=TRUE)*res$SE
                           res$upper <- res$est + qt(1-0.05/2,bwdof,lower.tail=TRUE)*res$SE
                           dof <- rep(bwdof,length(beta))
                         } else if(se == "box"){
                           box_result <- Model__box(private$ptr,private$model_type())
                           res$t <- box_result$test_stat
                           res$p <- box_result$p_value
                           dof <- data.frame(dof = box_result$dof, scale = box_result$scale)
                         } else {
                           res$t <- res$est/res$SE
                           res$p <- 2*(1-stats::pnorm(abs(res$t)))
                           res$lower <- res$est - qnorm(1-0.05/2)*res$SE
                           res$upper <- res$est + qnorm(1-0.05/2)*res$SE
                         }
                         repar_table <- repar_table[!duplicated(repar_table$id),]
                         if(private$model_type()!=2){
                           rownames(u) <- rep(repar_table$term,repar_table$count)
                         } else {
                           if(nrow(repar_table)==1) rownames(u) <-  rep(repar_table$term,nrow(u))
                         }
                         aic <- ifelse(private$model_type()==0 , Model__aic(private$ptr,private$model_type()),NA)
                         xb <- Model__xb(private$ptr,private$model_type())
                         zd <- self$covariance$Z %*% rowMeans(u)
                         wdiag <- Matrix::diag(self$w_matrix())
                         total_var <- var(Matrix::drop(xb)) + var(Matrix::drop(zd)) + mean(wdiag)
                         condR2 <- (var(Matrix::drop(xb)) + var(Matrix::drop(zd)))/total_var
                         margR2 <- var(Matrix::drop(xb))/total_var
                         out <- list(coefficients = res,
                                     converged = converged,
                                     method = method,
                                     m = dim(u)[2],
                                     tol = tol,
                                     sim_lik = FALSE, #sim.lik.step,
                                     aic = aic,
                                     se=se,
                                     vcov = M,
                                     Rsq = c(cond = condR2,marg=margR2),
                                     logl = llvals$first,
                                     logl_theta = llvals$second,
                                     mean_form = self$mean$formula,
                                     cov_form = self$covariance$formula,
                                     family = self$family[[1]],
                                     link = self$family[[2]],
                                     re.samps = u,
                                     iter = iter,
                                     dof = dof,
                                     reml = reml,
                                     P = length(self$mean$parameters),
                                     Q = length(self$covariance$parameters),
                                     var_par_family = var_par_family,
                                     model_data = list(
                                       y = Model__y(private$ptr,private$model_type()),
                                       data = private$model_data_frame(),
                                       trials = self$trials,
                                       offset = self$mean$offset,
                                       weights = self$weights
                                     ),
                                     fn_count = fn_counter)
                         class(out) <- "mcml"
                         return(out)
                       },
                       #'@description
                       #'Maximum Likelihood model fitting with Laplace Approximation
                       #'
                       #'@details
                       #'**Laplace approximation**
                       #'Fits generalised linear mixed models using Laplace approximation to the log likelihood. For non-Gaussian models
                       #'the covariance matrix is approximated using the first order approximation based on the marginal
                       #'quasilikelihood proposed by Breslow and Clayton (1993). The marginal mean in this approximation
                       #'can be further adjusted following the proposal of Zeger et al (1988), use the member function `use_attenuated()` in this
                       #'class, see \link[glmmrBase]{Model}. To provide weights for the model fitting, store them in self$weights. To 
                       #'set the number of trials for binomial models, set self$trials. To control the information printed to the console 
                       #' during model fitting use the `self$set_trace()` function.
                       #'
                       #'@param y Optional. A numeric vector of outcome data. If this is not provided then either the outcome must have been specified when 
                       #' initialising the Model object, or the outcome data has been updated using member function `update_y()`
                       #'@param start Optional. A numeric vector indicating starting values for the model parameters.
                       #'@param method String. Either "nloptim" for non-linear optimisation, or "nr" for Newton-Raphson (default) algorithm
                       #'@param se String. Type of standard error and/or inferential statistics to return. Options are "gls" for GLS standard errors (the default),
                       #' "robust" for robust standard errors, "kr" for original Kenward-Roger bias corrected standard errors, 
                       #' "kr2" for the improved Kenward-Roger correction, "sat" for Satterthwaite degrees of freedom correction (this is the same 
                       #' degrees of freedom correction as Kenward-Roger, but with GLS standard errors)"box" to use a modified Box correction (does not return confidence intervals),
                       #' "bw" to use GLS standard errors with a between-within correction to the degrees of freedom, "bwrobust" to use robust 
                       #' standard errors with between-within correction to the degrees of freedom. 
                       #' Note that Kenward-Roger assumes REML estimates, which are not currently provided by this function.
                       #'@param oim Logical. If TRUE use the observed information matrix, otherwise use the expected information matrix for standard error and related calculations.
                       #'@param reml Logical. Whether to use a restricted maximum likelihood correction for fitting the covariance parameters
                       #'@param max.iter Maximum number of algorithm iterations, default 20.
                       #'@param tol Maximum difference between successive iterations at which to terminate the algorithm
                       #'@param se.theta Logical. Whether to calculate the standard errors for the covariance parameters. This step is a slow part
                       #' of the calculation, so can be disabled if required in larger models. Has no effect for Kenward-Roger standard errors.
                       #'@param algo Integer. 1 = L-BFGS for beta-u and BOBYQA for theta (default), 2 = BOBYQA for both.
                       #'@param lower.bound Optional. Vector of lower bounds for the fixed effect parameters. To apply bounds use nloptim.
                       #'@param upper.bound Optional. Vector of upper bounds for the fixed effect parameters. To apply bounds use nloptim.
                       #'@param lower.bound.theta Optional. Vector of lower bounds for the covariance parameters. 
                       #'@param upper.bound.theta Optional. Vector of upper bounds for the covariance parameters. 
                       #'@return A `mcml` object
                       #' @seealso \link[glmmrBase]{Model}, \link[glmmrBase]{Covariance}, \link[glmmrBase]{MeanFunction}
                       #'@examples
                       #' \dontshow{
                       #' setParallel(FALSE) # for the CRAN check
                       #' }
                       #' #create example data with six clusters, five time periods, and five people per cluster-period
                       #' df <- nelder(~(cl(6)*t(5)) > ind(5))
                       #' # parallel trial design intervention indicator
                       #' df$int <- 0
                       #' df[df$cl > 3, 'int'] <- 1 
                       #' # specify parameter values in the call for the data simulation below
                       #' des <- Model$new(
                       #'   formula = ~ factor(t) + int - 1 + (1|gr(cl)*ar0(t)),
                       #'   covariance = c(0.05,0.7),
                       #'   mean = c(rep(0,5),-0.2),
                       #'   data = df,
                       #'   family = stats::binomial()
                       #' )
                       #' ysim <- des$sim_data() # simulate some data from the model
                       #' fit1 <- des$LA(y = ysim)
                       #'@md
                       LA = function(y = NULL,
                                     start,
                                     method = "nr",
                                     se = "gls",
                                     oim = FALSE,
                                     reml = TRUE,
                                     max.iter = 40,
                                     tol = 1e-4,
                                     se.theta = TRUE,
                                     algo = 2,
                                     lower.bound = NULL,
                                     upper.bound = NULL,
                                     lower.bound.theta = NULL,
                                     upper.bound.theta = NULL){
                         
                         if(is.null(y)){
                           if(!private$y_has_been_updated)stop("y not specified and not updated in Model object")
                         } else {
                           private$verify_data(y)
                           private$set_y(y)
                         }
                         if(private$model_type() > 0 & reml == TRUE) stop("REML not available with HSGP/NNGP approximations, please set reml=FALSE")
                         Model__use_attenuation(private$ptr,private$attenuate_parameters,private$model_type())
                         if(!se %in% c("gls","kr","kr2","bw","sat","bwrobust","box"))stop("Option se not recognised")
                         if(self$family[[1]]%in%c("Gamma","beta") & (se == "kr"||se == "kr2"||se == "sat"))stop("KR standard errors are not currently available with gamma or beta families")
                         if(!method%in%c("nloptim","nr"))stop("method should be either nr or nloptim")
                         if(self$family[[1]]%in%c("quantile","quantile_scaled") & method == "nr")stop("nr with quantile currently disabled, please use nloptim with LA")
                         if(se == "box" & !(self$family[[1]]=="gaussian"&self$family[[2]]=="identity"))stop("Box only available for linear models")
                         if(!is.null(lower.bound)){
                           Model__set_bound(private$ptr,lower.bound,TRUE,TRUE,private$model_type())
                         }
                         if(!is.null(upper.bound)){
                           Model__set_bound(private$ptr,upper.bound,TRUE,FALSE,private$model_type())
                         }
                         if(!is.null(lower.bound.theta)){
                           if(any(lower.bound.theta < 0))stop("Theta lower bound cannot be negative")
                           Model__set_bound(private$ptr,lower.bound.theta,FALSE,TRUE,private$model_type())
                         }
                         if(!is.null(upper.bound.theta)){
                           Model__set_bound(private$ptr,upper.bound.theta,FALSE,FALSE,private$model_type())
                         }
                         Model__use_reml(private$ptr,reml,private$model_type())
                         var_par_family <- I(self$family[[1]]%in%c("gaussian","Gamma","beta","quantile_scaled"))
                         beta <- self$mean$parameters
                         theta <- self$covariance$parameters
                         ncovpar <- ifelse(var_par_family,length(theta)+1,length(theta))
                         var_par <- self$var_par
                         all_pars <- c(beta,theta)
                         if(var_par_family)all_pars <- c(all_pars,var_par)
                         all_pars_new <- rep(1,length(all_pars))
                         iter <- 0
                         while(any(abs(all_pars-all_pars_new)>tol)&iter < max.iter){
                           all_pars <- all_pars_new
                           iter <- iter + 1
                           if(private$trace >= 1)cat("\nIter: ",iter,"\n",Reduce(paste0,rep("-",40)))
                           if(method=="nr"){
                             Model__laplace_beta_u(private$ptr,private$model_type())
                           } else {
                             if(algo %in% c(1,3)){
                               tryCatch(Model__laplace_ml_beta_u(private$ptr,2,private$model_type()),
                                        error = function(e) {
                                          if(private$trace >= 1)cat("\nL-BFGS failed, switching to BOBYQA");
                                          Model__laplace_ml_beta_u(private$ptr,0,private$model_type());
                                        })
                             } else {
                               Model__laplace_ml_beta_u(private$ptr,0,private$model_type())
                             }
                           }
                           if(algo == 3){
                             tryCatch(Model__laplace_ml_theta(private$ptr,2,private$model_type()),
                                      error = function(e) {
                                        if(private$trace >= 1)cat("\nL-BFGS failed, switching to BOBYQA");
                                        Model__laplace_ml_theta(private$ptr,0,private$model_type());
                                      })
                           } else {
                             Model__laplace_ml_theta(private$ptr,0,private$model_type())
                           }
                           beta_new <- Model__get_beta(private$ptr,private$model_type())
                           theta_new <- Model__get_theta(private$ptr,private$model_type())
                           var_par_new <- Model__get_var_par(private$ptr,private$model_type())
                           all_pars_new <- c(beta_new,theta_new)
                           if(var_par_family)all_pars_new <- c(all_pars_new,var_par)
                           if(private$trace >= 1){
                             cat("\nBeta: ", beta_new)
                             cat("\nTheta: ", theta_new)
                             if(var_par_family)cat("\nSigma: ",var_par_new)
                             cat("\nMax. diff: ", round(max(abs(all_pars-all_pars_new)),5))
                             cat("\n",Reduce(paste0,rep("-",40)))
                           }
                         }
                         not_conv <- iter > max.iter|any(abs(all_pars-all_pars_new)>tol)
                         if(not_conv)message(paste0("algorithm not converged. Max. difference between iterations :",round(max(abs(all_pars-all_pars_new)),4)))
                         #Model__laplace_ml_beta_theta(private$ptr,0,private$model_type())
                         beta_new <- Model__get_beta(private$ptr,private$model_type())
                         theta_new <- Model__get_theta(private$ptr,private$model_type())
                         var_par_new <- Model__get_var_par(private$ptr,private$model_type())
                         all_pars_new <- c(beta_new,theta_new)
                         if(var_par_family)all_pars_new <- c(all_pars_new,var_par_new)
                         self$update_parameters(mean.pars = beta_new,
                                                cov.pars = theta_new)
                         self$var_par <- var_par_new
                         u <- Model__u(private$ptr,TRUE,private$model_type())
                         if(private$trace >= 1)cat("\n\nCalculating standard errors...\n")
                         if(se == "gls" || se =="bw" || se == "box"){
                           M <- self$information_matrix()#Matrix::solve(Model__obs_information_matrix(private$ptr,private$model_type()))[1:length(beta),1:length(beta)]
                           M <- solve(M)
                           if(se.theta){
                             SE_theta <- tryCatch(sqrt(diag(solve(Model__infomat_theta(private$ptr,private$model_type())))), error = rep(NA, ncovpar))
                           } else {
                             SE_theta <- rep(NA, ncovpar)
                           }
                         } else if(se == "robust" || se == "bwrobust" ){
                           M <- Model__sandwich(private$ptr,private$model_type())
                           if(se.theta){
                             SE_theta <- tryCatch(sqrt(diag(Model__infomat_theta(private$ptr,private$model_type()))), error = rep(NA, ncovpar))
                           } else {
                             SE_theta <- rep(NA, ncovpar)
                           }
                         } else if(se == "kr" || se == "kr2" || se == "sat"){
                           krtype <- ifelse(se=="kr",1,ifelse(se=="kr2",4,5))
                           Mout <- Model__small_sample_correction(private$ptr,krtype,oim,private$model_type())
                           M <- Mout[[1]]
                           SE_theta <- sqrt(diag(Mout[[2]]))
                         }
                         SE <- sqrt(Matrix::diag(M))
                         repar_table <- self$covariance$parameter_table()
                         beta_names <- Model__beta_parameter_names(private$ptr,private$model_type())
                         theta_names <- repar_table$term
                         if(self$family[[1]]%in%c("Gamma","beta","quantile_scaled")){
                           mf_pars_names <- c(beta_names,theta_names,"sigma")
                           SE <- c(SE,rep(NA,length(theta_new)+1))
                         } else {
                           mf_pars_names <- c(beta_names,theta_names)
                           if(self$family[[1]]=="gaussian") mf_pars_names <- c(mf_pars_names,"sigma")
                           SE <- c(SE,SE_theta)
                         }
                         res <- data.frame(par = c(mf_pars_names,paste0("d",1:nrow(u))),
                                           est = c(all_pars_new,rowMeans(u)),
                                           SE=c(SE,rep(NA,nrow(u))),
                                           t = NA,
                                           p = NA,
                                           lower = NA,
                                           upper = NA)
                         
                         dof <- rep(self$n(),length(beta))
                         if(se == "kr" || se == "kr2" || se == "sat"){
                           for(i in 1:length(beta)){
                             if(!is.na(res$SE[i])){
                               res$t[i] <- (res$est[i]/res$SE[i])#*sqrt(lambda)
                               res$p[i] <- 2*(1-stats::pt(abs(res$t[i]),Mout$dof[i],lower.tail=TRUE))
                               res$lower[i] <- res$est[i] - stats::qt(0.975,Mout$dof[i],lower.tail=TRUE)*res$SE[i]
                               res$upper[i] <- res$est[i] + stats::qt(0.975,Mout$dof[i],lower.tail=TRUE)*res$SE[i]
                             }
                             dof[i] <- Mout$dof[i]
                           }
                         } else if(se=="bw" || se == "bwrobust" ){
                           res$t <- res$est/res$SE
                           bwdof <- sum(repar_table$count) - length(beta)
                           res$p <- 2*(1-stats::pt(abs(res$t),bwdof,lower.tail=TRUE))
                           res$lower <- res$est - qt(1-0.05/2,bwdof,lower.tail=TRUE)*res$SE
                           res$upper <- res$est + qt(1-0.05/2,bwdof,lower.tail=TRUE)*res$SE
                           dof <- rep(bwdof,length(beta))
                         } else if (se == "box") {
                           box_result <- Model__box(private$ptr,private$model_type())
                           res$t <- box_result$test_stat
                           res$p <- box_result$p_value
                           dof <- data.frame(dof = box_result$dof, scale = box_result$scale)
                         } else {
                           res$t <- res$est/res$SE
                           res$p <- 2*(1-stats::pnorm(abs(res$t)))
                           res$lower <- res$est - qnorm(1-0.05/2)*res$SE
                           res$upper <- res$est + qnorm(1-0.05/2)*res$SE
                         }
                         repar_table <- repar_table[!duplicated(repar_table$id),]
                         rownames(u) <- rep(repar_table$term,repar_table$count)
                         aic <- ifelse(private$model_type()==0 , Model__aic(private$ptr,private$model_type()),NA)
                         xb <- Model__xb(private$ptr,private$model_type())
                         zd <- self$covariance$Z %*% u
                         wdiag <- Matrix::diag(self$w_matrix())
                         total_var <- var(Matrix::drop(xb)) + var(Matrix::drop(zd)) + mean(wdiag)
                         condR2 <- (var(Matrix::drop(xb)) + var(Matrix::drop(zd)))/total_var
                         margR2 <- var(Matrix::drop(xb))/total_var
                         out <- list(coefficients = res,
                                     converged = !not_conv,
                                     method = method,
                                     m = dim(u)[2],
                                     tol = tol,
                                     sim_lik = FALSE,
                                     aic = aic,
                                     se =se ,
                                     vcov = M,
                                     Rsq = c(cond = condR2,marg=margR2),
                                     mean_form = self$mean$formula,
                                     cov_form = self$covariance$formula,
                                     logl = Model__log_likelihood(private$ptr,private$model_type()),
                                     family = self$family[[1]],
                                     link = self$family[[2]],
                                     re.samps = u,
                                     iter = iter,
                                     dof = dof,
                                     reml = reml,
                                     P = length(self$mean$parameters),
                                     Q = length(self$covariance$parameters),
                                     var_par_family = var_par_family,
                                     model_data = list(
                                       y = Model__y(private$ptr,private$model_type()),
                                       data = private$model_data_frame(),
                                       trials = self$trials,
                                       offset = self$mean$offset,
                                       weights = self$weights
                                     ))
                         class(out) <- "mcml"
                         return(out)
                       },
                       #' @description 
                       #' Set whether to use sparse matrix methods for model calculations and fitting.
                       #' By default the model does not use sparse matrix methods.
                       #' @param sparse Logical indicating whether to use sparse matrix methods
                       #' @param amd Logical indicating whether to use and Approximate Minimum Degree algorithm to calculate an efficient permutation matrix so 
                       #' that the Cholesky decomposition of PAP^T is calculated rather than A.
                       #' @return None, called for effects
                       sparse = function(sparse = TRUE, amd = TRUE){
                         if(!is.null(private$ptr)){
                           if(private$model_type() == 1){
                             message("Sparse has no effect with NNGP models")
                           } else {
                             if(sparse){
                               Model__make_sparse(private$ptr,amd,private$model_type())
                             } else {
                               Model__make_dense(private$ptr,private$model_type())
                             }
                             self$covariance$sparse(sparse,amd)
                           }
                           private$useSparse = sparse
                         } 
                       },
                       #' @description 
                       #' Generate an MCMC sample of the random effects
                       #' @param mcmc.pkg String. Either `cmdstan` for cmdstan (requires the package `cmdstanr`), `rstan` to use rstan sampler, or
                       #'`hmc` to use a cruder Hamiltonian Monte Carlo sampler. cmdstan is recommended as it has by far the best number 
                       #' of effective samples per unit time. cmdstanr will compile the MCMC programs to the library folder the first time they are run, 
                       #' so may not currently be an option for some users.
                       #' @return A matrix of samples of the random effects
                       mcmc_sample = function(mcmc.pkg = "rstan"){
                         if(!mcmc.pkg %in% c("cmdstan","rstan","hmc"))stop("mcmc.pkg must be one of cmdstan, rstan, or hmc")
                         if(!private$y_has_been_updated) stop("No y data has been added")
                         if(mcmc.pkg == "cmdstan" | mcmc.pkg == "rstan"){
                           file_type <- mcnr_family(self$family,mcmc.pkg == "cmdstan")
                           if(mcmc.pkg == "cmdstan"){
                             if(!requireNamespace("cmdstanr")){
                               stop("cmdstanr is required to use cmdstan for sampling. See https://mc-stan.org/cmdstanr/ for details on how to install.")
                             } else {
                               if(private$trace>=1)message("If this is the first time running this model, it will be compiled by cmdstan.")
                               model_file <- system.file("cmdstan",
                                                         file_type$file,
                                                         package = "glmmrBase",
                                                         mustWork = TRUE)
                               mod <- suppressMessages(cmdstanr::cmdstan_model(model_file))
                             }
                           }
                           data <- list(
                             Q = Model__Q(private$ptr,private$model_type()),
                             Xb = Model__xb(private$ptr,private$model_type()),
                             Z = Model__ZL(private$ptr,private$model_type()),
                             type=as.numeric(file_type$type)
                           )
                           if(self$family[[1]]%in%c("gaussian","beta","Gamma","quantile","quantile_scaled"))data <- append(data,list(N_cont = self$n(),
                                                                                                                                     N_int = 1,
                                                                                                                                     N_binom = 1,
                                                                                                                                     sigma = rep(self$var_par/self$weights, self$n()),
                                                                                                                                     ycont = Model__y(private$ptr,private$model_type()),
                                                                                                                                     yint = array(0,dim = 1),
                                                                                                                                     q = 0,
                                                                                                                                     n = array(0,dim = 1)))
                           if(self$family[[1]]%in%c("binomial","bernoulli","poisson"))data <- append(data,list(N_int = self$n(),
                                                                                                               N_cont = 1,
                                                                                                               N_binom = 1,
                                                                                                               sigma = array(0,dim = 1),
                                                                                                               yint = Model__y(private$ptr,private$model_type()),
                                                                                                               ycont = array(0,dim = 1),
                                                                                                               q = 0,
                                                                                                               n = array(0,dim = 1)))
                           if(self$family[[1]]=="binomial")data <- append(data,list(N_binom  = self$n(), n = self$trials))
                           if(self$family[[1]]%in%c("beta","Gamma","quantile","quantile_scaled"))data$sigma = rep(self$var_par,self$n())
                           if(self$family[[1]]%in%c("quantile","quantile_scaled"))data$q = self$family$q
                           if(private$trace <= 1){
                             if(private$trace==1)message("Starting MCMC sampling. Set self$trace(2) for detailed output")
                             if(mcmc.pkg == "cmdstan"){
                               capture.output(fit <- mod$sample(data = data,
                                                                chains = 1,
                                                                iter_warmup = self$mcmc_options$warmup,
                                                                iter_sampling = self$mcmc_options$samps,
                                                                refresh = 0),
                                              file=tempfile())
                             } else {
                               capture.output(fit <- rstan::sampling(stanmodels[[file_type$file]],
                                                                     data=data,
                                                                     chains=1,
                                                                     iter = self$mcmc_options$warmup+self$mcmc_options$samps,
                                                                     warmup = self$mcmc_options$warmup,
                                                                     refresh = 0),
                                              file=tempfile())
                             }
                           } else {
                             if(mcmc.pkg == "cmdstan"){
                               fit <- mod$sample(data = data,
                                                 chains = 1,
                                                 iter_warmup = self$mcmc_options$warmup,
                                                 iter_sampling = self$mcmc_options$samps,
                                                 refresh = 50)
                             } else {
                               fit <- rstan::sampling(stanmodels[[file_type$file]],
                                                      data=data,
                                                      chains=1,
                                                      iter = self$mcmc_options$warmup+self$mcmc_options$samps,
                                                      warmup = self$mcmc_options$warmup,
                                                      refresh = 50)
                             }
                           }
                           if(mcmc.pkg == "cmdstan"){
                             dsamps <- fit$draws("gamma",format = "matrix")
                             class(dsamps) <- "matrix"
                           } else {
                             dsamps <- rstan::extract(fit,"gamma",FALSE)
                             dsamps <- as.matrix(dsamps[,1,])
                           }
                           if(private$trace==1)message("Sampling complete, updating model")
                           Model__update_u(private$ptr,as.matrix(t(dsamps)),private$model_type())
                           dsamps <- Matrix::Matrix(Model__L(private$ptr, private$model_type()) %*% Matrix::t(dsamps)) #check this
                         } else {
                           Model__use_attenuation(private$ptr,private$attenuate_parameters, private$model_type())
                           Model__mcmc_set_lambda(private$ptr,self$mcmc_options$lambda, private$model_type())
                           Model__mcmc_set_max_steps(private$ptr,self$mcmc_options$maxsteps, private$model_type())
                           Model__mcmc_set_refresh(private$ptr,self$mcmc_options$refresh, private$model_type())
                           Model__mcmc_sample(private$ptr,self$mcmc_options$warmup,self$mcmc_options$samps,self$mcmc_options$adapt, private$model_type())
                           dsamps <- Model__u(private$ptr,TRUE, private$model_type())
                           dsamps <- Matrix::Matrix(Model__L(private$ptr, private$model_type()) %*% dsamps)
                         }
                         return(invisible(dsamps))
                       },
                       #' @description 
                       #' The gradient of the log-likelihood with respect to either the random effects or
                       #' the model parameters. The random effects are on the N(0,I) scale, i.e. scaled by the
                       #' Cholesky decomposition of the matrix D. To obtain the random effects from the last 
                       #' model fit, see member function `$u`
                       #' @param y (optional) Vector of outcome data, if not specified then data must have been set in another function.
                       #' @param u (optional) Vector of random effects scaled by the Cholesky decomposition of D
                       #' @param beta Logical. Whether the log gradient for the random effects (FALSE) or for the linear predictor parameters (TRUE)
                       #' @return A vector of the gradient
                       gradient = function(y,u,beta=FALSE){
                         if(!missing(y)){
                           private$verify_data(y)
                           private$set_y(y)
                         } else {
                           if(!private$y_has_been_updated) stop("No y data has been added")
                         }
                         if(missing(u)){
                           u_in <- Model__u(private$ptr, FALSE, private$model_type())
                         } else {
                           u_in <- u
                         }
                         grad <- matrix(NA,ifelse(beta,ncol(self$mean$X),nrow(u_in)),ncol(u_in))
                         for(i in 1:ncol(u)){
                           grad[,i] <- Model__log_gradient(private$ptr,u_in,beta, private$model_type())
                         }
                         return(grad)
                       },
                       #' @description 
                       #' The partial derivatives of the covariance matrix Sigma with respect to the covariance
                       #' parameters. The function returns a list in order: Sigma, first order derivatives, second 
                       #' order derivatives. The second order derivatives are ordered as the lower-triangular matrix
                       #' in column major order. Letting 'd(i)' mean the first-order partial derivative with respect 
                       #' to parameter i, and d2(i,j) mean the second order derivative with respect to parameters i 
                       #' and j, then if there were three covariance parameters the order of the output would be:
                       #' (sigma, d(1), d(2), d(3), d2(1,1), d2(1,2), d2(1,3), d2(2,2), d2(2,3), d2(3,3)).
                       #' @return A list of matrices, see description for contents of the list.
                       partial_sigma = function(){
                         private$update_ptr()
                         out <- Model__cov_deriv(private$ptr, private$model_type())
                         return(out)
                       },
                       #' @description 
                       #' Returns the sample of random effects from the last model fit, or updates the samples for the model.
                       #' @param scaled Logical indicating whether the samples are on the N(0,I) scale (`scaled=FALSE`) or
                       #' N(0,D) scale (`scaled=TRUE`)
                       #' @param u (optional) Matrix of random effect samples. If provided then the internal samples are replaced with these values. These samples should be N(0,I).
                       #' @return A matrix of random effect samples
                       u = function(scaled = TRUE, u){
                         if(is.null(private$ptr))stop("Model not set")
                         if(missing(u)){
                           return(Model__u(private$ptr,scaled, private$model_type()))
                         } else {
                           Model__update_u(private$ptr,u,FALSE,private$model_type()) 
                         }
                       },
                       #' @description 
                       #' The log likelihood for the GLMM. The random effects can be left 
                       #' unspecified. If no random effects are provided, and there was a previous model fit with the same data `y`
                       #' then the random effects will be taken from that model. If there was no
                       #' previous model fit then the random effects are assumed to be all zero.
                       #' @param y A vector of outcome data
                       #' @param u An optional matrix of random effect samples. This can be a single column.
                       #' @return The log-likelihood of the model parameters
                       log_likelihood = function(y,u){
                         if(!missing(y)){
                           private$verify_data(y)
                           private$set_y(y)
                         } else {
                           if(!private$y_has_been_updated) stop("No y data has been added")
                         }
                         if(!missing(u)) Model__update_u(private$ptr,u, private$model_type())
                         return(Model__log_likelihood(private$ptr, private$model_type()))
                       },
                       #' @field mcmc_options There are five options for MCMC sampling that are specified in this list:
                       #' * `warmup` The number of warmup iterations. Note that if using the internal HMC
                       #' sampler, this only applies to the first iteration of the MCML algorithm, as the
                       #' values from the previous iteration are carried over.
                       #' * `samps` The number of MCMC samples drawn in the MCML algorithms. For
                       #' smaller tolerance values larger numbers of samples are required. For the internal
                       #' HMC sampler, larger numbers of samples are generally required than if using Stan since
                       #' the samples generally exhibit higher autocorrealtion, especially for more complex
                       #' covariance structures. For SAEM a small number is recommended as all samples are stored and used 
                       #' from every iteration.
                       #' * `lambda` (Only relevant for the internal HMC sampler) Value of the trajectory length of the leapfrog integrator in Hamiltonian Monte Carlo
                       #'  (equal to number of steps times the step length). Larger values result in lower correlation in samples, but
                       #'  require larger numbers of steps and so is slower. Smaller numbers are likely required for non-linear GLMMs.
                       #'  * `refresh` How frequently to print to console MCMC progress if displaying verbose output.
                       #'  * `maxsteps` (Only relevant for the internal HMC sampler) Integer. The maximum number of steps of the leapfrom integrator
                       mcmc_options = list(warmup = 100,
                                           samps = 25,
                                           chains = 1,
                                           lambda = 1,
                                           refresh = 500,
                                           maxsteps = 100,
                                           target_accept = 0.95,
                                           adapt = 50),
                       #' @description
                       #' Prints the internal instructions and data used to calculate the linear predictor. 
                       #' Internally the class uses a reverse polish notation to store and 
                       #' calculate different functions, including user-specified non-linear mean functions. This 
                       #' function will print all the steps. Mainly used for debugging and determining how the 
                       #' class has interpreted non-linear model specifications. 
                       #' @return None. Called for effects.
                       calculator_instructions = function(){
                         Model__print_names(private$ptr,TRUE, TRUE, private$model_type())
                         Model__print_instructions(private$ptr,private$model_type())
                       },
                       #' @description
                       #' Calculates the marginal effect of variable x. There are several options for 
                       #' marginal effect and several types of conditioning or averaging. The type of marginal
                       #' effect can be the derivative of the mean with respect to x (`dydx`), the expected 
                       #' difference E(y|x=a)-E(y|x=b) (`diff`), or the expected log ratio log(E(y|x=a)/E(y|x=b)) (`ratio`).
                       #' Other fixed effect variables can be set at specific values (`at`), set at their mean values
                       #' (`atmeans`), or averaged over (`average`). Averaging over a fixed effects variable here means
                       #' using all observed values of the variable in the relevant calculation. 
                       #' The random effects can similarly be set at their 
                       #' estimated value (`re="estimated"`), set to zero (`re="zero"`), set to a specific value 
                       #' (`re="at"`), or averaged over (`re="average"`). Estimates of the expected values over the random
                       #' effects are generated using MCMC samples. MCMC samples are generated either through 
                       #' MCML model fitting or using `mcmc_sample`. In the absence of samples `average` and `estimated` 
                       #' will produce the same result. The standard errors are calculated using the delta method with one 
                       #' of several options for the variance matrix of the fixed effect parameters.
                       #' Several of the arguments require the names
                       #' of the variables as given to the model object. Most variables are as specified in the formula,
                       #' factor variables are specified as the name of the `variable_value`, e.g. `t_1`. To see the names
                       #' of the stored parameters and data variables see the member function `names()`.
                       #' @param x String. Name of the variable to calculate the marginal effect for.
                       #' @param type String. Either `dydx` for derivative, `diff` for difference, or `ratio` for log ratio. See description.
                       #' @param re String. Either `estimated` to condition on estimated values, `zero` to set to zero, `at` to
                       #' provide specific values, or `average` to average over the random effects.
                       #' @param se String. Type of standard error to use, either `GLS` for the GLS standard errors, `KR` for 
                       #' Kenward-Roger estimated standard errors, `KR2` for the improved Kenward-Roger correction (see `small_sample_correction()`),
                       #'  or `robust` to use a robust sandwich estimator.
                       #' @param at Optional. A vector of strings naming the fixed effects for which a specified value is given.
                       #' @param atmeans Optional. A vector of strings naming the fixed effects that will be set at their mean value.
                       #' @param average Optional. A vector of strings naming the fixed effects which will be averaged over.
                       #' @param xvals Optional. A vector specifying the values of `a` and `b` for `diff` and `ratio`. The default is (1,0).
                       #' @param atvals Optional. A vector specifying the values of fixed effects specified in `at` (in the same order).
                       #' @param revals Optional. If `re="at"` then this argument provides a vector of values for the random effects.
                       #' @param oim Logical. If TRUE use the observed information matrix, otherwise use the expected information matrix for standard error and related calculations.
                       #' @return A named vector with elements `margin` specifying the point estimate and `se` giving the standard error.
                       marginal = function(x,type,re,se,at = c(),atmeans = c(),average=c(),xvals=c(1,0),atvals=c(),revals=c(),oim = FALSE){
                         margin_types <- c("dydx","diff","ratio")
                         re_types <- c("estimated","at","zero","average")
                         se_types <- c("GLS","KR","Robust","BW","KR2","Sat")
                         if(!type%in%margin_types)stop("type not recognised")
                         if(!re%in%re_types)stop("re not recognised")
                         if(!se%in%se_types)stop("se not recognised")
                         result <- Model__marginal(xp = private$ptr,
                                                   x = x,
                                                   margin = which(margin_types==type)-1,
                                                   re = which(re_types==re)-1, 
                                                   se = which(se_types==se)-1,
                                                   oim = oim,
                                                   at = at,
                                                   atmeans = atmeans,
                                                   average = average,
                                                   xvals_first = xvals[1],
                                                   xvals_second =xvals[2],
                                                   atvals = atvals,
                                                   revals = revals,
                                                   type = private$model_type())
                         return(c("margin"=unname(result[1]),"SE"=unname(result[2])))
                       },
                       #' @description
                       #' Updates the outcome data y
                       #' 
                       #' Some functions require outcome data, which is by default set to all zero if no model fitting function 
                       #' has been run. This function can update the interval y data.
                       #' @param y Vector of outcome data
                       #' @return None. Called for effects
                       update_y = function(y){
                         private$verify_data(y)
                         private$set_y(y)
                       },
                       #' @description
                       #' Controls the information printed to the console for other functions. 
                       #' @param trace Integer, either 0 = no information, 1 = some information, 2 = all information
                       #' @return None. Called for effects.
                       set_trace = function(trace){
                         if(!trace%in%c(0,1,2))stop("trace must be 0, 1, or 2")
                         private$trace <- trace
                         Model__set_trace(private$ptr,trace, private$model_type())
                       }
                     ),
                     private = list(
                       W = NULL,
                       Xb = NULL,
                       trace = 1,
                       useSparse = TRUE,
                       session_id = NULL,
                       logit = function(x){
                         exp(x)/(1+exp(x))
                       },
                       genW = function(){
                         Model__update_W(private$ptr, private$model_type())
                         private$W <- Model__get_W(private$ptr, private$model_type())
                       },
                       attenuate_parameters = FALSE,
                       ptr = NULL,
                       y_in_formula = FALSE,
                       y_name = NULL,
                       y_has_been_updated = FALSE,
                       set_y = function(y){
                         private$update_ptr()
                         Model__set_y(private$ptr,y, private$model_type())
                         private$y_has_been_updated <- TRUE
                       },
                       model_type = function(){
                         type <- self$covariance$.__enclos_env__$private$type
                         return(type)
                       },
                       update_ptr = function(force = FALSE){
                         if(is.null(private$ptr) | force | private$session_id != Sys.getpid()){
                           if(!self$family[[1]]%in%c("poisson","binomial","gaussian","bernoulli","Gamma","beta","quantile","quantile_scaled"))stop("family must be one of Poisson, Binomial, Gaussian, Gamma, Beta, or quantile")
                           # if(gsub(" ","",self$mean$formula) != gsub(" ","",self$covariance$formula)){
                           #   form <- paste0(self$mean$formula,"+",self$covariance$formula)
                           # } else {
                           #   form <- gsub(" ","",self$mean$formula)
                           # }
                           form <- gsub(" ","",self$formula)
                           form <- gsub("~","",self$formula)
                           if(grepl("nngp",form)){
                             self$covariance$.__enclos_env__$private$type <- 1
                             form <- gsub("nngp_","",form)
                           } else if(grepl("hsgp",form)){
                             self$covariance$.__enclos_env__$private$type <- 2
                             form <- gsub("hsgp_","",form)
                           }
                           type <- private$model_type()
                           data <- self$covariance$data
                           if(any(!colnames(self$mean$data)%in%colnames(data))){
                             cnames <- which(!colnames(self$mean$data)%in%colnames(data))
                             data <- cbind(data,self$mean$data[,cnames,drop=FALSE])
                           }
                           #data <- private$process_data(as.formula(paste0("~",form)),data,TRUE,TRUE)
                           if(self$family[[1]]=="bernoulli" & any(self$trials>1))self$family[[1]] <- "binomial"
                           if(is.null(self$covariance$parameters)){
                             if(type == 0){
                               private$ptr <- Model__new(form,
                                                         as.matrix(data),
                                                         colnames(data),
                                                         tolower(self$family[[1]]),
                                                         self$family[[2]])
                             } else if(type==1){
                               nngp <- self$covariance$nngp()
                               private$ptr <- Model_nngp__new(form,
                                                              as.matrix(data),
                                                              colnames(data),
                                                              tolower(self$family[[1]]),
                                                              self$family[[2]],
                                                              nngp[2])
                             } else if(type==2){
                               private$ptr <- Model_hsgp__new(form,
                                                              as.matrix(data),
                                                              colnames(data),
                                                              tolower(self$family[[1]]),
                                                              self$family[[2]])
                             }
                             Model__update_beta(private$ptr,self$mean$parameters,type)
                             ncovpar <- Model__n_cov_pars(private$ptr,type)
                             self$covariance$parameters <- runif(ncovpar)
                             re <- Model__re_terms(private$ptr,type)
                             paridx <- Model__parameter_fn_index(private$ptr,type)+1
                             names(self$covariance$parameters) <- re[paridx]
                             Model__update_theta(private$ptr,self$covariance$parameters,type)
                           } else {
                             if(type == 0){
                               private$ptr <- Model__new_w_pars(form,
                                                                as.matrix(data),
                                                                colnames(data),
                                                                tolower(self$family[[1]]),
                                                                self$family[[2]],
                                                                self$mean$parameters,
                                                                self$covariance$parameters)
                             } else if(type==1){
                               nngp <- self$covariance$nngp()
                               private$ptr <- Model_nngp__new_w_pars(form,
                                                                     as.matrix(data),
                                                                     colnames(data),
                                                                     tolower(self$family[[1]]),
                                                                     self$family[[2]],
                                                                     self$mean$parameters,
                                                                     self$covariance$parameters,
                                                                     nngp[2])
                             } else if(type==2){
                               private$ptr <- Model_hsgp__new_w_pars(form,
                                                                     as.matrix(data),
                                                                     colnames(data),
                                                                     tolower(self$family[[1]]),
                                                                     self$family[[2]],
                                                                     self$mean$parameters,
                                                                     self$covariance$parameters)
                             }
                           }
                           
                           Model__set_offset(private$ptr,self$mean$offset,type)
                           Model__set_weights(private$ptr,self$weights,type)
                           Model__set_var_par(private$ptr,self$var_par,type)
                           if(self$family[[1]] == "binomial")Model__set_trials(private$ptr,self$trials,type)
                           if(self$family[[1]] %in% c("quantile","quantile_scaled")) Model__set_quantile(private$ptr,self$family$q,type)
                           # Model__update_beta(private$ptr,self$mean$parameters,type)
                           # Model__update_theta(private$ptr,self$covariance$parameters,type)
                           Model__update_u(private$ptr,matrix(rnorm(Model__Q(private$ptr,type)),ncol=1),type) # initialise random effects to random
                           Model__mcmc_set_lambda(private$ptr,self$mcmc_options$lambda,type)
                           Model__mcmc_set_max_steps(private$ptr,self$mcmc_options$maxsteps,type)
                           Model__mcmc_set_refresh(private$ptr,self$mcmc_options$refresh,type)
                           Model__mcmc_set_target_accept(private$ptr,self$mcmc_options$target_accept,type)
                           if(!private$useSparse & type == 1) Model__make_dense(private$ptr,type)
                           # set covariance pointer
                           self$covariance$.__enclos_env__$private$model_ptr <- private$ptr
                           self$covariance$.__enclos_env__$private$ptr <- NULL
                           self$covariance$.__enclos_env__$private$cov_form()
                           private$session_id <- Sys.getpid()
                         }
                       },
                       verify_data = function(y){
                         if(any(is.na(y)))stop("NAs in y")
                         if(self$family[[1]]=="binomial"){
                           if(!(all(y>=0 & y%%1 == 0)))stop("y must be integer >= 0")
                           if(any(y > self$trials))stop("Number of successes > number of trials")
                         } else if(self$family[[1]]=="poisson"){
                           if(any(y <0) || any(y%%1 != 0))stop("y must be integer >= 0")
                         } else if(self$family[[1]]=="beta"){
                           if(any(y<0 || y>1))stop("y must be between 0 and 1")
                         } else if(self$family[[1]]=="Gamma") {
                           if(any(y<=0))stop("y must be positive")
                         } else if(self$family[[1]]=="gaussian" & self$family[[2]]=="log"){
                           if(any(y<=0))stop("y must be positive")
                         } else if(self$family[[1]]=="bernoulli"){
                           if(any(y > self$trials))stop("Number of successes > number of trials")
                           if(any(y > 1))self$family[[1]] <- "binomial"
                           if(!(all(y>=0 & y%%1 == 0)))stop("y must be 0 or 1")
                         }
                       },
                       model_data = function(newdata){
                         cnames <- colnames(self$covariance$data)
                         if(any(!colnames(self$mean$data)%in%cnames)){
                           cnames <- c(cnames1, which(!colnames(self$mean$data)%in%cnames))
                         }
                         if(!isTRUE(all.equal(cnames,colnames(newdata)))){
                           newdat <- newdata[,cnames[cnames%in%colnames(newdata)],drop=FALSE]
                           newcnames <- cnames[!cnames%in%colnames(newdata)]
                           for(i in newcnames){
                             if(grepl("factor",i)){
                               id1 <- gregexpr("\\[",i)
                               id2 <- gregexpr("\\]",i)
                               var <- substr(i,id1[[1]][1]+1,id2[[1]][1]-1)
                               if(!var%in%colnames(newdata))stop(paste0("factor ",var," not in data"))
                               val <- substr(i,id2[[1]][1]+1,nchar(i))
                               newcol <- I(newdata[,var]==val)*1
                               newdat <- cbind(newdat,newcol)
                               colnames(newdat)[ncol(newdat)] <- i
                             } else {
                               stop(paste0("Variable ",i," not in data"))
                             }
                           }
                         } else {
                           newdat <- newdata
                         }
                         newdat <- newdat[,cnames,drop=FALSE]
                         return(newdat)
                       },
                       model_data_frame = function(){
                         cnames <- colnames(self$mean$data)
                         dat <- self$mean$data
                         if(any(!colnames(self$covariance$data)%in%cnames)){
                           dat <- cbind(dat, self$covariance$data[,which(!colnames(self$covariance$data)%in%cnames), drop = FALSE])
                         }
                         return(dat)
                       },
                       check_y_formula = function(form,data,family){
                         # assume formula is a string
                         str1 <- as.character(form)
                         if(length(str1)==3){
                           private$y_in_formula <- TRUE
                           if(grepl("cbind",str1[[2]])){
                             if(!family[[1]]%in%c("binomial"))stop("cbind specification of outcome requires binomial model")
                             yc <- unlist(lapply(regmatches(str1[2], gregexpr("\\(.*?\\)", str1[2])), function(x)gsub("[\\(\\)]","",x)))
                             yc <- unlist(strsplit(unlist(yc),","))
                             yc <- gsub(" ","",yc)
                             if(!yc[1]%in%colnames(data))stop(paste0(yc[1]," not in data"))
                             private$y_name <- yc[1]
                             self$trials <- eval(parse(text = yc[2]),envir = data) + data[,yc[1]]
                           } else {
                             if(!str1[2]%in%colnames(data))stop(paste0(str1[2]," not in data"))
                             private$y_name <- str1[2]
                           }
                           return(as.formula(paste0("~",str1[3])))
                         } else {
                           return(form)
                         }
                       },
                       process_data = function(form,data,is_covariance = FALSE,is_mean = TRUE){
                         s1 <- c()
                         f1 <- as.character(form)[2]
                         f1 <- gsub(" ","",f1[length(f1)])
                         cnames <- colnames(data)
                         result <- list(formula= NA, data = NA)
                         # first check if we can just use R's functions:
                         if(is_mean){
                           r1 <- re_names(f1)
                           f2 <- f1
                           for(i in 1:length(r1)){
                             r1[i] <- gsub("\\(","\\\\(",r1[i])
                             r1[i] <- gsub("\\|","\\\\|",r1[i])
                             r1[i] <- gsub("\\)","\\\\)",r1[i])
                             f2 <- gsub(paste0("\\+",r1[i]),"",f2)
                           }
                           mm_result <- tryCatch(model.matrix(as.formula(paste0("~",f2)),data),
                                                 error = function(e)return(NA))
                           if(!is(mm_result,"logical")){
                             if(any(colnames(mm_result)=="(Intercept)")) mm_result <- mm_result[,-which(colnames(mm_result)=="(Intercept)"),drop=FALSE]
                             for(i in 1:ncol(mm_result)){
                               colnames(mm_result)[i] <- gsub("-","",colnames(mm_result)[i])
                               colnames(mm_result)[i] <- gsub("+","",colnames(mm_result)[i])
                               if(grepl("factor",colnames(mm_result)[i])){
                                 colnames(mm_result)[i] <- gsub("factor","",colnames(mm_result)[i])
                                 colnames(mm_result)[i] <- gsub("\\(","",colnames(mm_result)[i])
                                 colnames(mm_result)[i] <- gsub("\\)","",colnames(mm_result)[i])
                               }
                             }
                             new_formula <- paste0(colnames(mm_result),collapse = "+")
                             if(grepl("-1",f1))new_formula <- paste0(new_formula,"-1")
                             r1 <- re_names(f1)
                             for(i in 1:length(r1)){
                               new_formula <- paste0(new_formula,"+",r1[i])
                             }
                             new_formula <- as.formula(paste0("~ ",new_formula))
                             result <- list(formula = new_formula, data = as.data.frame(mm_result))
                           } 
                         }
                         
                         if(is(result$formula,"logical")){
                           if(is_covariance){
                             s1a <- re_names(f1, FALSE)
                             s1a <- s1a[s1a!="1"]
                             s1a <- Reduce(c,strsplit(s1a,"\\*"))
                             s1alen <- Reduce(c,gregexpr("\\(.*?\\)", s1a))
                             s1akeep <- which(s1alen < 0)
                             s1b <- lapply(regmatches(s1a, gregexpr("\\(.*?\\)", s1a)), function(x)gsub("[\\(\\)]","",x))
                             s1b <- Reduce(c,lapply(s1b,function(x)strsplit(x,",")))
                             s1b[s1akeep] <- s1a[s1akeep]
                             s1 <- c(s1,unique(Reduce(c,s1b)))
                           } 
                           if(is_mean) {
                             s1 <- c(s1,get_variable_names(f1,cnames))
                           }
                           data_idx <- match(s1,cnames)
                           data_idx <- data_idx[!is.na(data_idx)]
                           new_data <- data[,data_idx,drop=FALSE]
                           if(ncol(new_data)>0){
                             for(i in 1:ncol(new_data)){
                               if(is(new_data[,i],"character")|is(new_data[,i],"factor"))new_data[,i] <- as.numeric(as.factor(new_data[,i]))
                             }
                           }
                           return(list(formula = form, data = new_data))
                           # if(!all(s1 %in% cnames)){
                           #   not_in <- which(!s1 %in% cnames)
                           #   stop(paste0("The following variables are not in the data: ",paste0(s1[not_in],collapse = " ")))
                           # } else {
                           #   
                           # }
                         } else {
                           return(result)
                         }
                         
                       }
                     ))