R/simulate.HL.R

Defines functions simulate.HLfitlist simulate.HLfit .guess_new_BinomialDen simulate_ranef .simulate_ranef .calc_ZAlist_newdata .calc_ZAlist_newdata_mv .r_resid_var_over_cols .get_family_par .get_family_parlist .r_resid_var .newetaFix .noRanef

Documented in simulate.HLfit simulate.HLfitlist simulate_ranef

## 'has-no-ranef' can be tested by is.null(.parseBars(re.form))
# devel version of lme4 has noReForm() that assumes that there in no fixed effect in re.form, with result
# TRUE if argument is NA or ~0, 
# FALSE if argument is NULL or a non-trivial formula (re.form=NULL in particular means that prediction assumes the original random-effect terms).
# We additionally want TRUE if argument is ~<only fixef> (fixed effect will be ignored anyway)
# The following is equivalent to noReForm() except for the last test, is.null(.parseBars(re.form)) 
# ~0 returns TRUE, ~Days returns TRUE [as the last test is TRUE in both cases], ~<including ranefs> returns FALSE 
.noRanef <- function(re.form) { # Stupidly returns FALSE for explicit formula with LHS. So re.form better not have a LHS.
  (!is.null(re.form) && !inherits(re.form,"formula") && is.na(re.form)) ||
    (inherits(re.form,"formula") && length(re.form)==2 && is.null(.parseBars(re.form)))
}

.newetaFix <- function(object, newMeanFrames,validnames=NULL) { # newMeanFrames must have cols for the fixed beta's, absent from the object's $X.pv 
  ## newdata -> offset must be recomputed. 
  ## dans l'état actuel $fixef et complet, incluant les etaFix$beta: pas besoin de les separer
  ## mais il peut contenir des NA ! à enlever
  # le newMeanFrames$X contient a priori les cols des etaFix$beta, contrairement à object$X.pv => don't use the latter cols 
  if (is.null(validnames)) {
    est_and_fix <- names(which(!is.na(object$fixef))) ## estimated + etaFix$beta
    validnames <- intersect(colnames(newMeanFrames$X) ,est_and_fix) # would be colnames(newMeanFrames$X) when there is no NA if object$fixef. would validnames=est_and_fix suffice ?
  }
  if (length(validnames)) {
    etaFix <-  drop(newMeanFrames$X[,validnames,drop=FALSE] %*% object$fixef[validnames]) ## valid even if ncol(newMeanFrames$X) = 0
  } else etaFix <- rep(0, nrow(newMeanFrames$X))
  off <- model.offset( newMeanFrames$mf) ### look for offset from (ori)Formula 
  if ( ! is.null(off)) etaFix <- etaFix + off   
  return(etaFix)
}

.r_resid_var <- function(mu,phiW,sizes,family,
                         # COMP_nu,NB_shape,
                         family_par=.get_family_par(family=family),
                         zero_truncated=identical(family$zero_truncated,TRUE), 
                         famfam, nsim=1L) { 
  # we cannot use family()$simulate bc it assumes a fit object as input
  resu <- switch(famfam,
                 "gaussian" = rnorm(nsim*length(mu),mean=mu,sd=sqrt(phiW)),
                 "poisson" = .rpois(nsim*length(mu),mu,zero_truncated=zero_truncated), 
                 "binomial" = rbinom(nsim*length(mu),size=sizes,prob=mu),
                 "Gamma" = {
                   y <- rgamma(nsim*length(mu), shape= 1 / phiW, scale=mu*phiW) # mean=sh*sc=mu, var=sh*sc^2 = mu^2 phiW
                   Gamma_min_y <- .spaMM.data$options$Gamma_min_y
                   is_low_y <- (y < Gamma_min_y)
                   if (any(is_low_y)) y[which(is_low_y)] <- Gamma_min_y 
                   y
                 }, ## ie shape increase with prior weights, consistent with Gamma()$simulate / spaMM_Gamma()$simulate
                 "COMPoisson" = {
                   lambdas <- attr(mu,"lambda") # F I X M E an environment would keep values ?
                   if (is.null(lambdas)) {
                     sapply(mu, function(muv) {
                       lambda <- family$mu2lambda(muv)
                       .COMP_simulate(lambda=lambda,nu=family_par, #= COMP_nu 
                                      nsim=nsim)
                     })
                   } else sapply(lambdas,.COMP_simulate,nu=family_par, #= COMP_nu 
                                 nsim=nsim)
                 },
                 "negbin" = .rnbinom(nsim*length(mu), size=family_par, #= NB_shape
                                   mu_str=mu, zero_truncated=zero_truncated),
                 "negbin1" = .rnbinom(nsim*length(mu), size=mu*family_par, #= NB_shape
                                    mu_str=mu, zero_truncated=zero_truncated),
                 "negbin2" = .rnbinom(nsim*length(mu), size=family_par, #= NB_shape
                                    mu_str=mu, zero_truncated=zero_truncated),
                 "beta_resp" = {
                   # spaMM's phi is here 1, so phiW must be 1/prior.weights and so for the precision parameter:
                   Wfamily_par <- family_par/phiW
                   y <- rbeta(n=nsim * length(mu), 
                         shape1=mu*Wfamily_par,shape2=(1-mu)*Wfamily_par)
                   beta_min_y <- .spaMM.data$options$beta_min_y
                   is_low_y <- (y < beta_min_y)
                   if (any(is_low_y)) y[which(is_low_y)] <- beta_min_y 
                   beta_max_y <- 1- beta_min_y
                   is_high_y <- (y > beta_max_y)
                   if (any(is_high_y)) y[which(is_high_y)] <- beta_max_y 
                   y
                 }, # fam_par being the precision parameter in the Cribari-Neto parametrisation 
                 "betabin" = {
                   # phiW: same logic as for beta_resp
                   ntot <- nsim*length(mu) 
                   Wfamily_par <- family_par/phiW
                   rmu <- rbeta(n=ntot, shape1=mu*Wfamily_par,shape2=(1-mu)*Wfamily_par)
                   rbinom(ntot, size=sizes, prob=rmu)
                 },
                 stop("(!) random sampling from given family not yet implemented")
  )
  if (nsim>1L) dim(resu) <- c(length(mu),nsim)
  resu
} ## vector-valued function from vector input

.get_family_parlist <- function(object, families=object$families, family=object$family, newdata) {
  if ( ! is.null(families)) {
    family_parlist <- vector("list", length(families))
    for (mv_it in seq_along(families)) {
      family_parlist[mv_it] <- list(.get_family_par(family=families[[mv_it]], newdata=newdata)) 
    }
    return(family_parlist)
  } else .get_family_par(family=family, famfam=family$family, newdata=newdata)
}

.get_family_par <- function(family, famfam=family$family, newdata=NULL, mv_it=NULL) {
  if ( ! is.null(newdata) &&  ! is.null(resid.formula <- (disp_env <- family$resid.model)$resid.formula)) {
    residFrames <- .get_terms_info(formula=resid.formula, data=newdata, famfam="")
    # handles both "rdiOff" and "rdiForm" cases:
    if ( is.null(off <- model.offset(residFrames$mf)) ) {family_par <- 0} else family_par <- off
    if ( ! is.null(disp_env$beta)) family_par <- family_par + residFrames$X %*% disp_env$beta
  } else if (famfam =="COMPoisson") {   ##  all the next cases are OK whether there is no new data or whether there is no resid.formula
    family_par <- environment(family$aic)$nu
  } else if (famfam %in% c("beta_resp","betabin")) {
    family_par <- environment(family$aic)$prec
  } else if (famfam  %in% c("negbin","negbin1","negbin2")) {
    family_par <- environment(family$aic)$shape
  } else family_par <- NULL
  family_par
}


.r_resid_var_over_cols <- function(mu,  # a list over nsim !
                                   phiW, 
                                   family_par,
                                   sizes, family, families, resp_range=NULL, is_mu_fix_btwn_sims,
                                   is_phiW_fix_btwn_sims=attr(phiW,"is_phiW_fix_btwn_sims"),
                                   nsim=1L, as_matrix=FALSE, mv_it=NULL,
                                   zero_truncated=identical(family$zero_truncated,TRUE),
                                   cum_nobs=attr(families,"cum_nobs")) {
  if ( ! is.null(families)) {
    rowS <- vector("list", length(families))
    for (mv_it in seq_along(families)) {
      resp_range <- .subrange(cumul=cum_nobs, it=mv_it)
      family <- families[[mv_it]] # copy needed for zero_truncated to get the correct family...
      rowS[[mv_it]] <- .r_resid_var_over_cols(mu, 
                                              phiW=phiW[resp_range,,drop=FALSE], 
                                              family_par=family_par[[mv_it]],
                                              sizes=sizes[resp_range], 
                                              family=family, families=NULL, resp_range=resp_range,
                                              is_mu_fix_btwn_sims=is_mu_fix_btwn_sims,
                                              is_phiW_fix_btwn_sims=is_phiW_fix_btwn_sims[mv_it], nsim=nsim, as_matrix=TRUE, mv_it=mv_it,
                                              zero_truncated=identical(family$zero_truncated,TRUE))
    }
    return(do.call(rbind, rowS))
  }
  block <- NA*phiW
  famfam <- family$family
  if (is_mu_fix_btwn_sims && is_phiW_fix_btwn_sims) { # 2nd condition should be trivially true for count models without prior.weights,
                                                      # even those with a resid.model as it has fixed effects only.  
                                                      # and "presumably" also even for count models with prior weights (in beta_resp, at least):
                                                      # only variable prior weights btwn_sims (how?) should make it FALSE but the coe ignores that possibility.
    if (is.null(resp_range)) {
      mu_all <- mu[[1L]] 
    } else mu_all <- structure(mu[[1L]][resp_range], p0=attr(mu[[1L]],"p0")[[mv_it]], mu_U=attr(mu[[1L]],"mu_U")[[mv_it]])
    # : subsetting otherwise loses crucial p0 and mu_U attributes for Tnegbin
    # These attributes also exists for Tpoisson
    block <- .r_resid_var(mu_all, phiW=phiW[,1L],sizes=sizes, 
                          zero_truncated=zero_truncated, 
                          # cases where family_par is not needed but the is a promise available... => it may be possible to merge the codes?
                          famfam=famfam, family=family, nsim=nsim) # vector or nsim-col matrix
    if (as_matrix && is.null(dim(block))) dim(block) <- c(length(block),1L) # forces matrix for mv code using rbind()
  } else {
    for (sim_it in seq_len(nsim)) {
      if (is.null(resp_range)) {
        mu_it <- mu[[sim_it]]
      } else mu_it <- structure(mu[[sim_it]][resp_range], p0=attr(mu[[sim_it]],"p0")[[mv_it]], mu_U=attr(mu[[sim_it]],"mu_U")[[mv_it]])
      block[ ,sim_it] <- .r_resid_var(mu_it, 
                                      phiW=phiW[ ,sim_it], # rlevant rowas have been selected in the mv case 
                                      sizes=sizes, 
                                      family_par= family_par,  
                                      zero_truncated=zero_truncated, famfam=famfam, family=family, nsim=1L)
    }
  }
  return(block)
}

.calc_ZAlist_newdata_mv <- function(object, newdata, new_X_ZACblob=NULL) {
  map_rd_mv <- attr(object$ZAlist, "map_rd_mv")
  ori_exp_ranef_terms <- attr(object$ZAlist, "exp_ranef_terms")
  ori_exp_ranef_strings <- attr(object$ZAlist,"exp_ranef_strings")
  locdataS <- new_X_ZACblob$locdata # needed because it result from check of all variables needed for prediction (such as residVar predictors)
  # This currently do not bear the variable of the ranefs that were not "conditioned upon", but these variables are needed here
  # since this function construct all design matrices (out of which those for "conditioned upon" ranefs will be ...%*% [ranV=0])
  for (mv_it in seq_along(map_rd_mv)) {
    rd_in_mv <- map_rd_mv[[mv_it]]
    exp_ranef_strings_it <- ori_exp_ranef_strings[rd_in_mv]
    old_ranef_form <- as.formula(paste("~",(paste(exp_ranef_strings_it,collapse="+")))) 
    #exp_ranef_terms_it <- structure(ori_exp_ranef_terms[rd_in_mv], type=attr(ori_exp_ranef_terms,"type")[rd_in_mv])
    if ( is.null(newdata_it <- locdataS[[mv_it]])) newdata_it <- newdata
    Zlist <- .calc_Zlist(exp_ranef_terms=ori_exp_ranef_terms, data=newdata_it, 
                         rmInt=0L, drop=TRUE,sparse_precision=FALSE,
                         corr_info=.get_from_ranef_info(object), 
                         rd_in_mv=rd_in_mv,
                         sub_oldZAlist=object$ZAlist, # OK if we use only colnames, not attributes of the list...
                         lcrandfamfam=attr(object$rand.families,"lcrandfamfam"))
    amatrices <- .get_new_AMatrices(object,newdata=newdata_it) # .calc_newFrames_ranef(formula=old_ranef_form,data=newdata, fitobject=object)$mf)[rd_in_mv]
    ZAlist_it <- .calc_normalized_ZAlist(Zlist=Zlist,
                                         AMatrices=amatrices,
                                         vec_normIMRF=object$ranef_info$vec_normIMRF,
                                         strucList=object$strucList[rd_in_mv])
    # In the *fit* preprocessing, .merge_ZAlists is called on ZA lists for each submodel, named in ref to the submodels only;
    # .merge_ZAlists uses "exp_ranef_strings" or similar info to match the lists, not list names. 
    names(ZAlist_it) <- rd_in_mv
    attr(ZAlist_it,"exp_ranef_strings") <- exp_ranef_strings_it
    if (mv_it>1L) {
      newZAlist <- .merge_ZAlists(newZAlist, ZAlist_it, 
                                  nobs1=nrow(newdata_it), # presumably does not matter
                                  nobs2=nrow(newdata_it), mv_it)
    } else newZAlist <- ZAlist_it
  }  
  newZAlist
}

#  Build full Zlist for simulation; ultimately only elements for "marginalized upon" ranefs will be used
#   (Z's for "conditioned upon" ranefs were provided by .calcènewèX_ZAC[_mv] and used in distinct .point_predict step)
.calc_ZAlist_newdata <- function(object, newdata, new_X_ZACblob) {
  # we simulate with all ranefs (treated conditionally|ranef or marginally) hence 
  # * we need design matrices for all ranefs
  # * we need values of all the original variables
  # hence we use an "## effective '.noFixef'" : formula with only ranefs of the fit. But 1st version fails for mv; second may be more straightforward anyway
  #old_ranef_form <- formula.HLfit(object, which="")[-2L] 
  #old_ranef_form <- as.formula(paste("~",(paste(.process_bars(old_ranef_form),collapse="+")))) ## 
  #frame_ranefs <- .calc_newFrames_ranef(formula=old_ranef_form,data=newdata, fitobject=object)$mf 
  #exp_ranef_terms <- .process_bars(old_ranef_form[[2L]], # barlist must remain missing here
  #                                 expand=TRUE, which. = "exp_ranef_terms")
  if (is.null(vec_nobs <- object$vec_nobs)) { #  *univariate*-resp model 
    old_ranef_form <- as.formula(paste("~",(paste(attr(object$ZAlist,"exp_ranef_strings"),collapse="+")))) 
    exp_ranef_terms <- attr(object$ZAlist, "exp_ranef_terms")
    Zlist <- .calc_Zlist(exp_ranef_terms=exp_ranef_terms, data=newdata, rmInt=0L, drop=TRUE,sparse_precision=FALSE,
                         corr_info=.get_from_ranef_info(object),
                         sub_oldZAlist=object$ZAlist,
                         # Note no levels_type="seq_len" here: important to get correct matrix for Matern...
                         lcrandfamfam=attr(object$rand.families,"lcrandfamfam"))
    amatrices <- .get_new_AMatrices(object,newdata=newdata) # .calc_newFrames_ranef(formula=old_ranef_form,data=newdata, fitobject=object)$mf)
    newZAlist <- .calc_normalized_ZAlist(Zlist=Zlist,
                                         AMatrices=amatrices,
                                         vec_normIMRF=object$ranef_info$vec_normIMRF, 
                                         strucList=object$strucList)
  } else {
    newZAlist <- .calc_ZAlist_newdata_mv(object, newdata, new_X_ZACblob = new_X_ZACblob)
 
  }
  return(newZAlist)
}

.simulate_ranef <- function(object, rd, newdata, 
                           cum_n_u_h=attr(object$lambda,"cum_n_u_h"), 
                           vec_n_u_h=diff(cum_n_u_h), 
                           fittedLambda=object$lambda.object$lambda_est, 
                           nsim, 
                           lcrandfamfam=attr(object$rand.families,"lcrandfamfam")) {
  nr <- vec_n_u_h[rd]
  u.range <- (cum_n_u_h[rd]+1L):(cum_n_u_h[rd+1L])
  if ( ! is.null(object$rand.families[[rd]]$prior_lam_fac) && ! is.null(newdata)) { # prior_lam_fac is the 'design' for non-ranCoef (wei-1|.)
    leftOfBar_terms <- attr(object$ZAlist,"exp_ranef_terms")[[rd]][[2L]]
    leftOfBar_mf <- model.frame(as.formula(paste("~",leftOfBar_terms)), newdata, xlev = NULL) 
    prior_lam_fac <- leftOfBar_mf[,1L]^2 ## assumes simple syntax (wei-1|.)
    loclambda <- object$lambda.object$lambda_list[[rd]]* prior_lam_fac
  } else loclambda <- fittedLambda[u.range] ## includes prior_lam_fac
  newU <- replicate(nsim, {
    switch(lcrandfamfam[rd], ## remainder of code should be OK for rand.families
           gaussian = rnorm(nr,sd=sqrt(loclambda)),
           gamma = rgamma(nr,shape=1/loclambda,scale=loclambda),
           beta = rbeta(nr,1/(2*loclambda),1/(2*loclambda)),
           "inverse.gamma" = 1/rgamma(nr,shape=1+1/loclambda,scale=loclambda), ## yields inverse gamma (1+1/object$lambda,1/object$lambda)
           conditional= rep(0,length(loclambda)), ## conditional random effects already in predictor
           stop("(!) random sample from given rand.family not yet implemented")
    )},simplify=TRUE) ## should have nsim columns
  object$rand.families[[rd]]$linkfun(newU) 
}

simulate_ranef <- function(object, which=NULL, newdata=NULL, nsim=1L) {
  cum_n_u_h <- attr(object$lambda,"cum_n_u_h")
  vec_n_u_h <- diff(cum_n_u_h)
  if (is.null(which)) which <- seq_along(vec_n_u_h)
  nwhich <- length(which)
  newb <- vector("list", nwhich)
  for (rd in seq_along(which)) {
    newb[[rd]] <- .simulate_ranef(object=object, rd=which[rd], newdata=newdata, 
                                  cum_n_u_h=cum_n_u_h, 
                                  vec_n_u_h=vec_n_u_h, 
                                  fittedLambda=object$lambda.object$lambda_est, 
                                  nsim=nsim, 
                                  lcrandfamfam=attr(object$rand.families,"lcrandfamfam"))
  }
  if (nsim>1L) {
    newb <- do.call(rbind, newb)
  } else newb <- unlist(newb)
  newb
}

.guess_new_BinomialDen <- function(object, cum_nobs, sizes, mu) {
  # ____F I X M E____ maybe I should warn in cases of matching by multiple copy
  #  and stop instead of current warning when there is no match even by multiple copy.
  if ( inherits(object,"fitmv") ) {
    ori_cum_nobs <- attr(object$vec_nobs,"cum_nobs")
    ori_vec_nobs <- diff(ori_cum_nobs)
    vec_nobs <- diff(cum_nobs)
    newsizes <- vector("list", length(object$families))
    for (fam_it in seq_along(object$families)) {
      if (object$families[[fam_it]]$family=="binomial") {
        if (vec_nobs[fam_it] %% ori_vec_nobs[fam_it]) {
          famfams <- sapply(object$families, `[[`, x="family")
          warning(paste0("'sizes' argument does not match 'newdata' one.\n",
                         "'sizes' should match counts for succesive submodels\n",
                         paste0(famfams,": ", vec_nobs,collapse=", "),"."),
                  immediate. = TRUE)
        } else {
          ncopy <- vec_nobs[fam_it] %/% ori_vec_nobs[fam_it]
          newsizes[[fam_it]] <- rep(sizes[.subrange(ori_cum_nobs,fam_it)], ncopy)
        }
      } else newsizes[[fam_it]] <- rep(1L, diff(cum_nobs)[fam_it]) 
    }
    sizes <- unlist(newsizes)
  } else {
    if (object$family$family=="binomial") {
      if (length(mu[[1]]) %% length(sizes)) {
        warning(paste0("'sizes' argument does not match expected length=", length(mu[[1]])),
                immediate. = TRUE)
      } # else automatic recycling of sizes presumably works {
      #   ncopy <- length(mu) %/% length(sizes)
      #   newsizes[[fam_it]] <- rep(sizes, ncopy)
      # } 
    } else sizes <- rep(1L, length(mu[[1]])) 
  }
  sizes
}


# simulate.HLfit(fullm[[2]],newdata=fullm[[1]]$data,size=fullm[[1]]$data$total) for multinomial avec binomial nichées de dimension différentes
# FR->FR misses the computation of random effects for new spatial positions: cf comments in the code below
simulate.HLfit <- function(object, nsim = 1, seed = NULL, newdata=NULL,
                           type = "marginal", re.form, conditional=NULL, 
                           verbose=c(type=TRUE, showpbar= eval(spaMM.getOption("barstyle"))),
                           sizes=NULL , resp_testfn=NULL, phi_type="predict", prior.weights=object$prior.weights, 
                           variances=list(), ...) { ## object must have class HLfit; corr pars are not used, but the ZAL matrix is.
  if (inherits(newdata,"tibble")) newdata <- as.data.frame(newdata) 
  ## RNG stuff copied from simulate.lm
  control <- list(simulate=TRUE,
                  keep_ranef_covs_for_simulate=FALSE) # modified in one case below
  was_invColdoldList_NULL <- is.null(object$envir$invColdoldList) # to be able to restore initial state 
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else { ## this makes changes to RNG local where 'seed' is used:
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  if (inherits(object,"HLfitlist")) { 
    message("simulate does not yet work on list of fits as returned by multinomial fit:")
    message(" run simulate on each of the individual fits in the list")
    stop() ## FR->FR also some basic changes in fixedLRT but more would be needed 
  }  
  if ( ! is.null(conditional)) {
    warning("argument 'conditional' is obsolete and will be deprecated. Use 'type' instead.")
    if (conditional) {type <- "residual"} else type <- "marginal"
  }
  if (is.na(verbose["showpbar"])) { # e.g. verbose =TRUE or verbose=c(type=TRUE)
    if (is.na(verbose["type"])) verbose["type"] <- verbose # need at least a boolean argument here
    verbose["showpbar"] <- eval(.spaMM.data$options$barstyle)
  } else if (is.na(verbose["type"])) verbose["type"] <- TRUE # user set verbose=c(showpbar=.) but not type
  if (type=="predVar") {
    pred_type <- "predVar_s.lato" 
    if ( ! length(variances)) stop("A 'variances' argument must be specified (e.g., variances=list(predVar=TRUE))")
    variances$cov <- TRUE # mandatory, overriding any user's variances$cov argument
  } else if (type=="(ranef|response)") {
    pred_type <- "predVar_s.lato" 
    variances <- list(linPred=TRUE, disp=FALSE, cancel_X.pv=TRUE, cov=TRUE) # mandatory, overriding any user's variance argument
  } else pred_type <- ""
  if (is.null(sizes)) sizes <- .get_BinomialDen(object)
  nrand <- length(object$ZAlist)
  # delayedAssign("cum_nobs", {
  #   if ( is.null(newdata)) {
  #     cum_nobs <- attr(object$families,"cum_nobs")
  #   } else cum_nobs <- c(0L, cumsum(rep(nrow(newdata), length(object$families))))
  # }) # may not cover all cases, otherwise see .calc_new_X_ZAC_mv() which builds a list of newdataS and matching cum_nobs
  if (nrand>0L) {
    if ( missing(re.form)) {
      if (type=="marginal") {
        re.form <- NA # Does not mean that ranefs are entirely ignored as uuCnewnew is computed when control$keep_ranef_covs_for_simulate is TRUE.
      } else if (type=="residual") re.form <- NULL
      # type "predVar" leaves 're.form' missing. as it is not used (which should be equivalent to re.form=NULL)
    } 
    if ( ! missing(re.form)) {
      if (inherits(re.form,"formula")) {
        if (pred_type=="predVar_s.lato") warning("Non-default 're.form' is *currently* ignored when type='",type,"'.")
        re.form <- .preprocess_formula(re.form)
        ori_exp_ranef_strings <- attr(object$ZAlist,"exp_ranef_strings")
        new_exp_ranef_strings <- .process_bars(re.form,expand=TRUE)
        conditioned_upon <- unlist(sapply(lapply(new_exp_ranef_strings, `==`, y= ori_exp_ranef_strings), which)) ## unlist() bc empty list() can otherwise occur  
      } else if (anyNA(re.form)) { # anyNA rather than is.na, to handle NULL
        if (pred_type=="predVar_s.lato") warning("Non-default 're.form' is *currently* ignored when type='",type,"'.")
        if (is.na(re.form)) {
          conditioned_upon <- numeric(0)
        } else conditioned_upon <- seq_len(nrand)
      }
    }
  }
  resu <- NULL
  done <- 0L
  verbtype <- verbose[["type"]]
  is_mu_fix_btwn_sims <- FALSE
  while((needed <- nsim-done)) { ## loop operates only for resp_testfn
    if (nrand==0L) { ## note that replicate mu's can still be variable for non-standard pred_type
      if (pred_type=="predVar_s.lato") { ## re.form ignored so de facto NULL
        if (type=="(ranef|response)") {
          stop("meaningless argument type='(ranef|response)' for a fixed-effect model")
        } else if (verbtype) cat("Simulation from linear predictor variance | observed response:\n") 
        variances$cov <- (NROW(newdata)!=1L)
        control$fix_predVar <- NA
        point_pred_eta <- predict(object,newdata=newdata, type="link", control=control,
                                  variances=variances, verbose=verbose, ...) 
        predVar <- attr(point_pred_eta,"predVar")
        if (is.null(predVar)) stop("A 'variances' argument should be provided so that prediction variances are computed.") 
        rand_eta <- mvrnorm(n=needed,mu=point_pred_eta[,1L], predVar)
        if (needed>1L) rand_eta <- t(rand_eta) ## else mvrnorn value is a vector
        cum_nobs <- attr(point_pred_eta,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
        mu <- .fv_linkinv(eta=rand_eta, family=object$family, families=object$families, cum_nobs=cum_nobs) 
      } else {
        control$fix_predVar <- FALSE
        mu <- predict(object,newdata=newdata,binding=NA,control=control, verbose=verbose)
        is_mu_fix_btwn_sims <- TRUE
        mu <- replicate(needed,mu,simplify=FALSE) # always a list at this stage
        cum_nobs <- attr(mu,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
      }
    } else { ## MIXED MODEL
      if (pred_type=="predVar_s.lato") { ## re.form ignored so de facto NULL
        if (verbtype) {
          if (type=="(ranef|response)") {
            cat("Simulation from random-effects variance | observed response:\n")
          } else cat("Simulation from linear predictor variance | observed response:\n") 
        } 
        if (all(attr(object$rand.families,"lcrandfamfam")=="gaussian")){
          variances$cov <- (NROW(newdata)!=1L) # simulate() always need a covmatrix but for a signle response it is trivial
          # detect all cases where the cov mat is large:
          if (is.null(variances$as_tcrossfac_list)) variances$as_tcrossfac_list <- ( (is.null(newdata) && length(object$y)>200L) ||
                                                                                      NROW(newdata)>200L)
          control$fix_predVar <- NA
          point_pred_eta <- predict(object,newdata=newdata, type="link", control=control,
                                    variances=variances, verbose=verbose, ...) 
          predVar <- attr(point_pred_eta,"predVar")
          if (is.null(predVar)) stop("A 'variances' argument should be provided so that prediction variances are computed.") 
          if (is.list(predVar)) {
            rand_eta <- vector('list',length(predVar))
            for (it in seq_len(length(predVar))) {
              if (it==1L) {mu <- point_pred_eta[,1L]} else {mu <- rep(0,length(point_pred_eta[,1L]))}
              if ( ! is.null(predVar[[it]])) rand_eta[[it]] <- .mvrnorm(n=needed,mu=mu, tcross_Sigma = predVar[[it]])
              # else predVar[[it]] remains NULL [cf IMRF terms for newdata] and lengths() is used to remove them:
            }
            rand_eta <- Reduce("+",rand_eta[lengths(rand_eta)>0L])
          } else rand_eta <- mvrnorm(n=needed,mu=point_pred_eta[,1L], predVar) # n=needed means we will get nsim distinct eta vectors
          if (needed>1L) rand_eta <- t(rand_eta) ## else mvrnorn value is a vector
          cum_nobs <- attr(point_pred_eta,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
          mu <- .fv_linkinv(eta=rand_eta, family=object$family, families=object$families, 
                            cum_nobs=cum_nobs) ## ! freqs for binomial, counts for poisson: suitable for final code
        } else stop("This conditional simulation is not implemented for non-gaussian random-effects")
      } else if ( is.null(re.form)) { ## conditional on (all) predicted ranefs, type = "residual"
      # } else if (type=="residual") { ## conditional on (all) predicted ranefs,
        if (verbtype) cat("simulation of residuals, conditional on point predictions (hence on random effects):\n") 
        control$fix_predVar <- FALSE
        mu <- predict(object,newdata=newdata,binding=NA,control=control, verbose=verbose, ...)
        cum_nobs <- attr(mu,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE; will be needed for .calc_phiW()
        is_mu_fix_btwn_sims <- TRUE
        mu <- replicate(needed,mu,simplify=FALSE) #matrix(rep(mu,nsim),ncol=nsim)
      } else if ( inherits(re.form,"formula") || is.na(re.form) ) { ## explicit re.form; or {unconditional MIXED MODEL, type= "marginal" }
      # } else if (type=="marginal") { ## explicit re.form; or unconditional MIXED MODEL, type= "marginal"
        if (verbtype) {
          if (inherits(re.form,"formula")) {
            cat("Simulation conditional on random effect(s) retained in 're.form':\n")
          } else cat("Unconditional simulation:\n") 
        }
        # mu_fixed <- predict(object, newdata=newdata, re.form=re.form,binding=NA,control=list(fix_predVar=FALSE), verbose=verbose) ## mu_T
        # eta_fixed <- .eta_linkfun(mu_fixed, object$family, object$families)
        control$fix_predVar <- FALSE
        # we will need a ZAL and below we need the newdata to construct it if they are not NULL:
        control$keep_ranef_covs_for_simulate <- ( ! is.null(newdata))
        # Includes the ranefs conditioned upon:
        eta_fixed_cond <- predict(object, newdata=newdata, type="link", variances=variances,
                             re.form=re.form, #  
                             binding=NA,control=control, 
                             verbose=verbose, ...)
        # ... for that purpose, re.form is used to identify those ranefs and to provide the design matrices for them
        # These designn matrices will a priori not be further used (or only in a trivial way,  times zero-valued ranefs),
        # but other elements of the new_X_ZACblob attribute will be used.
        nr <- nrow(newdata)
        if (is.null(newdata)) { ## we simulate with all ranefs (treated conditionally|ranef or marginally) hence no selection of matrix
          ZAL <- get_ZALMatrix(object, force_bind = ! (.is_spprec_fit(object)) )
          cum_n_u_h <- attr(object$lambda,"cum_n_u_h")
          vec_n_u_h <- diff(cum_n_u_h)
        } else {
          new_X_ZACblob <- attr(eta_fixed_cond,"new_X_ZACblob")
          # new_X_ZACblob provided design matrices for ranefs conditioned upon (as controlled by re.form)
          # .calc_ZAlist_newdata() adds design matrices for ranefs treated marginally ( <=> ranefs NOT set to zero, but drawn marginally )
          newZAlist <-  .calc_ZAlist_newdata(object, newdata, new_X_ZACblob=new_X_ZACblob) # new_X_ZACblob$newZAlist not clearly used
          # if ( ! is.null(cum_nobs <- new_X_ZACblob$cum_nobs)) { # mv_fit...
          #   ZALlist <- new_X_ZACblob$newZACpplist
          # } else 
            ZALlist <- .compute_ZAXlist(object$strucList,newZAlist) # not only products:
          ZAL <- .ad_hoc_cbind(ZALlist, as_matrix=FALSE ) 
          vec_n_u_h <- unlist(lapply(ZALlist,ncol)) ## nb cols each design matrix = nb realizations each ranef
          cum_n_u_h <- cumsum(c(0,vec_n_u_h))
        }
        cum_nobs <- attr(eta_fixed_cond,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
        lcrandfamfam <- attr(object$rand.families,"lcrandfamfam") ## unlist(lapply(object$rand.families, function(rf) {tolower(rf$family)}))
        lcrandfamfam[conditioned_upon] <- "conditional" 
        fittedLambda <- object$lambda.object$lambda_est
        newV <- vector("list", length(vec_n_u_h))
        for (rd in seq_along(newV)) {
          newV[[rd]] <- .simulate_ranef(object, rd=rd, vec_n_u_h=vec_n_u_h, cum_n_u_h=cum_n_u_h, 
                                        newdata=newdata, fittedLambda=fittedLambda, 
                                       nsim=needed, lcrandfamfam=lcrandfamfam)
        } ## one multi-rand.family simulation. NOTE special handling of ranefs "conditioned upon" by the switch() in  .simulate_ranef
        newV <- do.call(rbind,newV) ## each column a simulation
        eta <-  matrix(rep(eta_fixed_cond,needed),ncol=needed) + as.matrix(ZAL %id*% newV) ## nobs rows, nsim col
        mu <- vector("list",needed)
        for (it in seq_len(needed)) mu[[it]] <- .fv_linkinv(eta[,it], object$family, object$families, cum_nobs = cum_nobs)
      } else stop("Unknown simulate 'type' value.")
    }
    ## ! mu := freqs for binomial, counts for poisson ## vector or matrix
    # phiW is always a matrix but mu cannot bc its elements may have attributes =>
    if ( ! inherits(mu,"list")) mu <- data.frame(mu) ## makes it always a list
    if (length(mu) != needed) stop("Programming error in simulate.HLfit() (ncol(mu) != needed).")
    #
    
    phiW <- .get_phiW(object=object, newdata=newdata, 
                      dims=c(length(mu[[1]]), length(mu)), # (nrow= response length, ncol= # of replicates)
                      phi_type=phi_type, nsim=needed, 
                      prior.weights=prior.weights) # phiW is always a matrix
    # """as""" phiW but will use an mv-list
    family_par <- .get_family_parlist(object, newdata=newdata)
    # up to version 3.5.49, there was ad hoc code calling COMPoisson()$simulate(object, nsim=nsim) when 
    #    the mu and phi vectors are constant across the nsim simulations, 
    #    to avoid computing the distribution (cumprodfacs in .COMP_simulate()) nsim times.
    # The updated .r_resid_var() -> appears to manage that too (without calling COMPoisson()$simulate()), 
    #  .r_resid_var_over_cols() too (a distinct issue is that multivariate mu may loose the (COMP)-lambda attribute: __FIXME__?)
    
    if (! is.null(newdata) && 
        ! is.null(sizes) && # may be NULL when not needed
        length(mu[[1]]) != length(sizes)
       ) sizes <- .guess_new_BinomialDen(object=object, cum_nobs=cum_nobs, sizes=sizes, mu=mu) 
    
    block <- .r_resid_var_over_cols(mu, 
                                    phiW,
                                    family_par=family_par,
                                    sizes=sizes, family=object$family, families=object$families, is_mu_fix_btwn_sims=is_mu_fix_btwn_sims,
                                    nsim=needed, cum_nobs = cum_nobs, ...)
    if (is.null(resp_testfn)) {
      if (nsim==1L) block <- drop(block)
      attr(block,"nobs") <- diff(cum_nobs)
      return(block) 
    } else {
      check_cond <- apply(block,2L, resp_testfn)
      if (is.null(resu)) {resu <- block[,check_cond,drop=FALSE]} else resu <- cbind(resu,block[,check_cond,drop=FALSE])
      done <- done+length(which(check_cond))
    }
  }
  ## we reach this point only if there is a resp_testfn, and then 'resu'
  if (nsim==1L) resu <- drop(resu)
  if (was_invColdoldList_NULL) object$envir$invColdoldList <- NULL
  return(resu)    
}


simulate.HLfitlist <- function(object,nsim=1,seed=NULL,newdata=object[[1]]$data,sizes=NULL,...) {
  ## RNG stuff copied from simulate.lm
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else { ## this makes changes to RNG local where 'seed' is used:
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  replicate(nsim, {
    allrownames <- unique(unlist(lapply(object, function(hl){rownames(hl$data)})))
    resu <- matrix(0,nrow=length(allrownames),ncol=length(object)) ## two cols if 3 types
    cumul <- 0
    if (length(sizes) != nrow(newdata)) stop("length(sizes) != nrow(newdata).")
    for (it in seq(ncol(resu))) {
      ## it = 1 se ramène à simulate(object[[1]])
      if (is.null(sizes)) sizes <- .get_BinomialDen(object[[it]])
      resu[,it] <- simulate(object[[it]],newdata=newdata,sizes=sizes - cumul,verbose=FALSE) ## FIXME use of resp_testfn would require it to be a list
      cumul <- rowSums(resu)  
    }
    resu <- cbind(resu,sizes - cumul) ## now 3 cols if 3 types
    rownames(resu) <- allrownames
    colnames(resu) <- attr(object,"sortedTypes")
    as.data.frame(resu)
  },simplify=FALSE)
}

Try the spaMM package in your browser

Any scripts or data that you put into this service are public.

spaMM documentation built on Aug. 30, 2023, 1:07 a.m.