R/simulate.HL.R

Defines functions simulate.HLfitlist simulate.HLfit .wrap_compute_ZALlist4simulate .check_simulate_pw .warn_pw_mismatch .guess_new_BinomialDen .warn_size_mismatch simulate_ranef .simulate_ranef .calc_ZAlist_newdata .calc_ZAlist_newdata_mv .r_resid_var_over_cols .get_family_par .get_family_parlist .r_resid_var .newetaFix .noRanef

Documented in simulate.HLfit simulate.HLfitlist simulate_ranef

## 'has-no-ranef' can be tested by is.null(.parseBars(re.form))
# devel version of lme4 has noReForm() that assumes that there in no fixed effect in re.form, with result
# TRUE if argument is NA or ~0, 
# FALSE if argument is NULL or a non-trivial formula (re.form=NULL in particular means that prediction assumes the original random-effect terms).
# We additionally want TRUE if argument is ~<only fixef> (fixed effect will be ignored anyway)
# The following is equivalent to noReForm() except for the last test, is.null(.parseBars(re.form)) 
# ~0 returns TRUE, ~Days returns TRUE [as the last test is TRUE in both cases], ~<including ranefs> returns FALSE 
.noRanef <- function(re.form) { # Stupidly returns FALSE for explicit formula with LHS. So re.form better not have a LHS.
  (!is.null(re.form) && !inherits(re.form,"formula") && is.na(re.form)) ||
    (inherits(re.form,"formula") && length(re.form)==2 && is.null(.parseBars(re.form)))
}

.newetaFix <- function(object, newMeanFrames,validnames=NULL,
                       X=newMeanFrames$X,
                       mf=newMeanFrames$mf) { # newMeanFrames must have cols for the fixed beta's, absent from the object's $X.pv 
  ## newdata -> offset must be recomputed. 
  ## dans l'état actuel $fixef et complet, incluant les etaFix$beta: pas besoin de les separer
  ## mais il peut contenir des NA ! à enlever
  # le newMeanFrames$X contient a priori les cols des etaFix$beta, contrairement à object$X.pv => don't use the latter cols 
  if (is.null(validnames)) {
    est_and_fix <- names(which(!is.na(object$fixef))) ## estimated + etaFix$beta
    validnames <- intersect(colnames(X) ,est_and_fix) # would be colnames(newMeanFrames$X) when there is no NA if object$fixef. would validnames=est_and_fix suffice ?
  }
  if (length(validnames)) {
    etaFix <-  drop(X[,validnames,drop=FALSE] %*% object$fixef[validnames]) ## valid even if ncol(newMeanFrames$X) = 0
  } else etaFix <- rep(0, nrow(X))
  off <- model.offset( mf) ### look for offset from (ori)Formula 
  if ( ! is.null(off)) etaFix <- etaFix + off   
  return(etaFix)
}

.r_resid_var <- function(mu,phiW,sizes,family,
                         # COMP_nu,NB_shape,
                         family_par=.get_family_par(family=family),
                         zero_truncated=identical(family$zero_truncated,TRUE), 
                         famfam, nsim=1L) { 
  # we cannot use family()$simulate bc it assumes a fit object as input
  if (length(mu)) {
    resu <- switch(famfam,
                   "gaussian" = rnorm(nsim*length(mu),mean=mu,sd=sqrt(phiW)),
                   "poisson" = .rpois(nsim*length(mu),mu,zero_truncated=zero_truncated), 
                   "binomial" = rbinom(nsim*length(mu),size=sizes,prob=mu),
                   "Gamma" = {
                     y <- rgamma(nsim*length(mu), shape= 1 / phiW, scale=mu*phiW) # mean=sh*sc=mu, var=sh*sc^2 = mu^2 phiW
                     Gamma_min_y <- .spaMM.data$options$Gamma_min_y
                     is_low_y <- (y < Gamma_min_y)
                     if (any(is_low_y)) y[which(is_low_y)] <- Gamma_min_y 
                     y
                   }, ## ie shape increase with prior weights, consistent with Gamma()$simulate / spaMM_Gamma()$simulate
                   "COMPoisson" = {
                     lambdas <- attr(mu,"lambda") # F I X M E an environment would keep values ?
                     if (is.null(lambdas)) {
                       sapply(mu, function(muv) {
                         lambda <- family$mu2lambda(muv)
                         .COMP_simulate(lambda=lambda,nu=family_par, #= COMP_nu 
                                        nsim=nsim)
                       })
                     } else sapply(lambdas,.COMP_simulate,nu=family_par, #= COMP_nu 
                                   nsim=nsim)
                   },
                   "negbin" = .rnbinom(nsim*length(mu), size=family_par, #= NB_shape
                                       mu_str=mu, zero_truncated=zero_truncated),
                   "negbin1" = .rnbinom(nsim*length(mu), size=mu*family_par, #= NB_shape
                                        mu_str=mu, zero_truncated=zero_truncated),
                   "negbin2" = .rnbinom(nsim*length(mu), size=family_par, #= NB_shape
                                        mu_str=mu, zero_truncated=zero_truncated),
                   "beta_resp" = {
                     # spaMM's phi is here 1, so phiW must be 1/prior.weights and so for the precision parameter:
                     Wfamily_par <- family_par/phiW
                     y <- rbeta(n=nsim * length(mu), 
                                shape1=mu*Wfamily_par,shape2=(1-mu)*Wfamily_par)
                     beta_min_y <- .spaMM.data$options$beta_min_y
                     is_low_y <- (y < beta_min_y)
                     if (any(is_low_y)) y[which(is_low_y)] <- beta_min_y 
                     beta_max_y <- 1- beta_min_y
                     is_high_y <- (y > beta_max_y)
                     if (any(is_high_y)) y[which(is_high_y)] <- beta_max_y 
                     y
                   }, # fam_par being the precision parameter in the Cribari-Neto parametrisation 
                   "betabin" = {
                     # phiW: same logic as for beta_resp
                     ntot <- nsim*length(mu) 
                     Wfamily_par <- family_par/phiW
                     rmu <- rbeta(n=ntot, shape1=mu*Wfamily_par,shape2=(1-mu)*Wfamily_par)
                     rbinom(ntot, size=sizes, prob=rmu)
                   },
                   stop("(!) random sampling from given family not yet implemented")
    )
  } else resu <- mu # i.e. numeric(0); such mu is possible in mv-fit if predictor variables are missing for one submodel.
  if (nsim>1L) dim(resu) <- c(length(mu),nsim)
  resu
} ## vector-valued function from vector input

.get_family_parlist <- function(object, families=object$families, family=object$family, newdata) {
  if ( ! is.null(families)) {
    family_parlist <- vector("list", length(families))
    for (mv_it in seq_along(families)) {
      family_parlist[mv_it] <- list(.get_family_par(family=families[[mv_it]], newdata=newdata)) 
    }
    return(family_parlist)
  } else .get_family_par(family=family, famfam=family$family, newdata=newdata)
}

.get_family_par <- function(family, famfam=family$family, newdata=NULL, mv_it=NULL) {
  if ( ! is.null(newdata) &&  ! is.null(resid.formula <- (disp_env <- family$resid.model)$resid.formula)) {
    residFrames <- .get_terms_info(formula=resid.formula, data=newdata, famfam="")
    # handles both "rdiOff" and "rdiForm" cases:
    if ( is.null(off <- model.offset(residFrames$mf)) ) {family_par <- 0} else family_par <- off
    if ( ! is.null(disp_env$beta)) family_par <- family_par + residFrames$X %*% disp_env$beta
  } else if (famfam =="COMPoisson") {   ##  all the next cases are OK whether there is no new data or whether there is no resid.formula
    family_par <- environment(family$aic)$nu
  } else if (famfam %in% c("beta_resp","betabin")) {
    family_par <- environment(family$aic)$prec
  } else if (famfam  %in% c("negbin","negbin1","negbin2")) {
    family_par <- environment(family$aic)$shape
  } else family_par <- NULL
  family_par
}


.r_resid_var_over_cols <- function(mu,  # a list over nsim !
                                   phiW, 
                                   family_par,
                                   sizes, family, families, resp_range=NULL, is_mu_fix_btwn_sims,
                                   is_phiW_fix_btwn_sims=attr(phiW,"is_phiW_fix_btwn_sims"),
                                   nsim=1L, as_matrix=FALSE, mv_it=NULL,
                                   zero_truncated=identical(family$zero_truncated,TRUE),
                                   cum_nobs=attr(families,"cum_nobs")) {
  if ( ! is.null(families)) {
    rowS <- vector("list", length(families))
    for (mv_it in seq_along(families)) {
      resp_range <- .subrange(cumul=cum_nobs, it=mv_it)
      family <- families[[mv_it]] # copy needed for zero_truncated to get the correct family...
      rowS[[mv_it]] <- .r_resid_var_over_cols(mu, 
                                              phiW=phiW[resp_range,,drop=FALSE], 
                                              family_par=family_par[[mv_it]],
                                              sizes=sizes[resp_range], 
                                              family=family, families=NULL, resp_range=resp_range,
                                              is_mu_fix_btwn_sims=is_mu_fix_btwn_sims,
                                              is_phiW_fix_btwn_sims=is_phiW_fix_btwn_sims[mv_it], nsim=nsim, as_matrix=TRUE, mv_it=mv_it,
                                              zero_truncated=identical(family$zero_truncated,TRUE))
    }
    return(do.call(rbind, rowS))
  }
  block <- NA*phiW
  famfam <- family$family
  if (is_mu_fix_btwn_sims && is_phiW_fix_btwn_sims) { # 2nd condition should be trivially true for count models without prior.weights,
                                                      # even those with a resid.model as it has fixed effects only.  
                                                      # and "presumably" also even for count models with prior weights (in beta_resp, at least):
                                                      # only variable prior weights btwn_sims (how?) should make it FALSE but the code ignores that possibility.
    if (is.null(resp_range)) { # univariate
      mu_all <- mu[[1L]] 
    } else {
      mu_all <- attr(mu[[1L]],"mv")[[mv_it]]
      if (is.null(mu_all)) {
        # this should no longer occur. See comments above the .r_resid_var_over_cols() call.
        warning("Suspect structure of .r_resid_var_over_cols()'s mu argument. Zero-truncation info may be lost.")
        mu_all <- structure(mu[[1L]][resp_range], p0=attr(mu[[1L]],"p0")[[mv_it]], mu_U=attr(mu[[1L]],"mu_U")[[mv_it]])
      }
    }
    block <- .r_resid_var(mu_all, phiW=phiW[,1L],sizes=sizes, 
                          zero_truncated=zero_truncated, 
                          # cases where family_par is not needed but the is a promise available... => it may be possible to merge the codes?
                          famfam=famfam, family=family, nsim=nsim) # vector or nsim-col matrix
    if (as_matrix && is.null(dim(block))) dim(block) <- c(length(block),1L) # forces matrix for mv code using rbind()
  } else {
    for (sim_it in seq_len(nsim)) {
      if (is.null(resp_range)) {
        mu_it <- mu[[sim_it]]
      } else {
        mu_it <- attr(mu[[sim_it]],"mv")[[mv_it]]
        if (is.null(mu_it)) mu_it <- structure(mu[[sim_it]][resp_range], 
                                               p0=attr(mu[[sim_it]],"p0")[[mv_it]], 
                                               mu_U=attr(mu[[sim_it]],"mu_U")[[mv_it]])
      }
      block[ ,sim_it] <- .r_resid_var(mu_it, 
                                      phiW=phiW[ ,sim_it], # rlevant rowas have been selected in the mv case 
                                      sizes=sizes, 
                                      family_par= family_par,  
                                      zero_truncated=zero_truncated, famfam=famfam, family=family, nsim=1L)
    }
  }
  return(block)
}

.calc_ZAlist_newdata_mv <- function(object, newdata, new_X_ZACblob=NULL) {
  map_rd_mv <- attr(object$ZAlist, "map_rd_mv")
  ori_exp_ranef_terms <- attr(object$ZAlist, "exp_ranef_terms")
  ori_exp_ranef_strings <- attr(object$ZAlist,"exp_ranef_strings")
  locdataS <- new_X_ZACblob$locdata # needed because it result from check of all variables needed for prediction (such as residVar predictors)
  # This currently do not bear the variable of the ranefs that were not "conditioned upon", but these variables are needed here
  # since this function construct all design matrices (out of which those for "conditioned upon" ranefs will be ...%*% [ranV=0])
  for (mv_it in seq_along(map_rd_mv)) {
    rd_in_mv <- map_rd_mv[[mv_it]]
    exp_ranef_strings_it <- ori_exp_ranef_strings[rd_in_mv]
    if (length(exp_ranef_strings_it)) {
      old_ranef_form <- as.formula(paste("~",(paste(exp_ranef_strings_it,collapse="+")))) 
      #exp_ranef_terms_it <- structure(ori_exp_ranef_terms[rd_in_mv], type=attr(ori_exp_ranef_terms,"type")[rd_in_mv])
      if ( is.null(newdata_it <- locdataS[[mv_it]])) newdata_it <- newdata
      Zlist <- .calc_Zlist(exp_ranef_terms=ori_exp_ranef_terms, data=newdata_it, 
                           rmInt=0L, drop=TRUE,sparse_precision=FALSE,
                           corr_info=.get_from_ranef_info(object), 
                           rd_in_mv=rd_in_mv,
                           sub_oldZAlist=object$ZAlist, # OK if we use only colnames, not attributes of the list...
                           lcrandfamfam=attr(object$rand.families,"lcrandfamfam"))
      amatrices <- .get_new_AMatrices(object,newdata=newdata_it) # .calc_newFrames_ranef(formula=old_ranef_form,data=newdata, fitobject=object)$mf)[rd_in_mv]
      ZAlist_it <- .calc_normalized_ZAlist(Zlist=Zlist,
                                           AMatrices=amatrices,
                                           vec_normIMRF=object$ranef_info$vec_normIMRF,
                                           strucList=object$strucList[rd_in_mv])
      # In the *fit* preprocessing, .merge_ZAlists is called on ZA lists for each submodel, named in ref to the submodels only;
      # .merge_ZAlists uses "exp_ranef_strings" or similar info to match the lists, not list names. 
      names(ZAlist_it) <- rd_in_mv
    } else ZAlist_it <- list()
    attr(ZAlist_it,"exp_ranef_strings") <- exp_ranef_strings_it
    if (mv_it>1L) {
      newZAlist <- .merge_ZAlists(newZAlist, ZAlist_it, 
                                  nobs1=nrow(newdata_it), # presumably does not matter
                                  nobs2=nrow(newdata_it), mv_it)
    } else newZAlist <- ZAlist_it
  }  
  newZAlist
}

#  Build full Zlist for simulation; ultimately only elements for "marginalized upon" ranefs will be used
#   (Z's for "conditioned upon" ranefs were provided by .calcènewèX_ZAC[_mv] and used in distinct .point_predict step)
.calc_ZAlist_newdata <- function(object, newdata, new_X_ZACblob) {
  # we simulate with all ranefs (treated conditionally|ranef or marginally) hence 
  # * we need design matrices for all ranefs
  # * we need values of all the original variables
  # hence we use an "## effective '.noFixef'" : formula with only ranefs of the fit. But 1st version fails for mv; second may be more straightforward anyway
  #old_ranef_form <- formula.HLfit(object, which="")[-2L] 
  #old_ranef_form <- as.formula(paste("~",(paste(.process_bars(old_ranef_form),collapse="+")))) ## 
  #frame_ranefs <- .calc_newFrames_ranef(formula=old_ranef_form,data=newdata, fitobject=object)$mf 
  #exp_ranef_terms <- .process_bars(old_ranef_form[[2L]], # barlist must remain missing here
  #                                 expand=TRUE, which. = "exp_ranef_terms")
  if (is.null(vec_nobs <- object$vec_nobs)) { #  *univariate*-resp model 
    old_ranef_form <- as.formula(paste("~",(paste(attr(object$ZAlist,"exp_ranef_strings"),collapse="+")))) 
    exp_ranef_terms <- attr(object$ZAlist, "exp_ranef_terms")
    Zlist <- .calc_Zlist(exp_ranef_terms=exp_ranef_terms, data=newdata, rmInt=0L, drop=TRUE,sparse_precision=FALSE,
                         corr_info=.get_from_ranef_info(object),
                         sub_oldZAlist=object$ZAlist,
                         # Note no levels_type="seq_len" here: important to get correct matrix for Matern...
                         lcrandfamfam=attr(object$rand.families,"lcrandfamfam"))
    amatrices <- .get_new_AMatrices(object,newdata=newdata) # .calc_newFrames_ranef(formula=old_ranef_form,data=newdata, fitobject=object)$mf)
    newZAlist <- .calc_normalized_ZAlist(Zlist=Zlist,
                                         AMatrices=amatrices,
                                         vec_normIMRF=object$ranef_info$vec_normIMRF, 
                                         strucList=object$strucList)
  } else {
    newZAlist <- .calc_ZAlist_newdata_mv(object, newdata, new_X_ZACblob = new_X_ZACblob)
 
  }
  return(newZAlist)
}

.simulate_ranef <- function(object, rd, newdata, 
                           cum_n_u_h=attr(object$lambda,"cum_n_u_h"), 
                           vec_n_u_h=diff(cum_n_u_h), 
                           fittedLambda=object$lambda.object$lambda_est, 
                           nsim, 
                           lcrandfamfam=attr(object$rand.families,"lcrandfamfam")) {
  nr <- vec_n_u_h[rd]
  u.range <- (cum_n_u_h[rd]+1L):(cum_n_u_h[rd+1L])
  if ( ! is.null(object$rand.families[[rd]]$prior_lam_fac) && ! is.null(newdata)) { # prior_lam_fac is the 'design' for non-ranCoef (wei-1|.)
    leftOfBar_terms <- attr(object$ZAlist,"exp_ranef_terms")[[rd]][[2L]]
    leftOfBar_mf <- model.frame(as.formula(paste("~",leftOfBar_terms)), newdata, xlev = NULL) 
    prior_lam_fac <- leftOfBar_mf[,1L]^2 ## assumes simple syntax (wei-1|.)
    loclambda <- object$lambda.object$lambda_list[[rd]]* prior_lam_fac
  } else loclambda <- fittedLambda[u.range] ## includes prior_lam_fac
  newU <- replicate(nsim, {
    switch(lcrandfamfam[rd], ## remainder of code should be OK for rand.families
           gaussian = rnorm(nr,sd=sqrt(loclambda)),
           gamma = rgamma(nr,shape=1/loclambda,scale=loclambda),
           beta = rbeta(nr,1/(2*loclambda),1/(2*loclambda)),
           "inverse.gamma" = 1/rgamma(nr,shape=1+1/loclambda,scale=loclambda), ## yields inverse gamma (1+1/object$lambda,1/object$lambda)
           conditional= rep(0,length(loclambda)), ## conditional random effects already in predictor
           stop("(!) random sample from given rand.family not yet implemented")
    )},simplify=TRUE) ## should have nsim columns
  object$rand.families[[rd]]$linkfun(newU) 
}

simulate_ranef <- function(object, which=NULL, newdata=NULL, nsim=1L) {
  cum_n_u_h <- attr(object$lambda,"cum_n_u_h")
  vec_n_u_h <- diff(cum_n_u_h)
  if (is.null(which)) which <- seq_along(vec_n_u_h)
  nwhich <- length(which)
  newb <- vector("list", nwhich)
  for (rd in seq_along(which)) {
    newb[[rd]] <- .simulate_ranef(object=object, rd=which[rd], newdata=newdata, 
                                  cum_n_u_h=cum_n_u_h, 
                                  vec_n_u_h=vec_n_u_h, 
                                  fittedLambda=object$lambda.object$lambda_est, 
                                  nsim=nsim, 
                                  lcrandfamfam=attr(object$rand.families,"lcrandfamfam"))
  }
  if (nsim>1L) {
    newb <- do.call(rbind, newb)
  } else newb <- unlist(newb)
  newb
}

.warn_size_mismatch <- function(expected, isNullnewData, orisizes, vec_nobs) {
  # Providing a right-sized value facilitates automated tests => warning rather than stop
  # But __F I X M E___ make this despendent on _LOCAL_TESTS_ ?
  if (length(vec_nobs)>1L) {
    warnmess <- paste0("Specifying 'sizes' of length ", expected, " (",
                       paste(vec_nobs,collapse="+")," for respective submodels) is necessary")
  } else warnmess <- paste0("Specifying 'sizes' of length ", expected, " is necessary")
  if ( ! isNullnewData) warnmess <- paste0(warnmess, " for these 'newdata'.\n")
  if (is.null(orisizes))  {
    warnmess <- paste0(warnmess, " NULL")
  } else warnmess <- paste0(warnmess, " Wrong-sized")
  warning(paste0(warnmess, " value is replaced by unit sizes"),
          immediate. = TRUE) 
}

# the previous version made more effort to re-use fit values using a vec_nobs[fam_it] %% ori_vec_nobs[fam_it] test
.guess_new_BinomialDen <- function(sizes, mu, cum_nobs, isNullnewData,
                                   famfams,
                                   size_control=famfams %in% c("binomial","betabin")) {
  if (length(famfams)>1L) { # mvfit
    expected <- cum_nobs[length(cum_nobs)]
    size_mismatch <- length(sizes) != expected
    if (size_mismatch) {
      if (any(size_control)) .warn_size_mismatch(expected=expected, isNullnewData, 
                                                 orisizes=sizes, vec_nobs=diff(cum_nobs))
      sizes <- rep(1L, expected) 
    } 
  } else {
    expected <- length(mu[[1]])
    size_mismatch <-  ! length(sizes) %in% c(expected,1L) # looser non-API control for univariate (unify?)
    if (size_mismatch) {
      if (any(size_control)) .warn_size_mismatch(expected=expected, isNullnewData, 
                                                 orisizes=sizes, vec_nobs=expected)
      sizes <- rep(1L, expected) 
    } 
  }
  sizes
}

.warn_pw_mismatch <- function(chr_expected, isNullnewData, default_pw) {
  # Providing a right-sized value facilitates automated tests => warning rather than stop
  # But __F I X M E___ make this despendent on _LOCAL_TESTS_ ?
  warnmess <- paste0("Specifying 'prior.weights' list (lengths=", chr_expected, " is necessary")
  if ( ! isNullnewData) warnmess <- paste0(warnmess, " for these 'newdata'.\n")
  warnmess <- paste0(warnmess, "Missing of invalid values are replaced by ")
  if (length(default_pw)>1L) {
    warnmess <- paste0(warnmess,"(",paste(default_pw,collapse=","), ") for respective submodels.")
  } else warnmess <- paste0(warnmess, default_pw,".")
  warning(warnmess, immediate. = TRUE) 
}

.check_simulate_pw <- function(prior.weights, mu, cum_nobs, famfams,
                               fit_pw, isNullnewData, 
                               pw_control= ! famfams %in% c("binomial", "poisson", "COMPoisson", 
                                                            "negbin1", "negbin2") # the latter ones have no pw but still a resid disp param
) {
  # Providing a right-sized value facilitates automated tests
  n_mv <- length(famfams)
  if (n_mv>1L) { # mvfit
    if ( ! is.list(prior.weights)) prior.weights <- vector("list", length(famfams))
    vec_nobs <- diff(cum_nobs)
    pw_mismatch <- sapply(prior.weights, length)!=vec_nobs
    if (any(pw_mismatch)) {
      ambiguous_pw <- pw_mismatch & pw_control
      if ( any(ambiguous_pw)) {
        are_units <- sapply(fit_pw, attr, which="is_unit")
        if (is.list(are_units)) are_units <- sapply(are_units, identical, y=TRUE)
        defaults <- rep(NA_real_, n_mv)
        defaults[are_units] <- 1
        if (anyNA(defaults)) {
          are_unique <- sapply(fit_pw, attr, which="is_unique")
          if (is.list(are_unique)) are_unique <- sapply(are_unique, identical, y=TRUE)
          for (mv_it in seq_along(n_mv)) {
            if (is.na(defaults[mv_it]) && are_unique[mv_it]) defaults[mv_it] <- fit_pw[[mv_it]][1]
          }
        }
        if (anyNA(defaults)) {
          defaults[is.na(defaults)] <- 1
          .warn_pw_mismatch(chr_expected=paste(vec_nobs, collapse=","), isNullnewData, 
                            default_pw=defaults)
        }
      } else defaults <- rep(1, n_mv)
      for (mv_it in which(pw_mismatch)) {
        prior.weights[[mv_it]] <- structure(rep(defaults[mv_it],vec_nobs[mv_it]), unique=TRUE)
      }
    }
  } else { # univariate
    if ( length(prior.weights) != length(mu[[1]])) {
      if (pw_control &&
          ! identical(attr(fit_pw,"is_unit"),TRUE) ) { # -> warn
        if (identical(attr(fit_pw,"unique"),TRUE)) {
          default_pw <- fit_pw[1]
        } else default_pw <- 1
        .warn_pw_mismatch(chr_expected=length(mu[[1]]), isNullnewData, default_pw=default_pw)
      } else default_pw <- 1 # no pw_control or pw is unit
      prior.weights <- structure(rep(default_pw,length(mu[[1]])), unique=TRUE)
    }
  }
  prior.weights
}

.wrap_compute_ZALlist4simulate <- function(new_X_ZACblob, newZAlist, strucList) {
  L_newLv_newLv_list <- new_X_ZACblob$L_newLv_newLv_list
  newinold <- new_X_ZACblob$newinold
  for (new_rd in seq_along(newinold)) {
    if (is.null(L_newLv_newLv_list[[new_rd]])) {
      if (! is.null(new_X_ZACblob$cov_newLv_newLv_list[[new_rd]])) {
        # One would wish to check whether
        # corr.model <- attr(object$strucList[[old_rd]],"corr.model")
        # is a corrFamily, but old_rf info is not available. Wait for pb to occur...
        warning("Contact the maintainer about presumably inefficient code for computation of L_newLv_newLv_list...")
        L_newLv_newLv_list[[new_rd]] <- mat_sqrt(new_X_ZACblob$cov_newLv_newLv_list[[new_rd]])
      } else { # e.g. for IMRF *no Cnn*, no Lnn
        old_rd <- newinold[new_rd]
        L_newLv_newLv_list[[new_rd]] <- strucList[[old_rd]]
      }
    }
  }
  .compute_ZAXlist(ZAlist=newZAlist, XMatrix = L_newLv_newLv_list) # may be "notBindable"
}

# simulate.HLfit(fullm[[2]],newdata=fullm[[1]]$data,size=fullm[[1]]$data$total) for multinomial avec binomial nichées de dimension différentes
# FR->FR misses the computation of random effects for new spatial positions: cf comments in the code below
simulate.HLfit <- function(object, nsim = 1, seed = NULL, newdata=NULL,
                           type = "marginal", re.form, conditional=NULL, 
                           verbose=c(type=TRUE, showpbar= eval(spaMM.getOption("barstyle"))),
                           sizes=if (is.null(newdata)) object$BinomialDen , resp_testfn=NULL, phi_type="predict", 
                           prior.weights= if (is.null(newdata)) object$prior.weights, 
                           variances=list(), ...) { ## object must have class HLfit; corr pars are not used, but the ZAL matrix is.
  
  if (inherits(newdata,"tibble")) newdata <- as.data.frame(newdata) 
  ## RNG stuff copied from simulate.lm
  control <- list(simulate=TRUE,
                  keep_ranef_covs_for_simulate=FALSE) # modified in one case below
  was_invColdoldList_NULL <- is.null(object$envir$invColdoldList) # to be able to restore initial state 
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else { ## this makes changes to RNG local where 'seed' is used:
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  if (inherits(object,"HLfitlist")) { 
    message("simulate does not yet work on list of fits as returned by multinomial fit:")
    message(" run simulate on each of the individual fits in the list")
    stop() ## FR->FR also some basic changes in fixedLRT but more would be needed 
  }  
  if ( ! is.null(conditional)) {
    warning("argument 'conditional' is obsolete and will be deprecated. Use 'type' instead.")
    if (conditional) {type <- "residual"} else type <- "marginal"
  }
  if (is.na(verbose["showpbar"])) { # e.g. verbose =TRUE or verbose=c(type=TRUE)
    if (is.na(verbose["type"])) verbose["type"] <- verbose # need at least a boolean argument here
    verbose["showpbar"] <- eval(.spaMM.data$options$barstyle)
  } else if (is.na(verbose["type"])) verbose["type"] <- TRUE # user set verbose=c(showpbar=.) but not type
  if (type=="predVar") {
    pred_type <- "predVar_s.lato" 
    if ( ! length(variances)) stop("A 'variances' argument must be specified (e.g., variances=list(predVar=TRUE))")
    variances$cov <- TRUE # mandatory, overriding any user's variances$cov argument
  } else if (type=="(ranef|response)") {
    pred_type <- "predVar_s.lato" 
    variances <- list(linPred=TRUE, disp=FALSE, cancel_X.pv=TRUE, cov=TRUE) # mandatory, overriding any user's variance argument
  } else pred_type <- ""
  if (isNullUserSizes <- is.null(sizes)) sizes <- .get_BinomialDen(object)
  nrand <- length(object$ZAlist)
  # delayedAssign("cum_nobs", {
  #   if ( is.null(newdata)) {
  #     cum_nobs <- attr(object$families,"cum_nobs")
  #   } else cum_nobs <- c(0L, cumsum(rep(nrow(newdata), length(object$families))))
  # }) # may not cover all cases, otherwise see .calc_new_X_ZAC_mv() which builds a list of newdataS and matching cum_nobs
  if (nrand>0L) {
    if ( missing(re.form)) {
      if (type=="marginal") {
        re.form <- NA # Does not mean that ranefs are entirely ignored as uuCnewnew is computed when control$keep_ranef_covs_for_simulate is TRUE.
      } else if (type=="residual") re.form <- NULL
      # type "predVar" leaves 're.form' missing. as it is not used (which should be equivalent to re.form=NULL)
    } 
    if ( ! missing(re.form)) {
      if (inherits(re.form,"formula")) {
        if (pred_type=="predVar_s.lato") warning("Non-default 're.form' is *currently* ignored when type='",type,"'.")
        re.form <- .preprocess_formula(re.form)
        ori_exp_ranef_strings <- attr(object$ZAlist,"exp_ranef_strings")
        new_exp_ranef_strings <- .process_bars(re.form,expand=TRUE)
        conditioned_upon <- unlist(sapply(lapply(new_exp_ranef_strings, `==`, y= ori_exp_ranef_strings), which)) ## unlist() bc empty list() can otherwise occur  
      } else if (anyNA(re.form)) { # anyNA rather than is.na, to handle NULL
        if (pred_type=="predVar_s.lato") warning("Non-default 're.form' is *currently* ignored when type='",type,"'.")
        if (is.na(re.form)) {
          conditioned_upon <- numeric(0)
        } else conditioned_upon <- seq_len(nrand)
      }
    }
  }
  resu <- NULL
  done <- 0L
  verbtype <- verbose[["type"]]
  is_mu_fix_btwn_sims <- FALSE
  
  if (length(object$families)) {
    famfams <- sapply(object$families, `[[`, x="family")
  } else famfams <- object$family$family
  
  while((needed <- nsim-done)) { ## loop operates only for resp_testfn
    if (nrand==0L) { ## note that replicate mu's can still be variable for non-standard pred_type
      if (pred_type=="predVar_s.lato") { ## re.form ignored so de facto NULL
        if (type=="(ranef|response)") {
          stop("meaningless argument type='(ranef|response)' for a fixed-effect model")
        } else if (verbtype) cat("Simulation from linear predictor variance | observed response:\n") 
        variances$cov <- (NROW(newdata)!=1L)
        control$fix_predVar <- NA
        point_pred_eta <- predict(object,newdata=newdata, type="link", control=control,
                                  variances=variances, verbose=verbose, ...) 
        predVar <- attr(point_pred_eta,"predVar")
        if (is.null(predVar)) stop("A 'variances' argument should be provided so that prediction variances are computed.") 
        rand_eta <- mvrnorm(n=needed,mu=point_pred_eta[,1L], predVar)
        if (needed>1L) rand_eta <- t(rand_eta) ## else mvrnorn value is a vector
        cum_nobs <- attr(point_pred_eta,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
        mu <- .fv_linkinv(eta=rand_eta, family=object$family, families=object$families, cum_nobs=cum_nobs) 
      } else { # standard simulation withOUT ranefs
        control$fix_predVar <- FALSE
        mu <- predict(object,newdata=newdata,binding=NA,control=control, verbose=verbose)
        is_mu_fix_btwn_sims <- TRUE
        mu <- replicate(needed,mu,simplify=FALSE) # always a list at this stage
        cum_nobs <- attr(mu[[1]],"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
      }
    } else { ## MIXED MODEL
      if (pred_type=="predVar_s.lato") { ## re.form ignored so de facto NULL
        if (verbtype) {
          if (type=="(ranef|response)") {
            cat("Simulation from random-effects variance | observed response:\n")
          } else cat("Simulation from linear predictor variance | observed response:\n") 
        } 
        if (all(attr(object$rand.families,"lcrandfamfam")=="gaussian")){
          variances$cov <- (NROW(newdata)!=1L) # simulate() always need a covmatrix but for a signle response it is trivial
          # detect all cases where the cov mat is large:
          if (is.null(variances$as_tcrossfac_list)) variances$as_tcrossfac_list <- ( (is.null(newdata) && length(object$y)>200L) ||
                                                                                      NROW(newdata)>200L)
          control$fix_predVar <- NA
          point_pred_eta <- predict(object,newdata=newdata, type="link", control=control,
                                    variances=variances, verbose=verbose, ...) 
          predVar <- attr(point_pred_eta,"predVar")
          if (is.null(predVar)) stop("A 'variances' argument should be provided so that prediction variances are computed.") 
          if (is.list(predVar)) {
            rand_eta <- vector('list',length(predVar))
            for (it in seq_len(length(predVar))) {
              if (it==1L) {mu <- point_pred_eta[,1L]} else {mu <- rep(0,length(point_pred_eta[,1L]))}
              if ( ! is.null(predVar[[it]])) rand_eta[[it]] <- .mvrnorm(n=needed,mu=mu, tcross_Sigma = predVar[[it]])
              # else predVar[[it]] remains NULL [cf IMRF terms for newdata] and lengths() is used to remove them:
            }
            rand_eta <- Reduce("+",rand_eta[lengths(rand_eta)>0L])
          } else rand_eta <- mvrnorm(n=needed,mu=point_pred_eta[,1L], predVar) # n=needed means we will get nsim distinct eta vectors
          if (needed>1L) rand_eta <- t(rand_eta) ## else mvrnorn value is a vector
          cum_nobs <- attr(point_pred_eta,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
          mu <- .fv_linkinv(eta=rand_eta, family=object$family, families=object$families, 
                            cum_nobs=cum_nobs) ## ! freqs for binomial, counts for poisson: suitable for final code
        } else stop("This conditional simulation is not implemented for non-gaussian random-effects")
      } else if ( is.null(re.form)) { ## conditional on (all) predicted ranefs, type = "residual"
      # } else if (type=="residual") { ## conditional on (all) predicted ranefs,
        if (verbtype) cat("simulation of residuals, conditional on point predictions (hence on random effects):\n") 
        control$fix_predVar <- FALSE
        mu <- predict(object,newdata=newdata,binding=NA,control=control, verbose=verbose, ...)
        cum_nobs <- attr(mu,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE; will be needed for .calc_phiW()
        is_mu_fix_btwn_sims <- TRUE
        mu <- replicate(needed,mu,simplify=FALSE) #matrix(rep(mu,nsim),ncol=nsim)
      } else if ( inherits(re.form,"formula") || is.na(re.form) ) { ## explicit re.form; or {unconditional MIXED MODEL, type= "marginal" }
        if (verbtype) {
          if (inherits(re.form,"formula")) {
            cat("Simulation conditional on random effect(s) retained in 're.form':\n")
          } else cat("Unconditional simulation:\n") 
        }
        # mu_fixed <- predict(object, newdata=newdata, re.form=re.form,binding=NA,control=list(fix_predVar=FALSE), verbose=verbose) ## mu_T
        # eta_fixed <- .eta_linkfun(mu_fixed, object$family, object$families)
        control$fix_predVar <- FALSE
        # we will need a ZAL and below we need the newdata to construct it if they are not NULL:
        control$keep_ranef_covs_for_simulate <- ( ! is.null(newdata))
        # Includes the ranefs conditioned upon:
        eta_fixed_cond <- predict(object, newdata=newdata, type="link", variances=variances,
                             re.form=re.form, #  
                             binding=NA,control=control, 
                             verbose=verbose, ...)
        # ... for that purpose, re.form is used to identify those ranefs and to provide the design matrices for them
        # These designn matrices will a priori not be further used (or only in a trivial way,  times zero-valued ranefs),
        # but other elements of the new_X_ZACblob attribute will be used.
        nr <- nrow(newdata)
        if (is.null(newdata)) { ## we simulate with all ranefs (treated conditionally|ranef or marginally) hence no selection of matrix
          ZAL <- get_ZALMatrix(object, force_bind = ! (.is_spprec_fit(object)) )
          cum_n_u_h <- attr(object$lambda,"cum_n_u_h")
          vec_n_u_h <- diff(cum_n_u_h)
        } else { # new sampling design
          new_X_ZACblob <- attr(eta_fixed_cond,"new_X_ZACblob")
          # new_X_ZACblob provided design matrices for ranefs conditioned upon (as controlled by re.form)
          # .calc_ZAlist_newdata() adds design matrices for ranefs treated marginally ( <=> ranefs NOT set to zero, but drawn marginally )
          newZAlist <-  .calc_ZAlist_newdata(object, newdata, new_X_ZACblob=new_X_ZACblob) # new_X_ZACblob$newZAlist not clearly used
          ## Here is the big change of (presumably) future version 4.5.0: (__F I X M E___ might be worth re-profiling the backends)
          ZALlist <- .wrap_compute_ZALlist4simulate(new_X_ZACblob, newZAlist, object$strucList)       
          ##   
          # ZAL <- .ad_hoc_cbind(ZALlist, as_matrix=FALSE ) # inappropriate for IMRF andother ZAXlist stuff
          ZAL <- .compute_ZAL(XMatrix=NULL, ZAlist=ZALlist, as_matrix=FALSE, bind.=TRUE)  # may be a ZAXlist if ZALlist is "notBindable"
          #
          vec_n_u_h <- unlist(lapply(ZALlist,.ncol)) ## nb cols each design matrix = nb realizations each ranef
          cum_n_u_h <- cumsum(c(0,vec_n_u_h))
        }
        cum_nobs <- attr(eta_fixed_cond,"new_X_ZACblob")$cum_nobs # presence expected given control$simulate=TRUE
        lcrandfamfam <- attr(object$rand.families,"lcrandfamfam") ## unlist(lapply(object$rand.families, function(rf) {tolower(rf$family)}))
        lcrandfamfam[conditioned_upon] <- "conditional" 
        fittedLambda <- object$lambda.object$lambda_est
        newV <- vector("list", length(vec_n_u_h))
        for (rd in seq_along(newV)) {
          newV[[rd]] <- .simulate_ranef(object, rd=rd, vec_n_u_h=vec_n_u_h, cum_n_u_h=cum_n_u_h, 
                                        newdata=newdata, fittedLambda=fittedLambda, 
                                       nsim=needed, lcrandfamfam=lcrandfamfam)
        } ## one multi-rand.family simulation. NOTE special handling of ranefs "conditioned upon" by the switch() in  .simulate_ranef
        newV <- do.call(rbind,newV) ## each column a simulation
        eta <-  matrix(rep(eta_fixed_cond,needed),ncol=needed) + as.matrix(ZAL %id*% newV) ## nobs rows, nsim col
        mu <- vector("list",needed)
        for (it in seq_len(needed)) mu[[it]] <- .fv_linkinv(eta[,it], object$family, object$families, cum_nobs = cum_nobs)
      } else stop("Unknown simulate 'type' value.")
    }
    ## ! mu := freqs for binomial, counts for poisson ## vector or matrix
    # phiW is always a matrix but mu cannot bc its elements may have attributes =>
    if ( ! inherits(mu,"list")) mu <- data.frame(mu) ## makes it always a list
    if (length(mu) != needed) stop("Programming error in simulate.HLfit() (ncol(mu) != needed).")
    #
    prior.weights <- .check_simulate_pw(prior.weights, mu, cum_nobs, famfams, 
                                        fit_pw=object$prior.weights, isNullnewData=is.null(newdata))
    # It might make sense to remove the missing(prior.weights) condition
    # But in that case .check_simulate_pw() may fail if fit is mv and user provided a non-list...
    #
    phiW <- .get_phiW(object=object, newdata=newdata, 
                      dims=c(length(mu[[1]]), length(mu)), # (nrow= response length, ncol= # of replicates)
                      phi_type=phi_type, nsim=needed, 
                      prior.weights=prior.weights) # phiW is always a matrix
    # """as""" phiW but will use an mv-list
    family_par <- .get_family_parlist(object, newdata=newdata)
    # up to version 3.5.49, there was ad hoc code calling COMPoisson()$simulate(object, nsim=nsim) when 
    #    the mu and phi vectors are constant across the nsim simulations, 
    #    to avoid computing the distribution (cumprodfacs in .COMP_simulate()) nsim times.
    # The updated .r_resid_var() -> appears to manage that too (without calling COMPoisson()$simulate()), 
    #  .r_resid_var_over_cols() too (a distinct issue is that multivariate mu may have lost 
    #     the (COMP)-lambda attribute in some cases, but this should no longer occur or else cause an error).
    
    sizes <- .guess_new_BinomialDen(sizes=sizes, mu=mu, cum_nobs=cum_nobs, 
                                    isNullnewData=is.null(newdata), famfams=famfams) 
    # ((For nsim>1 at least)) the mu has been expanded as a list, 
    # each element of which is the mu for a simulation replicate.
    # *Each such element* is expected by .r_resid_var_over_cols() to bear attributes, 
    # incl. for mv fits an "mv" attribute that stores a list of mu's per submodel, 
    #  each possibly with ZT attributes:
    # List of 1          <=  example of mu list 
    # $ : Named num [1:3057] 0.494 0.514 0.535 0.555 0.575 ...     <= a simulation replicate
    # ..- attr(*, "names")= chr [1:3057] "1.ld02" "2.ld02" "3.ld02" "4.ld02" ...
    # ..- attr(*, "mv")=List of 3        <= list of mu's per submodel
    # .. ..$ : Named num [1:1374] 0.494 0.514 0.535 0.555 0.575 ...
    # .. .. ..- attr(*, "names")= chr [1:1374] "1.ld02" "2.ld02" "3.ld02" "4.ld02" ...
    # .. ..$ : Named num [1:1182] 0.185 0.226 0.248 0.26 0.325 ...
    # .. .. ..- attr(*, "names")= chr [1:1182] "2.fl02" "6.fl02" "8.fl02" "9.fl02" ...
    # .. ..$ : Named num [1:501] 1.73 1.72 1.74 1.59 1.86 ...
    # .. .. ..- attr(*, "mu_U")= num [1:501] 1.22 1.2 1.23 1.01 1.4 ...     <= third sumbodel is ZT
    # .. .. ..- attr(*, "p0")= num [1:501] 0.295 0.301 0.292 0.362 0.247 ...
    # .. .. ..- attr(*, "names")= chr [1:501] "16.hdct02" "18.hdct02" "19.hdct02" "24.hdct02" ...
    #
    # For some time, marginal simulation of newV's followed by mu[[it]] <- .fv_linkinv(eta[,it]...)
    # lost the "mv" attribute (and thus the included ZT info). This has been corrected.
    block <- .r_resid_var_over_cols(mu, 
                                    phiW,
                                    family_par=family_par,
                                    sizes=sizes, family=object$family, families=object$families, is_mu_fix_btwn_sims=is_mu_fix_btwn_sims,
                                    nsim=needed, cum_nobs = cum_nobs, ...)
    if (is.null(resp_testfn)) {
      if (nsim==1L) block <- drop(block)
      attr(block,"nobs") <- diff(cum_nobs)
      return(block) 
    } else {
      check_cond <- apply(block,2L, resp_testfn)
      if (is.null(resu)) {resu <- block[,check_cond,drop=FALSE]} else resu <- cbind(resu,block[,check_cond,drop=FALSE])
      done <- done+length(which(check_cond))
    }
  }
  ## we reach this point only if there is a resp_testfn, and then 'resu'
  if (nsim==1L) resu <- drop(resu)
  if (was_invColdoldList_NULL) object$envir$invColdoldList <- NULL
  return(resu)    
}


simulate.HLfitlist <- function(object,nsim=1,seed=NULL,newdata=object[[1]]$data,sizes=NULL,...) {
  ## RNG stuff copied from simulate.lm
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  if (is.null(seed))
    RNGstate <- get(".Random.seed", envir = .GlobalEnv)
  else { ## this makes changes to RNG local where 'seed' is used:
    R.seed <- get(".Random.seed", envir = .GlobalEnv)
    set.seed(seed)
    RNGstate <- structure(seed, kind = as.list(RNGkind()))
    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
  }
  replicate(nsim, {
    allrownames <- unique(unlist(lapply(object, function(hl){rownames(hl$data)})))
    resu <- matrix(0,nrow=length(allrownames),ncol=length(object)) ## two cols if 3 types
    cumul <- 0
    if (length(sizes) != nrow(newdata)) stop("length(sizes) != nrow(newdata).")
    for (it in seq(ncol(resu))) {
      ## it = 1 se ramène à simulate(object[[1]])
      if (is.null(sizes)) sizes <- .get_BinomialDen(object[[it]])
      resu[,it] <- simulate(object[[it]],newdata=newdata,sizes=sizes - cumul,verbose=FALSE) ## FIXME use of resp_testfn would require it to be a list
      cumul <- rowSums(resu)  
    }
    resu <- cbind(resu,sizes - cumul) ## now 3 cols if 3 types
    rownames(resu) <- allrownames
    colnames(resu) <- attr(object,"sortedTypes")
    as.data.frame(resu)
  },simplify=FALSE)
}

Try the spaMM package in your browser

Any scripts or data that you put into this service are public.

spaMM documentation built on June 22, 2024, 9:48 a.m.