R/simulatedlnm.R

Defines functions simulatedlnm.nonlinearglm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm

Documented in simulatedlnm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm.nonlinearglm

#' simulatedllm generic.
#'
#' Simulation procedure internally called by the \code{\link{collindlnm}}
#'   function for a given hypothetical effect.
#'
#' @param x an object internally generated by the \code{\link{collindlnm}} function.
#' @export
simulatedlnm <- function(x) UseMethod("simulatedlnm")




#' @describeIn simulatedlnm case of a hypothetical linear effect in a model of
#'   class \code{"lme"}.
#' @export
#' @importFrom nlme fixed.effects
#' @importFrom dlnm crosspred
#' @importFrom stats as.formula fitted formula rnorm
simulatedlnm.linearlme <- function(x) {
  ## method simulatedlnm for "linear" effect and model class "lme"
  model <- x$model
  cb <- x$cb
  at <- x$at
  cen <- x$cen
  effect <- x$effect
  nsim <- x$nsim
  verbose <- x$verbose
  coefnames <- x$coefnames
  x <- x$x

  ### get hypothetical linear predictor
  #  fitted linear predictor
  linpredfitted <- fitted(model, level = 0)
  #  fitted contribution of crossbasis
  betasfixednames <- names(fixed.effects(model))
  betasfixed <- fixed.effects(model)[which(betasfixednames %in% coefnames)]
  linpredcb <- cb %*% matrix(betasfixed, ncol = 1)
  # hypothetical ontribution of crossbasis
  linpredcbh <- as.matrix(x) %*% matrix(effect / at, ncol = 1)
  linpred <-  linpredfitted - linpredcb + linpredcbh
  ### residual standard error
  sdre <- exp(unlist(model$modelStruct$reStruct)) * model$sigma
  ### number of groups at population level
  ng <- length(sdre)
  ### labels for the simulated random effects
  ranefnames <- paste0("ranef", 1:ng)
  ### number of observations
  n <- dim(model$data)[1]
  ### simulations
  res <- matrix(NA, nrow = nsim, ncol = dim(x)[2])
  simvec <- 1:nsim
  ### verbose message
  if (verbose) {
    verbmes <- rep(".", nsim)
    verbmes[(simvec %% 10) == 0] <- simvec[(simvec %% 10) == 0]
    verbbreak <- rep("", nsim)
    verbbreak[(simvec %% 50) == 0] <- "\n"
  }

  for (isim in simvec) {
    if (verbose) {
      cat(verbmes[isim])
      cat(verbbreak[isim])
    }
    ### simulate residual error
    X <- model$data
    X$orderX <- 1:n
    raneflist <- vector("list", ng)
    for (i in 1:ng) {
      ### it requires id's to represent a unique unit (e.g) same id cannot
      ### represent different subjects in different schools
      vari <- model$data[, which(names(X) == names(sdre)[[i]])]
      raneflist[[i]] <- data.frame(as.factor(levels(vari)),
                                   rnorm(length(levels(vari)), 0, sd = sdre[[i]]))
      names(raneflist[[i]]) <- c(names(sdre)[i], ranefnames[i])
      X <- merge(X, raneflist[[i]], by = names(sdre)[[i]], all.x = TRUE)
    }
    X <- X[order(X$orderX), ]
    ### simulate response simY
    Xranef <- X[, ranefnames]
    if (ng == 1) {
      Xranef <- as.matrix(Xranef)
    }
    ### simulate residual error
    reserror <- rnorm(n, 0, sd = model$sigma)
    X$simY <- linpred + rowSums(Xranef) + reserror
    ### reproduce missing values in response
    X$simY[is.na(X[, which(names(X) == as.character(formula(model)[[2]]))])] <- NA
    ### fit model
    modifiedcall <- model$call
    formfixednew <- strsplit(as.character(eval(model$call$fixed)), "\\~")[[3]]
    modifiedcall$fixed <- as.formula(paste0("simY ~", formfixednew))
    modifiedcall$data <- as.name("X")
    modi <- eval(modifiedcall)
    ### predict effects, at 'at' units, centered at 0 because linear effects
    cpi <- crosspred(basis = cb, model = modi, cen = cen, at = at)
    ### store predictions
    res[isim, ] <- cpi$matfit[1, ]
  }
  return(res)
}




#' @describeIn simulatedlnm case of a hypothetical linear effect in a model of
#'   class \code{"glm"}.
#' @export
#' @importFrom dlnm crosspred
#' @importFrom stats as.formula coef formula predict
simulatedlnm.linearglm <- function(x) {
  ## method simulatedlnm for "linear" effect and model class "glm"
  model <- x$model
  cb <- x$cb
  at <- x$at
  cen <- x$cen
  effect <- x$effect
  nsim <- x$nsim
  verbose <- x$verbose
  coefnames <- x$coefnames
  x <- x$x

  implementedfamilies <- c("binomial", "quasibinomial", "poisson", "quasipoisson")

  modelfamily <- model$family$family
  if (!modelfamily %in% implementedfamilies) {
    stop(cat("\n Model families currently implemented are:\n",
             paste(" - ", implementedfamilies, "\n"), "\n"))
  }

  ######################################
  ###
  ### cases of (quasi)binomial, (quasi)poisson
  ###
  ######################################
  ### model data
  X <- model$data
  ###  fitted linear predictor
  linpredfitted <- predict(object = model, newdata = X, type = "link")
  ###  fitted contribution of crossbasis
  betasnames <- names(coef(model))
  betas <- coef(model)[which(betasnames %in% coefnames)]
  linpredcb <- cb %*% matrix(betas, ncol = 1)
  ### hypothetical contribution of crossbasis
  linpredcbh <- as.matrix(x) %*% matrix(effect / at, ncol = 1)
  linpred <-  linpredfitted - linpredcb + linpredcbh
  ### characterize response variable (family and dispersion)
  d <- summary(model)$dispersion
  ytype <- modelfamily
  #############################################################################
  ###
  ### BEGIN provisional solution to problem to generate overdispersed binary data
  ###
  #############################################################################
  ### if family is quasibinomial show a warning and use binomial. To do that,
  ### we create "ytype"
  if (ytype == "quasibinomial") {
    warning("Quasi-binomial distribution to simulate the response variable is not currently implemeted.
            \n Using binomial distribution.")
    d <- 1
    ytype <- "binomial"
  }
  #################################################################################
  ###
  ### END provisional solution to problems to generate overdispersed binary data
  ###
  #################################################################################

  ### simulations
  res <- matrix(NA, nrow = nsim, ncol = dim(x)[2])
  simvec <- 1:nsim
  ### verbose message
  if (verbose) {
    verbmes <- rep(".", nsim)
    verbmes[(simvec %% 10) == 0] <- simvec[(simvec %% 10) == 0]
    verbbreak <- rep("", nsim)
    verbbreak[(simvec %% 50) == 0] <- "\n"
  }

  for (isim in simvec) {
    if (verbose) {
      cat(verbmes[isim])
      cat(verbbreak[isim])
    }
    ### get mu
    mu <- model$family$mu.eta(linpred)
    ### generate response keeping missing values due to NA in the linear predictor
    X$Ysim <- NA
    existsmu <- !is.na(mu)
    X$simY[existsmu] <- rod(n = sum(existsmu), mu = mu[existsmu], d = d, type = ytype)
    ### reproduce missing values in response
    X$simY[is.na(X[, which(names(X) == as.character(formula(model)[[2]]))])] <- NA
    ### reproduce NA in the response due to NA in original response
    yname <- strsplit(as.character(eval(model$call$formula)), "\\~")[[2]]
    yNA <- is.na(model$data[[yname]])
    if (any(yNA)) {
      X$simY[yNA] <- NA
    }
    ### fit model
    modifiedcall <- model$call
    formfixednew <- strsplit(as.character(eval(model$call$formula)), "\\~")[[3]]
    modifiedcall$formula <- as.formula(paste0("simY ~", formfixednew))
    modifiedcall$data <- as.name("X")
    modi <- eval(modifiedcall)
    ### predict effects, at 'at' units, centered at 0 because linear effects
    cpi <- crosspred(basis = cb, model = modi, cen = cen, at = at)
    ### store predictions
    res[isim, ] <- cpi$matRRfit[1, ]
  }
  return(res)
}



#' @describeIn simulatedlnm case of a hypothetical non-linear effect in a model
#'   of class \code{"glm"}.
#' @export
#' @importFrom mgcv tensor.prod.model.matrix
#' @importFrom MASS ginv
#' @importFrom dlnm crossbasis crosspred onebasis
#' @importFrom stats as.formula coef formula predict
simulatedlnm.nonlinearglm <- function(x) {
  ## method simulatedlnm for "nonlinear" effect and model class "glm"
  model <- x$model
  cb <- x$cb
  at <- x$at
  cen <- x$cen
  effect <- x$effect
  nsim <- x$nsim
  verbose <- x$verbose
  coefnames <- x$coefnames
  x <- x$x

  implementedfamilies <- c("binomial", "quasibinomial", "poisson", "quasipoisson")

  modelfamily <- model$family$family
  if (!modelfamily %in% implementedfamilies) {
    stop(cat("\n Model families currently implemented are:\n",
             paste(" - ", implementedfamilies, "\n"), "\n"))
  }
  ######################################
  ###
  ### cases of (quasi)binomial, (quasi)poisson
  ###
  ######################################
  ### model data
  X <- model$data
  ### find new coefficient vector that provides (approximately) the associations
  ### provided by the user
  # from mkXpred code:
  predlag <- attr(cb, "lag")[1]:attr(cb, "lag")[2]
  predvar <- at
  varvec <- if(is.matrix(at)) as.numeric(at) else rep(at, length(predlag))
  lagvec <- rep(predlag, each = length(predvar))
  basisvar <- do.call("onebasis", c(list(x = varvec), attr(cb, "argvar")))
  #basislag <- do.call("onebasis",c(list(x=lagvec),attr(cb,"arglag")))
  basislag <- onebasis(x = lagvec, fun = "strata", breaks = predlag)
  if(!is.null(cen)) {
    basiscen <- do.call("onebasis", c(list(x = cen), attr(cb, "argvar")))
    basisvar <- scale(basisvar, center = basiscen, scale = FALSE)
  }
  Xpred <- mgcv::tensor.prod.model.matrix(list(basisvar, basislag))
  effectasvector <- matrix(effect, ncol = 1, byrow = TRUE)
  ### Now I want Xpred %*% coefs = effectasvector. Need to derive coefs. Can
  ### use least squares (least norm solution to avoid problems with singular
  ### matrices)
  newcoef <- MASS::ginv(t(Xpred) %*% Xpred) %*% t(Xpred) %*% effectasvector
  # Find new linear predictor to simulate from
  cbnew <- crossbasis(x = x,
                      argvar = list(fun = attr(cb, "argvar")$fun,
                                    knots = attr(cb, "argvar")$knots),
                      arglag = list(fun = "strata", breaks = predlag,
                                    intercept = FALSE),
                      lag = max(predlag))
  ### hypothetical contribution of crossbasis
  linpredcbh <- cbnew %*% newcoef
  ###  fitted contribution of crossbasis
  betasnames <- names(coef(model))
  betas <- coef(model)[which(betasnames %in% coefnames)]
  linpredcb <- cb %*% matrix(betas, ncol = 1)
  ###  fitted linear predictor
  linpredfitted <- predict(object = model, newdata = X, type = "link")
  ### modified linear predictor
  linpred <-  linpredfitted - linpredcb + linpredcbh

  ### characterize response variable (family and dispersion)
  d <- summary(model)$dispersion
  ytype <- modelfamily
  #############################################################################
  ###
  ### BEGIN provisional solution to problems to generate overdispersed binary
  ###       data
  ###
  #############################################################################
  ### if family is quasibinomial show a warning and use binomial. To do that,
  ### we create "ytype"
  if (ytype == "quasibinomial") {
    warning("Quasi-binomial distribution to simulate the response variable is
            not currently implemeted.\n Using binomial distribution.")
    d <- 1
    ytype <- "binomial"
  }
  #############################################################################
  ###
  ### END provisional solution to problems to generate overdispersed binary
  ###     data
  ###
  #############################################################################

  ### simulations
  res <- vector("list", nsim)
  simvec <- 1:nsim
  ### verbose message
  if (verbose) {
    verbmes <- rep(".", nsim)
    verbmes[(simvec %% 10) == 0] <- simvec[(simvec %% 10) == 0]
    verbbreak <- rep("", nsim)
    verbbreak[(simvec %% 50) == 0] <- "\n"
  }

  for (isim in simvec) {
    if (verbose) {
      cat(verbmes[isim])
      cat(verbbreak[isim])
    }
    ### get mu
    mu <- model$family$mu.eta(linpred)
    ### generate response keeping missing values due to NA in the linear
    ### predictor
    X$Ysim <- NA
    existsmu <- !is.na(mu)
    X$simY[existsmu] <- rod(n = sum(existsmu), mu = mu[existsmu], d = d, type = ytype)
    ### reproduce missing values in response
    X$simY[is.na(X[, which(names(X) == as.character(formula(model)[[2]]))])] <- NA
    ### reproduce NA in the response due to NA in original response
    yname <- strsplit(as.character(model$call$formula), "\\~")[[2]]
    yNA <- is.na(model$data[[yname]])
    if (any(yNA)) {
      X$simY[yNA] <- NA
    }
    ### fit model
    modifiedcall <- model$call
    formfixednew <- strsplit(as.character(eval(model$call$formula)), "\\~")[[3]]
    modifiedcall$formula <- as.formula(paste0("simY ~", formfixednew))
    modifiedcall$data <- as.name("X")
    modi <- eval(modifiedcall)

    # predict effects, at 'at' units with reference at 'cen'
    cpi <- crosspred(basis = cb, model = modi, cen = cen, at = at)

    # store predictions
    res[[isim]] <- cpi$matRRfit
  }
  return(res)
}

Try the collin package in your browser

Any scripts or data that you put into this service are public.

collin documentation built on Sept. 19, 2023, 5:06 p.m.