R/simulatedlnm.R
In collin: Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models

Documented in simulatedlnm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm.nonlinearglm

#' simulatedllm generic.
#'
#' Simulation procedure internally called by the \code{\link{collindlnm}}
#'   function for a given hypothetical effect.
#'
#' @param x an object internally generated by the \code{\link{collindlnm}} function.
#' @export
simulatedlnm <- function(x) UseMethod("simulatedlnm")




#' @describeIn simulatedlnm case of a hypothetical linear effect in a model of
#'   class \code{"lme"}.
#' @export
#' @importFrom nlme fixed.effects
#' @importFrom dlnm crosspred
#' @importFrom stats as.formula fitted formula rnorm
simulatedlnm.linearlme <- function(x) {
  ## method simulatedlnm for "linear" effect and model class "lme"
  model <- x$model
  cb <- x$cb
  at <- x$at
  cen <- x$cen
  effect <- x$effect
  nsim <- x$nsim
  verbose <- x$verbose
  coefnames <- x$coefnames
  x <- x$x

  ### get hypothetical linear predictor
  #  fitted linear predictor
  linpredfitted <- fitted(model, level = 0)
  #  fitted contribution of crossbasis
  betasfixednames <- names(fixed.effects(model))
  betasfixed <- fixed.effects(model)[which(betasfixednames %in% coefnames)]
  linpredcb <- cb %*% matrix(betasfixed, ncol = 1)
  # hypothetical ontribution of crossbasis
  linpredcbh <- as.matrix(x) %*% matrix(effect / at, ncol = 1)
  linpred <-  linpredfitted - linpredcb + linpredcbh
  ### residual standard error
  sdre <- exp(unlist(model$modelStruct$reStruct)) * model$sigma
  ### number of groups at population level
  ng <- length(sdre)
  ### labels for the simulated random effects
  ranefnames <- paste0("ranef", 1:ng)
  ### number of observations
  n <- dim(model$data)[1]
  ### simulations
  res <- matrix(NA, nrow = nsim, ncol = dim(x)[2])
  simvec <- 1:nsim
  ### verbose message
  if (verbose) {
    verbmes <- rep(".", nsim)
    verbmes[(simvec %% 10) == 0] <- simvec[(simvec %% 10) == 0]
    verbbreak <- rep("", nsim)
    verbbreak[(simvec %% 50) == 0] <- "\n"
  }

  for (isim in simvec) {
    if (verbose) {
      cat(verbmes[isim])
      cat(verbbreak[isim])
    }
    ### simulate residual error
    X <- model$data
    X$orderX <- 1:n
    raneflist <- vector("list", ng)
    for (i in 1:ng) {
      ### it requires id's to represent a unique unit (e.g) same id cannot
      ### represent different subjects in different schools
      vari <- model$data[, which(names(X) == names(sdre)[[i]])]
      raneflist[[i]] <- data.frame(as.factor(levels(vari)),
                                   rnorm(length(levels(vari)), 0, sd = sdre[[i]]))
      names(raneflist[[i]]) <- c(names(sdre)[i], ranefnames[i])
      X <- merge(X, raneflist[[i]], by = names(sdre)[[i]], all.x = TRUE)
    }
    X <- X[order(X$orderX), ]
    ### simulate response simY
    Xranef <- X[, ranefnames]
    if (ng == 1) {
      Xranef <- as.matrix(Xranef)
    }
    ### simulate residual error
    reserror <- rnorm(n, 0, sd = model$sigma)
    X$simY <- linpred + rowSums(Xranef) + reserror
    ### reproduce missing values in response
    X$simY[is.na(X[, which(names(X) == as.character(formula(model)[[2]]))])] <- NA
    ### fit model
    modifiedcall <- model$call
    formfixednew <- strsplit(as.character(eval(model$call$fixed)), "\\~")[[3]]
    modifiedcall$fixed <- as.formula(paste0("simY ~", formfixednew))
    modifiedcall$data <- as.name("X")
    modi <- eval(modifiedcall)
    ### predict effects, at 'at' units, centered at 0 because linear effects
    cpi <- crosspred(basis = cb, model = modi, cen = cen, at = at)
    ### store predictions
    res[isim, ] <- cpi$matfit[1, ]
  }
  return(res)
}




#' @describeIn simulatedlnm case of a hypothetical linear effect in a model of
#'   class \code{"glm"}.
#' @export
#' @importFrom dlnm crosspred
#' @importFrom stats as.formula coef formula predict
simulatedlnm.linearglm <- function(x) {
  ## method simulatedlnm for "linear" effect and model class "glm"
  model <- x$model
  cb <- x$cb
  at <- x$at
  cen <- x$cen
  effect <- x$effect
  nsim <- x$nsim
  verbose <- x$verbose
  coefnames <- x$coefnames
  x <- x$x

  implementedfamilies <- c("binomial", "quasibinomial", "poisson", "quasipoisson")

  modelfamily <- model$family$family
  if (!modelfamily %in% implementedfamilies) {
    stop(cat("\n Model families currently implemented are:\n",
             paste(" - ", implementedfamilies, "\n"), "\n"))
  }

  ######################################
  ###
  ### cases of (quasi)binomial, (quasi)poisson
  ###
  ######################################
  ### model data
  X <- model$data
  ###  fitted linear predictor
  linpredfitted <- predict(object = model, newdata = X, type = "link")
  ###  fitted contribution of crossbasis
  betasnames <- names(coef(model))
  betas <- coef(model)[which(betasnames %in% coefnames)]
  linpredcb <- cb %*% matrix(betas, ncol = 1)
  ### hypothetical contribution of crossbasis
  linpredcbh <- as.matrix(x) %*% matrix(effect / at, ncol = 1)
  linpred <-  linpredfitted - linpredcb + linpredcbh
  ### characterize response variable (family and dispersion)
  d <- summary(model)$dispersion
  ytype <- modelfamily
  #############################################################################
  ###
  ### BEGIN provisional solution to problem to generate overdispersed binary data
  ###
  #############################################################################
  ### if family is quasibinomial show a warning and use binomial. To do that,
  ### we create "ytype"
  if (ytype == "quasibinomial") {
    warning("Quasi-binomial distribution to simulate the response variable is not currently implemeted.
            \n Using binomial distribution.")
    d <- 1
    ytype <- "binomial"
  }
  #################################################################################
  ###
  ### END provisional solution to problems to generate overdispersed binary data
  ###
  #################################################################################

  ### simulations
  res <- matrix(NA, nrow = nsim, ncol = dim(x)[2])
  simvec <- 1:nsim
  ### verbose message
  if (verbose) {
    verbmes <- rep(".", nsim)
    verbmes[(simvec %% 10) == 0] <- simvec[(simvec %% 10) == 0]
    verbbreak <- rep("", nsim)
    verbbreak[(simvec %% 50) == 0] <- "\n"
  }

  for (isim in simvec) {
    if (verbose) {
      cat(verbmes[isim])
      cat(verbbreak[isim])
    }
    ### get mu
    mu <- model$family$mu.eta(linpred)
    ### generate response keeping missing values due to NA in the linear predictor
    X$Ysim <- NA
    existsmu <- !is.na(mu)
    X$simY[existsmu] <- rod(n = sum(existsmu), mu = mu[existsmu], d = d, type = ytype)
    ### reproduce missing values in response
    X$simY[is.na(X[, which(names(X) == as.character(formula(model)[[2]]))])] <- NA
    ### reproduce NA in the response due to NA in original response
    yname <- strsplit(as.character(eval(model$call$formula)), "\\~")[[2]]
    yNA <- is.na(model$data[[yname]])
    if (any(yNA)) {
      X$simY[yNA] <- NA
    }
    ### fit model
    modifiedcall <- model$call
    formfixednew <- strsplit(as.character(eval(model$call$formula)), "\\~")[[3]]
    modifiedcall$formula <- as.formula(paste0("simY ~", formfixednew))
    modifiedcall$data <- as.name("X")
    modi <- eval(modifiedcall)
    ### predict effects, at 'at' units, centered at 0 because linear effects
    cpi <- crosspred(basis = cb, model = modi, cen = cen, at = at)
    ### store predictions
    res[isim, ] <- cpi$matRRfit[1, ]
  }
  return(res)
}



#' @describeIn simulatedlnm case of a hypothetical non-linear effect in a model
#'   of class \code{"glm"}.
#' @export
#' @importFrom mgcv tensor.prod.model.matrix
#' @importFrom MASS ginv
#' @importFrom dlnm crossbasis crosspred onebasis
#' @importFrom stats as.formula coef formula predict
simulatedlnm.nonlinearglm <- function(x) {
  ## method simulatedlnm for "nonlinear" effect and model class "glm"
  model <- x$model
  cb <- x$cb
  at <- x$at
  cen <- x$cen
  effect <- x$effect
  nsim <- x$nsim
  verbose <- x$verbose
  coefnames <- x$coefnames
  x <- x$x

  implementedfamilies <- c("binomial", "quasibinomial", "poisson", "quasipoisson")

  modelfamily <- model$family$family
  if (!modelfamily %in% implementedfamilies) {
    stop(cat("\n Model families currently implemented are:\n",
             paste(" - ", implementedfamilies, "\n"), "\n"))
  }
  ######################################
  ###
  ### cases of (quasi)binomial, (quasi)poisson
  ###
  ######################################
  ### model data
  X <- model$data
  ### find new coefficient vector that provides (approximately) the associations
  ### provided by the user
  # from mkXpred code:
  predlag <- attr(cb, "lag")[1]:attr(cb, "lag")[2]
  predvar <- at
  varvec <- if(is.matrix(at)) as.numeric(at) else rep(at, length(predlag))
  lagvec <- rep(predlag, each = length(predvar))
  basisvar <- do.call("onebasis", c(list(x = varvec), attr(cb, "argvar")))
  #basislag <- do.call("onebasis",c(list(x=lagvec),attr(cb,"arglag")))
  basislag <- onebasis(x = lagvec, fun = "strata", breaks = predlag)
  if(!is.null(cen)) {
    basiscen <- do.call("onebasis", c(list(x = cen), attr(cb, "argvar")))
    basisvar <- scale(basisvar, center = basiscen, scale = FALSE)
  }
  Xpred <- mgcv::tensor.prod.model.matrix(list(basisvar, basislag))
  effectasvector <- matrix(effect, ncol = 1, byrow = TRUE)
  ### Now I want Xpred %*% coefs = effectasvector. Need to derive coefs. Can
  ### use least squares (least norm solution to avoid problems with singular
  ### matrices)
  newcoef <- MASS::ginv(t(Xpred) %*% Xpred) %*% t(Xpred) %*% effectasvector
  # Find new linear predictor to simulate from
  cbnew <- crossbasis(x = x,
                      argvar = list(fun = attr(cb, "argvar")$fun,
                                    knots = attr(cb, "argvar")$knots),
                      arglag = list(fun = "strata", breaks = predlag,
                                    intercept = FALSE),
                      lag = max(predlag))
  ### hypothetical contribution of crossbasis
  linpredcbh <- cbnew %*% newcoef
  ###  fitted contribution of crossbasis
  betasnames <- names(coef(model))
  betas <- coef(model)[which(betasnames %in% coefnames)]
  linpredcb <- cb %*% matrix(betas, ncol = 1)
  ###  fitted linear predictor
  linpredfitted <- predict(object = model, newdata = X, type = "link")
  ### modified linear predictor
  linpred <-  linpredfitted - linpredcb + linpredcbh

  ### characterize response variable (family and dispersion)
  d <- summary(model)$dispersion
  ytype <- modelfamily
  #############################################################################
  ###
  ### BEGIN provisional solution to problems to generate overdispersed binary
  ###       data
  ###
  #############################################################################
  ### if family is quasibinomial show a warning and use binomial. To do that,
  ### we create "ytype"
  if (ytype == "quasibinomial") {
    warning("Quasi-binomial distribution to simulate the response variable is
            not currently implemeted.\n Using binomial distribution.")
    d <- 1
    ytype <- "binomial"
  }
  #############################################################################
  ###
  ### END provisional solution to problems to generate overdispersed binary
  ###     data
  ###
  #############################################################################

  ### simulations
  res <- vector("list", nsim)
  simvec <- 1:nsim
  ### verbose message
  if (verbose) {
    verbmes <- rep(".", nsim)
    verbmes[(simvec %% 10) == 0] <- simvec[(simvec %% 10) == 0]
    verbbreak <- rep("", nsim)
    verbbreak[(simvec %% 50) == 0] <- "\n"
  }

  for (isim in simvec) {
    if (verbose) {
      cat(verbmes[isim])
      cat(verbbreak[isim])
    }
    ### get mu
    mu <- model$family$mu.eta(linpred)
    ### generate response keeping missing values due to NA in the linear
    ### predictor
    X$Ysim <- NA
    existsmu <- !is.na(mu)
    X$simY[existsmu] <- rod(n = sum(existsmu), mu = mu[existsmu], d = d, type = ytype)
    ### reproduce missing values in response
    X$simY[is.na(X[, which(names(X) == as.character(formula(model)[[2]]))])] <- NA
    ### reproduce NA in the response due to NA in original response
    yname <- strsplit(as.character(model$call$formula), "\\~")[[2]]
    yNA <- is.na(model$data[[yname]])
    if (any(yNA)) {
      X$simY[yNA] <- NA
    }
    ### fit model
    modifiedcall <- model$call
    formfixednew <- strsplit(as.character(eval(model$call$formula)), "\\~")[[3]]
    modifiedcall$formula <- as.formula(paste0("simY ~", formfixednew))
    modifiedcall$data <- as.name("X")
    modi <- eval(modifiedcall)

    # predict effects, at 'at' units with reference at 'cen'
    cpi <- crosspred(basis = cb, model = modi, cen = cen, at = at)

    # store predictions
    res[[isim]] <- cpi$matRRfit
  }
  return(res)
}

Any scripts or data that you put into this service are public.

collin documentation built on Sept. 19, 2023, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

collin
Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models

R/simulatedlnm.R
In collin: Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models

Defines functions simulatedlnm.nonlinearglm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm

Documented in simulatedlnm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm.nonlinearglm

Try the collin package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

collin Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models

R/simulatedlnm.R In collin: Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models

Defines functions simulatedlnm.nonlinearglm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm

Documented in simulatedlnm simulatedlnm.linearglm simulatedlnm.linearlme simulatedlnm.nonlinearglm

Try the collin package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

collin
Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models

R/simulatedlnm.R
In collin: Visualization the Effects of Collinearity in Distributed Lag Models and Other Linear Models