R/fixef_sim.r
In simglm: Simulate Models Based on the Generalized Linear Model

Documented in sim_continuous2 sim_factor2 sim_time simulate_fixed simulate_heterogeneity simulate_knot

#' Simulate categorical, factor, or discrete variables
#' 
#' Function that simulates discrete, factor, or categorical variables.  
#' Is essentially a wrapper around the sample function from base R.
#' 
#' @param n A list of sample sizes.
#' @param levels Scalar indicating the number of levels for categorical, 
#'   factor, or discrete variable. Can also specify levels as a character vector.
#' @param var_level The level the variable should be simulated at. This can either 
#'      be 1, 2, or 3 specifying a level 1, level 2, or level 3 variable 
#'      respectively.
#' @param replace TRUE/FALSE indicating whether levels should be sampled with 
#'   replacement. Default is TRUE.
#' @param ... Additional parameters passed to the sample function.
#' @export 
sim_factor2 <- function(n, levels, var_level = 1, replace = TRUE,
                       ...) {
  if(var_level == 1) {
    cat_var <- base::sample(x = levels, size = sum(n[['level1']]), 
                            replace = replace, ...)
  } else {
    if(var_level == 2) {
      cat_var <- rep(base::sample(x = levels, size = sum(n[['level2']]), 
                                  replace = replace, ...),
                      times = n[['level1']])
    } else {
      cat_var <- rep(base::sample(x = levels, size = n[['level3']], 
                                  replace = replace, ...),
                      times = n[['level3_total']])
    }
  }
  cat_var
}

#' Simulate continuous variables
#' 
#' Function that simulates continuous variables. Any distribution function in 
#' R is supported.
#' 
#' @param n A list of sample sizes.
#' @param dist A distribution function. This argument takes a quoted
#'      R distribution function (e.g. 'rnorm'). Default is 'rnorm'.
#' @param var_level The level the variable should be simulated at. This can either 
#'      be 1, 2, or 3 specifying a level 1, level 2, or level 3 variable 
#'      respectively.
#' @param variance The variance for random effect simulation.
#' @param ther_sim A TRUE/FALSE flag indicating whether the error simulation 
#'  function should be simulated, that is should the mean and standard deviation
#'  used for standardization be simulated. 
#' @param ther_val A vector of 2 that should include the theoretical mean and 
#'  standard deviation of the generating function.
#' @param ... Additional parameters to pass to the dist_fun argument.
#' @export 
sim_continuous2 <- function(n, dist = 'rnorm', var_level = 1, 
                            variance = NULL, ther_sim = FALSE, ther_val = NULL, 
                            ...) {
  
  if(var_level == 1) {
    cont_var <- unlist(lapply(n[['level1']], FUN = dist, ...))
  } else {
    if(var_level == 2) {
      cont_var <- rep(unlist(lapply(sum(n[['level2']]), FUN = dist, ...)),
                      times = n[['level1']])
    } else {
      cont_var <- rep(unlist(lapply(n[['level3']], FUN = dist, ...)),
                      times = n[['level3_total']])
    }
  }
  
  if(!is.null(variance)) {
    if(ther_sim) {
      ther_val <- do.call(dist, c(list(n = 10000000), ...))
      ther <- c(mean(ther_val), sd(ther_val))
      
      cont_var <- standardize(cont_var, ther[1], ther[2])
    }
    if(!is.null(ther_val)) {
      cont_var <- standardize(cont_var, ther_val[1], ther_val[2])
    }
      cont_var <- cont_var %*% chol(c(variance))
  }
  cont_var
}

#' Simulate knot locations
#' 
#' Function that generates knot locations. An example of usefulness of this funciton
#' would be with generation of interrupted time series data. Another application may
#' be with simulation of piecewise linear data structures.
#' 
#' @param data Data to pass to the sim_knot2 function to determine knot 
#'   location.
#' @param sim_args A named list with special model formula syntax. See details and examples
#'   for more information. The named list may contain the following:
#'   \itemize{
#'     \item fixed: This is the fixed portion of the model (i.e. covariates)
#'     \item random: This is the random portion of the model (i.e. random effects)
#'     \item error: This is the error (i.e. residual term).
#'   }
#' @param data Mostly internal argument.
#' @export
simulate_knot <- function(data, sim_args) {
  
  purrr::invoke_map("sim_knot2", 
                    sim_args[['knot']], 
                    data = data
  ) %>% data.frame()
  
}

sim_knot2 <- function(data, variable, knot_locations, right = FALSE, ...) {
  
  if(!right) {
    knot_locat <- c(min(data[[variable]]), knot_locations, (max(data[[variable]]) + 1))
  } else {
    knot_locat <- c((min(data[[variable]]) - 1), knot_locations, (max(data[[variable]])))
  }
  
  cut(data[[variable]], knot_locat, labels = FALSE, right = right, ...) - 1
  
}

#' Simulate Time
#' 
#' This function simulates data for the time variable of longitudinal data.
#' 
#' @param n Sample size of the levels.
#' @param time_levels The values the time variable should take. If NULL (default),
#'   the time values are discrete integers starting at 0 and going to n - 1.
#' @param ... Currently not used.
#' 
#' @export 
sim_time <- function(n, time_levels = NULL, ...) {
  
  if(is.null(time_levels)) {
    do.call('c', 
            lapply(seq_along(n[['level1']]), function(xx) 
              0:(n[['level1']][xx] - 1)))
  } else {
    do.call('c', 
            lapply(seq_along(n[['level1']]), function(xx)
              time_levels[1:n[['level1']][xx]]))
  }
}

sim_variable <- function(var_type = c("continuous", "factor", "ordinal", 
                                      'time'), ...) {
  var_type <- match.arg(var_type)
  
  switch(var_type,
    continuous = sim_continuous2(...),
    factor = sim_factor2(...),
    ordinal = sim_factor2(...),
    #knot = sim_knot2(...),
    time = sim_time(...)
  )
}

#' Tidy fixed effect formula simulation
#' 
#' This function simulates the fixed portion of the model using a formula syntax.
#' 
#' @param data Data simulated from other functions to pass to this function. Can pass
#'  NULL if first in simulation string.
#' @param sim_args A named list with special model formula syntax. See details and examples
#'   for more information. The named list may contain the following:
#'   \itemize{
#'     \item fixed: This is the fixed portion of the model (i.e. covariates)
#'     \item random: This is the random portion of the model (i.e. random effects)
#'     \item error: This is the error (i.e. residual term).
#'   }
#' @param ... Other arguments to pass to error simulation functions.
#' @importFrom purrr modify_if
#' 
#' @export 
simulate_fixed <- function(data, sim_args, ...) {
  
  fixed_formula <- parse_formula(sim_args)[['fixed']]
  
  fixed_vars <- attr(terms(fixed_formula), "term.labels")  
  
  if(any(grepl('^factor\\(', fixed_vars))) {
    fixed_vars <- gsub("factor\\(|\\)$", "", fixed_vars)
  }
  if(any(grepl('^ns\\(', fixed_vars))) {
    fixed_vars <- gsub("ns\\(|bs\\(|\\,.+\\)$", "", fixed_vars)
  }
  if(any(grepl("^poly\\(", fixed_vars))) {
    fixed_vars <- gsub("poly\\(|\\,.+\\)", "", fixed_vars)
  }
  
  if(is.null(data)) {
    n <- sample_sizes(sim_args[['sample_size']])
    id_vars <- parse_randomeffect(parse_formula(sim_args)[['randomeffect']])[['cluster_id_vars']]
    cross_class <- parse_crossclass(sim_args, parse_randomeffect(parse_formula(sim_args)[['randomeffect']]))
    
    id_vars <- id_vars[id_vars %ni% cross_class[['cross_class_idvars']]]
    ids <- create_ids(n, 
                      c('level1_id', id_vars))
    Xmat <- purrr::invoke_map("sim_variable", 
                              sim_args[['fixed']],
                              n = n
    ) %>% 
      data.frame()
  } else {
    n <- compute_samplesize(data, sim_args)
    Xmat <- purrr::invoke_map("sim_variable", 
                              sim_args[['fixed']],
                              n = n
    ) %>% 
      data.frame()
  }
  if(!is.null(sim_args[['knot']])) {
    
    knot_names <- names(sim_args[['knot']])
    knot_loc <- grep(knot_names, fixed_vars)
    
    fixed_vars_knot <- fixed_vars[-knot_loc]
      
    if(any(grepl(":|^I", fixed_vars_knot))) {
      int_loc <- grep(":|^I", fixed_vars_knot)
      colnames(Xmat) <- fixed_vars_knot[-int_loc]
    } else {
      colnames(Xmat) <- fixed_vars_knot
    } 
    
    Xmat <- cbind(Xmat, 
                  simulate_knot(data = Xmat, sim_args = sim_args))
  }
  
  if(any(grepl(":|^I", fixed_vars))) {
    int_loc <- grep(":|^I", fixed_vars)
    colnames(Xmat) <- fixed_vars[-int_loc]
  } else {
    colnames(Xmat) <- fixed_vars
  } 

  if(any(unlist(lapply(seq_along(sim_args[['fixed']]), function(xx) 
    sim_args[['fixed']][[xx]]$var_type)) == 'factor') | 
    any(grepl("^ns|^poly", attr(terms(fixed_formula), "term.labels")))) {
    fixed_vars <- poly_ns_names(sim_args)
    
    if(any(grepl('^factor\\(', fixed_vars))) {
      fixed_vars <- gsub("factor\\(|\\)$", "", fixed_vars)
    }
    
    num_levels <- lapply(seq_along(sim_args[['fixed']]), function(xx) 
      sim_args[['fixed']][[xx]][['levels']])
    num_levels <- purrr::modify_if(num_levels, is.character, length)
    
    if(any(unlist(lapply(seq_along(sim_args[['fixed']]), function(xx) 
      num_levels[[xx]] > 1 & 
      sim_args[['fixed']][[xx]][['var_type']] == 'factor'))
      )) {
      fixed_vars <- factor_names(sim_args, fixed_vars)
    }
    
    Omat <- Xmat
    Xmat <- data.frame(model.matrix(fixed_formula, Xmat, ...))
    colnames(Xmat)[2:ncol(Xmat)] <- fixed_vars
    
    if(any(unlist(lapply(seq_along(sim_args[['fixed']]), function(xx) 
      sim_args[['fixed']][[xx]]$var_type)) == 'factor')) {
      Omat_factor <- Omat[unique_columns(Xmat, Omat)]
      Omat_factor_names <- attr(terms(fixed_formula), "term.labels")
      if(any(grepl(":|^I", Omat_factor_names))) {
        Omat_factor_names <- Omat_factor_names[-int_loc]
      }
    } else {
      Omat_factor <- NULL
    }
    if(any(grepl("^ns|^poly", attr(terms(fixed_formula), "term.labels")))) {
      fixed_vars_new <- attr(terms(fixed_formula), "term.labels") 
      fixed_vars_poly_ns <- gsub("poly\\(|\\,.+\\)|ns\\(|\\,.+\\)", "", 
                                 fixed_vars_new[grepl("^poly|^ns", fixed_vars_new)]
      )
      Omat_poly_ns <- Omat[ , fixed_vars_poly_ns, drop = FALSE]
    } else {
      Omat_poly_ns <- NULL
    }
    
    Omat <- dplyr::bind_cols(Omat_factor, Omat_poly_ns)
    
    Xmat <- dplyr::bind_cols(Xmat, Omat)
  } else {
    Xmat <- data.frame(model.matrix(fixed_formula, Xmat, ...))
    
    if(grepl("^0", as.character(fixed_formula)[2])) {
      colnames(Xmat)[1:ncol(Xmat)] <- fixed_vars
    } else {
      colnames(Xmat)[2:ncol(Xmat)] <- fixed_vars
    }
  }
  
  if(is.null(data)) {
    data.frame(Xmat, ids)
  } else {
    data.frame(data, Xmat)
  }
}


#' Tidy heterogeneity of variance simulation
#' 
#' This function simulates heterogeneity of level one error variance.
#' 
#' @param data Data simulated from other functions to pass to this function. 
#' This function needs to be specified after `simulate_fixed` and `simulate_error`.
#' @param sim_args A named list with special model formula syntax. See details and examples
#'   for more information. The named list may contain the following:
#'   \itemize{
#'     \item fixed: This is the fixed portion of the model (i.e. covariates)
#'     \item random: This is the random portion of the model (i.e. random effects)
#'     \item error: This is the error (i.e. residual term).
#'   }
#' @param ... Other arguments to pass to error simulation functions.
#' @export
simulate_heterogeneity <- function(data, sim_args, ...) {
  
  heterogeneity_error <- heterogeneity(variance = sim_args[['heterogeneity']][['variance']],
                fixef = data, 
                variable = sim_args[['heterogeneity']][['variable']],
                err = data[['error']])

  data.frame(data[, !(names(data) %in% 'error')], 
             error = heterogeneity_error, orig_error = data[['error']])
  
}

Any scripts or data that you put into this service are public.

simglm documentation built on Feb. 7, 2022, 9:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

simglm
Simulate Models Based on the Generalized Linear Model

R/fixef_sim.r
In simglm: Simulate Models Based on the Generalized Linear Model

Defines functions simulate_heterogeneity simulate_fixed sim_variable sim_time sim_knot2 simulate_knot sim_continuous2 sim_factor2

Documented in sim_continuous2 sim_factor2 sim_time simulate_fixed simulate_heterogeneity simulate_knot

Try the simglm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

simglm Simulate Models Based on the Generalized Linear Model

R/fixef_sim.r In simglm: Simulate Models Based on the Generalized Linear Model

Defines functions simulate_heterogeneity simulate_fixed sim_variable sim_time sim_knot2 simulate_knot sim_continuous2 sim_factor2

Documented in sim_continuous2 sim_factor2 sim_time simulate_fixed simulate_heterogeneity simulate_knot

Try the simglm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

simglm
Simulate Models Based on the Generalized Linear Model

R/fixef_sim.r
In simglm: Simulate Models Based on the Generalized Linear Model