inst/doc/simulating-data.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----message=FALSE, warning=FALSE---------------------------------------------
library(holodeck)
library(dplyr)
library(purrr)
library(mice)

## -----------------------------------------------------------------------------
df <- tibble(Y = rep(c("a", "b"), each = 5))

df %>% sim_covar(n_vars = 5, var = 1, cov = 0.5)

## -----------------------------------------------------------------------------
sim_covar(n_obs = 10, n_vars = 5, var = 1, cov = 0.5)

## -----------------------------------------------------------------------------
sim_cat(n_obs = 10, n_groups = 2)

## -----------------------------------------------------------------------------
df %>%
  group_by(Y) %>% 
  sim_discr(n_vars = 5, var = 1, cov = 0.1, group_means = c(1, -1))

## -----------------------------------------------------------------------------
df <-
  sim_covar(n_obs = 20, n_vars = 5, var = 1, cov = 0.1, name = "low") %>% #5 variables with low covariance
  sim_covar(n_vars = 5, var = 1, cov = 0.8, name = "high")            #5 variables with high covariance

## -----------------------------------------------------------------------------
df1 <-
  df %>% 
  sim_cat(n_groups = 2, name = "factor") %>% 
  group_by(factor) %>% 
  sim_discr(n_vars = 5, var = 1, cov = 0.1, group_means = c(-1, 1), name = "discr") %>% 
  ungroup()

## -----------------------------------------------------------------------------
df2 <-
  df1 %>% 
  sim_missing(prop = 0.1)
df2

## ----fig.width=6, fig.height=5------------------------------------------------
library(ggplot2)
df1 %>%
  select(-factor) %>% 
  cov() %>% 
  heatmap(Rowv = NA, Colv = NA, symm = TRUE)

## -----------------------------------------------------------------------------
df2 <- 
  sim_cat(n_obs = 40, n_groups = 3, name = "factor") %>% 
  sim_covar(n_vars = 3, var = 1, cov = 0.0, name = "noise") %>% 
  group_by(factor) %>% 
  sim_discr(n_vars = 5, var = 1, cov = 0, group_means = c(-1, 0, 1), name  = "signal") %>%
  sim_discr(n_vars = 5, var = 1, cov = 0, group_means = c(0, 0.5, 1), name = "signal2") %>% 
  ungroup()
df2

## -----------------------------------------------------------------------------
set.seed(100)
dfs <-
  map(1:20, 
      ~sim_cat(n_obs = 40, n_groups = 3, name = "factor") %>% 
        sim_covar(n_vars = 3, var = 1, cov = 0.0, name = "noise") %>% 
        group_by(factor) %>% 
        sim_discr(n_vars = 5, var = 1, cov = 0, group_means = c(-1, 0, 1), name  = "signal") %>%
        sim_discr(n_vars = 5, var = 1, cov = 0, group_means = c(0, 0.5, 1), name = "signal2") %>% 
        ungroup())

## -----------------------------------------------------------------------------
set.seed(101)
dfs.missing <-
  map(dfs, ~sim_missing(., prop = 0.05))

## ----warning=FALSE------------------------------------------------------------
# this might take a few seconds
dfs.imputed <-
  map(dfs.missing, ~mice(., printFlag = FALSE) %>% complete())

## -----------------------------------------------------------------------------
head(dfs[[1]])
head(dfs.missing[[1]])
head(dfs.imputed[[1]])

Try the holodeck package in your browser

Any scripts or data that you put into this service are public.

holodeck documentation built on Aug. 26, 2023, 1:07 a.m.