old_code/createData.R

###### simulate data vor use in GADS-Testing/Help/Understanding

### libraries
library(purrr)
library(data.table)
library(devtools)

# 4 layers: Student, Class, Teacher, School
# all with imputed and not imputed data
# all with keys

# function for each layer
simulateData <- function(N, lvl_name, n_unimp, n_imp, imps, hlvl_name, N_higherKey) {
  IDs <- 1:N
  # simulate unimputed variables
  unimp_Vars <- simulateSingleFrame(N = N, n_vars = n_unimp, namePrefix = lvl_name)
  # simulate imputed variables
  impVars <- rerun(imps, simulateSingleFrame(N = N, n_vars = n_imp, namePrefix = paste("imp", lvl_name, sep = "_")))
  impVars <- listpos2imp(impVars)
  # create higher order ID variable
  hIDs <- vector("integer", N)
  hIDs[] <- 1:N_higherKey

  # create seperate data sets (unimputed, imputed)
  unimp_dat <- data.table(IDs, unimp_Vars, hIDs)
  imp_list <- lapply(impVars, function(impDat) data.table(IDs, impDat))
  imp_dat <- do.call(rbind, imp_list)

  # name id-variables
  names(unimp_dat)[1] <- paste("ID", lvl_name, sep = "_")
  names(imp_dat)[1] <- paste("ID", lvl_name, sep = "_")
  names(unimp_dat)[ncol(unimp_dat)] <- paste("ID", hlvl_name, sep = "_")

  # create list with both data.frames
  twoList <- list(unimputed = unimp_dat, imputed = imp_dat)
  twoList
}

# simulate a single data frame (for unimputed variables or a single imputation for imputed variables)
simulateSingleFrame <- function(N, n_vars, IDs, namePrefix = "var") {
  values <- as.data.table(rerun(n_vars, rnorm(N, 0, 1)))
  names(values) <- paste(namePrefix, 1:n_vars, sep = "_")
  values
}

## add counter to imputed data set("imputation variable")
listpos2imp <- function(datList) {
  for(i in seq_along(datList)) {
    datList[[i]]$n_imp <- i
  }
  datList
}


### simulate data sets for each layer
#simulateData(N = 50, lvl_name = "stud", n_unimp = 3, n_imp = 3, imps = 5, N_higherKey = 15, hlvl_name = "class")

### wrapper
simulate_RDL <- function(lvl_names, lvl_N) {
  # checks:
  stopifnot(length(lvl_names) == length(lvl_N))

  l <- length(lvl_names)
  # modification for last data set
  lvl_names[l + 1] <- "dummy"
  lvl_N[l + 1] <- 1

  datList <- vector("list", l)
  for(i in 1:l) {
    datList[[i]] <- simulateData(N = lvl_N[i], lvl_name = lvl_names[i], n_unimp = 3, n_imp = 3, imps = 5,
                 hlvl_name = lvl_names[i+1], N_higherKey = lvl_N[i+1])
  }
  names(datList) <- lvl_names[1:l]
  datList
}

## simulate relational data list!
sim_RDL <- simulate_RDL(lvl_names = c("stud", "class", "teach", "sch"), lvl_N = c(500, 50, 25, 13))
sim_RDL[[4]]$unimputed <- sim_RDL[[4]]$unimputed[, !(names(sim_RDL[[4]]$unimputed) %in% "ID_dummy"), with = F]

str(sim_RDL)

# save data for package
use_data(sim_RDL, overwrite = T)
b-becker/eatGADS documentation built on May 24, 2019, 8:47 p.m.