# old_code/createDBdata.R

###### simulate data for use in DB testing / importing into SQLite

### libraries
library(purrr)
library(data.table)
library(devtools)

# 4 layers: Student, Teacher, Class, School
# all with imputed and not imputed data
# all with keys

# function for each layer
#
# Simulates the data of one hierarchy level (e.g. students or classes).
#
# Arguments:
#   N                number of units (rows) on this level
#   lvl_name         name of the level; used as prefix of the variable names,
#                    as suffix of the ID column ("ID_" + lvl_name) and as
#                    prefix of the names of the returned list
#   n_unimp          number of unimputed variables
#   n_imp            number of imputed variables
#   imps             number of imputations
#   foreignKey_name  name of the level the foreign key refers to; the key
#                    column is named "ID_" + foreignKey_name
#   foreignKey_N     number of units on the level the foreign key refers to
#
# Returns a named list with two data.tables:
#   lvl_name + "_unimp": one row per unit with ID, unimputed variables and
#                        the foreign key
#   lvl_name + "_imp":   all imputations stacked below each other with ID,
#                        imputed variables and the imputation counter n_imp
simulateData <- function(N, lvl_name, n_unimp, n_imp, imps, foreignKey_name, foreignKey_N) {
  IDs <- seq_len(N)
  # simulate unimputed variables
  unimp_Vars <- simulateSingleFrame(N = N, n_vars = n_unimp, namePrefix = lvl_name)
  # simulate imputed variables: one independently drawn frame per imputation
  # (replicate() is used instead of purrr::rerun(), which is deprecated)
  impVars <- replicate(
    imps,
    simulateSingleFrame(N = N, n_vars = n_imp, namePrefix = paste("imp", lvl_name, sep = "_")),
    simplify = FALSE
  )
  impVars <- listpos2imp(impVars)
  # create foreign ID variable: cycle through the IDs of the referenced level.
  # rep_len() recycles to exactly N values, so no warning is raised when N is
  # not a multiple of foreignKey_N
  hIDs <- rep_len(seq_len(foreignKey_N), N)

  # create separate data sets (unimputed, imputed)
  unimp_dat <- data.table(IDs, unimp_Vars, hIDs)
  imp_list <- lapply(impVars, function(impDat) data.table(IDs, impDat))
  imp_dat <- do.call(rbind, imp_list)

  # name id-variables (own ID is the first column, foreign key the last one)
  names(unimp_dat)[1] <- paste("ID", lvl_name, sep = "_")
  names(imp_dat)[1] <- paste("ID", lvl_name, sep = "_")
  names(unimp_dat)[ncol(unimp_dat)] <- paste("ID", foreignKey_name, sep = "_")

  # create list with both data sets
  twoList <- list(unimp_dat, imp_dat)
  names(twoList) <- paste(lvl_name, c("unimp", "imp"), sep = "_")
  twoList
}

# simulate a single data frame (for unimputed variables or a single imputation for imputed variables)
#
# Arguments:
#   N           number of rows
#   n_vars      number of variables (columns); expected to be at least 1
#   IDs         not used by this function; kept so existing calls stay valid
#   namePrefix  prefix of the column names
#
# Returns a data.table with N rows and n_vars columns of independent
# standard-normal draws, named namePrefix_1, namePrefix_2, ...
simulateSingleFrame <- function(N, n_vars, IDs, namePrefix = "var") {
  # one rnorm() draw per column; replicate() is used instead of the
  # deprecated purrr::rerun()
  values <- as.data.table(replicate(n_vars, rnorm(N, 0, 1), simplify = FALSE))
  names(values) <- paste(namePrefix, seq_len(n_vars), sep = "_")
  values
}

## tag every imputed data set with its imputation number
#
# datList: list of data sets, one element per imputation.
# Returns datList with a column n_imp added to every element; the column
# holds the position of that element in the list.
listpos2imp <- function(datList) {
  # assigning into datList[] keeps the names of the incoming list
  datList[] <- lapply(seq_along(datList), function(pos) {
    imputation <- datList[[pos]]
    imputation$n_imp <- pos
    imputation
  })
  datList
}

# example: simulate a single level ("stud") with 50 units, 3 unimputed and
# 3 imputed variables, 5 imputations and a foreign key to 30 "class" units
simulateData(
  N = 50, lvl_name = "stud", n_unimp = 3, n_imp = 3, imps = 5,
  foreignKey_name = "class", foreignKey_N = 30
)


### wrapper
#
# Simulates all hierarchy levels of a data base in one go.
#
# Arguments:
#   lvl_names  names of the levels, ordered from lowest to highest
#   lvl_N      number of units per level, same order and length as lvl_names
#
# Every level is simulated with 3 unimputed variables, 3 imputed variables
# and 5 imputations and receives a foreign key to the next level.
#
# Returns one flat named list holding, for each level, the two elements
# returned by simulateData().
simulate_DB_list <- function(lvl_names, lvl_N) {
  # checks:
  stopifnot(length(lvl_names) == length(lvl_N))

  # each level refers to the next one; the last level has no real parent and
  # therefore points to a "dummy" level consisting of a single unit
  higher_names <- c(lvl_names[-1], "dummy")
  higher_N <- c(lvl_N[-1], 1)

  per_level <- lapply(seq_along(lvl_names), function(i) {
    # argument names must match the signature of simulateData()
    simulateData(N = lvl_N[i], lvl_name = lvl_names[i], n_unimp = 3, n_imp = 3, imps = 5,
                 foreignKey_name = higher_names[i], foreignKey_N = higher_N[i])
  })

  # flatten the per-level lists into one list; yields list() for zero levels
  Reduce(c, per_level, list())
}

# simulate a data base with four levels (students, teachers, classes, schools)
sim_DB_list <- simulate_DB_list(lvl_names = c("stud", "teach", "class", "sch"), lvl_N = c(500, 50, 25, 13))
str(sim_DB_list)

## save as csv's (one file per list element, named after the element)
# write.csv() does not create missing directories, so make sure the target
# directory exists before writing
out_dir <- "data"
if (!dir.exists(out_dir)) {
  dir.create(out_dir)
}

for(i in seq_along(sim_DB_list)) {
  write.csv(sim_DB_list[[i]], file = file.path(out_dir, paste0(names(sim_DB_list)[i], ".csv")),
            row.names = FALSE)
}
# b-becker/eatGADS documentation built on May 24, 2019, 8:47 p.m.