#' Generate a multilevel long-form data frame
#'
#' @description This function builds a long-form data set with \code{n}
#'   observations from \code{j} units.
#'
#' @details The function creates a data frame containing the variables
#'   \code{id}, \code{y}, and one column \code{V#} per fixed effect
#'   (\code{V2}, \code{V3}, ...). The first \code{n_level_2_var} fixed effects
#'   are level 2 variables (the first of them is the intercept, a constant 1),
#'   the remaining ones are level 1 variables.
#'
#'   The data are generated in steps. First the level 2 data are generated,
#'   which are constant per unit, and the random coefficients are drawn per
#'   unit. Then the ordering of units is created using the \code{sample}
#'   function with \code{replace = TRUE}. This results in an unequal number of
#'   observations per unit, although each unit has equal probability of being
#'   sampled. Lastly, level 1 data and the residuals are generated by drawing
#'   from the normal distribution.
#'
#'   The random effects design consists of an intercept and the first
#'   \code{length(random_coef_sd) - 1} level 1 variables. Means and standard
#'   deviations of the covariates are recycled to the number of covariates
#'   they describe, so a scalar applies to every covariate of that level.
#'
#' @param n Number of observations.
#' @param j Number of individuals (units).
#' @param fixed_coef Vector with fixed effects coefficients. Its length must
#'   be at least \code{n_level_2_var}.
#' @param random_coef_sd Vector with standard deviations of the random
#'   effects. The first element belongs to the random intercept, further
#'   elements to random slopes of level 1 variables.
#' @param resid_sd Scalar, residual standard deviation.
#' @param n_level_2_var Number of level 2 variables, including the intercept.
#' @param mean_fixed_level_2 Means of the fixed effects covariates, level 2.
#' @param sd_fixed_level_2 Standard deviation of the fixed effects covariates,
#'   level 2.
#' @param mean_fixed_level_1 Means of the fixed effects covariates, level 1.
#' @param sd_fixed_level_1 Standard deviation of the fixed effects covariates,
#'   level 1.
#' @keywords multilevel dataset
#' @export
#' @examples
#' ## We create a dataset, consisting of 2500 observations from 20
#' ## units. The fixed effects have the coefficients 1, 2, 3, 4, and 5. The
#' ## variance of the random effects equals 1, 4, and 9. Lastly the
#' ## residual variance equals 4:
#'
#' test_data <- build_dataset(n = 2500,
#'                            j = 20,
#'                            fixed_coef = 1:5,
#'                            random_coef_sd = 1:3,
#'                            resid_sd = 2)
#' @return A data frame with variable \code{id}, which labels the units,
#'   \code{y}, the outcome or dependent variable, and the covariates
#'   \code{V2}, \code{V3}, ... in the order of \code{fixed_coef}.
build_dataset <- function(n,
                          j,
                          fixed_coef,
                          random_coef_sd,
                          resid_sd,
                          n_level_2_var = 2,
                          mean_fixed_level_2 = 0,
                          sd_fixed_level_2 = 1,
                          mean_fixed_level_1 = 0,
                          sd_fixed_level_1 = 1) {
  n_fixed_var <- length(fixed_coef)
  n_random_var <- length(random_coef_sd)
  n_level_1 <- n_fixed_var - n_level_2_var
  n_level_2_cov <- n_level_2_var - 1
  n_random_slopes <- n_random_var - 1

  # Fail early on inconsistent dimensions; otherwise the element-wise product
  # of design and coefficients would be misaligned.
  if (j < 1) {
    stop("`j` must be at least 1.", call. = FALSE)
  }
  if (n_level_2_var < 1) {
    stop("`n_level_2_var` must be at least 1 (it includes the intercept).",
         call. = FALSE)
  }
  if (n_level_1 < 0) {
    stop("`fixed_coef` must have at least `n_level_2_var` elements.",
         call. = FALSE)
  }
  if (n_random_var < 1) {
    stop("`random_coef_sd` must have at least one element.", call. = FALSE)
  }
  if (n_random_slopes > n_level_1) {
    stop("There are more random slopes than level 1 variables.",
         call. = FALSE)
  }

  # Recycle the moments so that a scalar applies to every covariate.
  mean_fixed_level_2 <- rep_len(mean_fixed_level_2, n_level_2_cov)
  sd_fixed_level_2 <- rep_len(sd_fixed_level_2, n_level_2_cov)
  mean_fixed_level_1 <- rep_len(mean_fixed_level_1, n_level_1)
  sd_fixed_level_1 <- rep_len(sd_fixed_level_1, n_level_1)

  # Unit-level quantities: level 2 covariates and coefficients per unit.
  level_2_data_j <- build_person_data(j,
                                      n_level_2_var,
                                      mean_fixed_level_2,
                                      sd_fixed_level_2)
  coef_data <- cbind(matrix(rep(fixed_coef, each = j), nrow = j),
                     build_coef_data(j, random_coef_sd))

  # Assign observations to units; expand unit-level data to observations.
  # `drop = FALSE` keeps the matrix shape when n == 1 or n_level_2_var == 1.
  ids <- sample(seq_len(j), n, replace = TRUE)
  coef_dataset <- coef_data[ids, , drop = FALSE]
  level_2_data <- level_2_data_j[ids, -1, drop = FALSE]

  # Level 1 covariates, one column per variable.
  level_1_data <- matrix(NA_real_, nrow = n, ncol = n_level_1)
  for (t in seq_len(n_level_1)) {
    level_1_data[, t] <- stats::rnorm(n,
                                      mean_fixed_level_1[t],
                                      sd_fixed_level_1[t])
  }

  fixed_design <- cbind(level_2_data, level_1_data)
  colnames(fixed_design) <- paste0("V", seq_len(n_fixed_var) + 1)

  # Random effects design: intercept plus the first level 1 variables.
  # seq_len() yields an empty selection for a random-intercept-only model.
  random_design <- cbind(
    rep(1, n),
    level_1_data[, seq_len(n_random_slopes), drop = FALSE]
  )

  design <- cbind(fixed_design, random_design)
  y <- rowSums(design * coef_dataset) + stats::rnorm(n, 0, resid_sd)

  as.data.frame(cbind(id = ids, y = y, fixed_design))
}
#' build_person_data is a function to create level 2 data.
#'
#' The first column holds the unit id (\code{1:j}), the second column is the
#' intercept (a constant 1). The remaining \code{n_level_2_var - 1} columns
#' are drawn from normal distributions, one column per level 2 covariate.
#'
#' @param j Number of units.
#' @param n_level_2_var Number of level 2 variables, including the intercept.
#'   Must be at least 1.
#' @param mean_fixed_level_2 A vector with means for the level 2 covariates.
#'   Recycled to the number of covariates, so a scalar applies to all of them.
#' @param sd_fixed_level_2 A vector with standard deviations for the level 2
#'   covariates. Recycled in the same way as the means.
#' @return A numeric matrix with \code{j} rows and \code{1 + n_level_2_var}
#'   columns holding the level 2 data for the units.
build_person_data <- function(j,
                              n_level_2_var,
                              mean_fixed_level_2,
                              sd_fixed_level_2) {
  if (n_level_2_var < 1) {
    stop("`n_level_2_var` must be at least 1 (it includes the intercept).",
         call. = FALSE)
  }
  n_covariates <- n_level_2_var - 1

  person_data <- matrix(NA_real_, nrow = j, ncol = 1 + n_level_2_var)
  person_data[, 1] <- seq_len(j)
  person_data[, 2] <- 1

  if (n_covariates > 0) {
    # Recycle so that indexing by covariate never runs past the vector end,
    # which would silently produce NA moments and NaN draws.
    means <- rep_len(mean_fixed_level_2, n_covariates)
    sds <- rep_len(sd_fixed_level_2, n_covariates)
    for (t in seq_len(n_covariates)) {
      person_data[, t + 2] <- stats::rnorm(j, means[t], sds[t])
    }
  }
  person_data
}
#' build_coef_data creates a matrix with random coefficients per unit.
#'
#' Each column is drawn from a normal distribution with mean 0 and the
#' corresponding standard deviation in \code{random_coef_sd}.
#'
#' @param j Number of units.
#' @param random_coef_sd A vector with the true standard deviations of the
#'   random effects.
#' @return A numeric matrix with \code{j} rows and one column per random
#'   effect, holding the random coefficients for all units. An empty
#'   \code{random_coef_sd} gives a matrix with zero columns.
build_coef_data <- function(j,
                            random_coef_sd) {
  coef_random <- matrix(NA_real_, nrow = j, ncol = length(random_coef_sd))
  # seq_along() iterates zero times for an empty vector, unlike 1:length().
  for (i in seq_along(random_coef_sd)) {
    coef_random[, i] <- stats::rnorm(j, 0, random_coef_sd[i])
  }
  coef_random
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.