library(plyr)
library(dplyr)
library(tidyr)
library(haven)
setwd("C:/Users/benja/OneDrive - Cornell University/GitHub/ineqx")
crDat <- function(N.T, N.G, LP_n, LP_x, LP_mu, LP_sigma, x_const=F, t_vals=c(0,1), yp1=F, seed=T, sav=F) {
# Arguments ------------------------------------------------------------------------------------ #
# N.T = Number of time points
# N.G = Number of groups
# LP_n = "Linear predictor" for n per group. This can be just numbers as characters
# LP_n = c("300", "300", "300") or functions LP_n = c("300-10*year", "300", "300")
# LP_x = Proportion of X=0 as "linear predictor". E.g., LP_x = c("0.1+0.1*i", "0.5", "0.9-0.1*i")
# LP_mu = Linear predictor for mean, e.g.
# LP_sigma = Linear predictor for standard deviation
# x_const = {T|F} If True, distribution of X does not change across waves
# t_vals = Vector with numeric values, e.g. c(0,1) or c(0,1,2)
# yp1 = {T|F} IF True, {t=0, year}, {t=1, year+1}
# seed = {T|F}
# sav = {T|F}
# ---------------------------------------------------------------------------------------------- #
# N.T=3; N.G=3; LP_n = c("2", "2", "2"); LP_x <- c("0.5-0.1*i", "0.5", "0.5"); LP_mu = "10*(group==1)*x*t + 20*(group==2)*x*t + 30*(group==3)*x*t"; LP_sigma = "1"; x_const = F; x_vals=c(0,1); t_vals=c(0,1); yp1=F; seed=F; sav=F;
if(seed==T) set.seed(1)
if(isTRUE(sav)) stop("sav must be 'R', 'Stata', or FALSE.")
# Dataframe with number of observations per wave
N <-
tibble() %>%
tidyr::expand(year=1:N.T)
for(i in 1:N.G) {
N <-
N %>%
dplyr::mutate("G{i}" := eval(parse(text=LP_n[i])))
}
# Create data structure ------------------------------------------------------------------------ #
d <- replicate(N.T, vector("list", N.G), simplify = FALSE)
for(j in 1:N.G) { # per group
for(i in 1:N.T) { # per time
N.ij <- N %>% dplyr::filter(year==i) %>% dplyr::select(paste0("G", j)) %>% unlist() %>% as.vector()
if(x_const & i > 1) d[[i]][[j]] <- d[[1]][[j]] %>% dplyr::mutate(year=i)
else {
d[[i]][[j]] <-
tibble() %>%
tidyr::expand(id=1:N.ij, year=i, group=j, x=1, t=t_vals) %>%
dplyr::mutate(x=case_when(id %in% sample(1:N.ij, N.ij*eval(parse(text=LP_x[j]))) & x>0 ~ 0, TRUE ~ as.double(x)))
}
}
}
# Create dataframe from list of lists
dat <- ldply(d, .fun = ldply)
# {t=0, year}, {t=1, year+1}?
if(yp1) dat <- dat %>% dplyr::mutate(year=case_when(t>0 ~ as.double(year+1), TRUE ~ as.double(year)))
n <- dim(dat)[1]
# Create income -------------------------------------------------------------------------------- #
incdat <-
dat %>%
tibble() %>%
dplyr::mutate(
inc = rnorm(n, eval(parse(text=LP_mu)), eval(parse(text=LP_sigma))),
lninc = log(inc-(min(inc)-1)*(min(inc)<0))
)
incdat <- incdat %>% dplyr::arrange(year, group, id, x, t) # sort
if(sav=="R") save(incdat, file = "data/incdat.RData") else if (sav=="Stata") write_dta(incdat, path = "data/incdat.dta")
return(incdat)
}
# testDat <-
# crDat(
# N.T=5,
# N.G=3,
# LP_x = c("0.1", "0.1", "0.1"),
# LP_n = c("1000", "1000", "1000-(year>3)*800"),
# LP_mu = "10+100*(group==2)*(year>2)+100*(group==1)*x*t+150*(group==2)*x*t+20*(group==3)*x*t*year",
# LP_sigma = "1+10*(group==1)*x*t+10*(group==2)*x*t+10*(group==3)*x*t+10*(group==3)*x*t*year",
# x_const=F,
# t_vals=c(0,1),
# yp1=F,
# seed=T,
# sav=F
# )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.