# some utility functions on the server
res.load <- function(name) {
destfile = sprintf("%s/%s.dat",local_opts$wdir,name)
flog.info("loading %s",destfile)
# check that dependent resource exists
if (!file.exists(destfile)) {
flog.error("Resource %s does not exists, you need to run the code that creates it.",name)
}
load(destfile)
return(value)
}
res.exists <- function(name) {
destfile = sprintf("%s/%s.dat",local_opts$wdir,name)
file.exists(destfile)
}
res.save <- function(name,value) {
destfile = sprintf("%s/%s.dat",local_opts$wdir,name)
flog.info("saving to %s",destfile)
save(value,file=destfile)
}
get.stats.clusters <- function(data,movers=FALSE,ydep="y1") {
rr = list()
rr$nwid = data[,length(unique(wid))] # unique worker
rr$nfid = data[,length(unique(f1))]
# creating firm info from 2002
fdata = data[,list(.N,ind=ind1[1],size1=size1[1],va1=va1[1],Nm=sum(move==TRUE)),f1]
rr$nfirm_actualsize_ge10 = fdata[N>=10,.N]
rr$nfirm_actualsize_ge50 = fdata[N>=50,.N]
rr$nfirm_reportedsize_ge10 = fdata[size1>=10,.N]
rr$nfirm_reportedsize_ge50 = fdata[size1>=50,.N]
rr$nfirm_movers_ge1 = fdata[Nm>=1,.N]
rr$nfirm_movers_ge5 = fdata[Nm>=5,.N]
rr$nfirm_movers_ge10 = fdata[Nm>=10,.N]
rr$firm_reportedsize_mean = fdata[,mean(size1)]
rr$firm_reportedsize_median = fdata[,median(size1)*1.0]
rr$firm_actualsize_mean = fdata[,mean(N)]
rr$firm_actualsize_median = fdata[,median(N)*1.0]
rr$firm_reportedsize_median_worker = data[,median(size1)*1.0]
rr$firm_actualsize_median_worker = data[,median(asize)*1.0]
rr$firm_mean_log_va = fdata[va1>0,mean(log(va1))]
rr$firm_var_log_va = fdata[va1>0,var(log(va1))]
rr$firm_neg_va = fdata[va1<=0,.N]
rr$worker_share_educ1 = data[,mean(educ==1)]
rr$worker_share_educ2 = data[,mean(educ==2)]
rr$worker_share_educ3 = data[,mean(educ==3)]
rr$worker_var_log_wage = data[,var(get(ydep))]
rr$worker_mean_log_wage = data[,mean (get(ydep))]
rr$worker_skewness_log_wage = data[,skewness(get(ydep))]
rr$worker_kurtosis_log_wage = data[,kurtosis(get(ydep))]
rr$worker_share_ind_manu = data[,mean(ind1=="Manufacturing")]
rr$worker_share_ind_serv = data[,mean(ind1=="Services")]
rr$worker_share_ind_retail = data[,mean(ind1=="Retail trade")]
rr$worker_share_ind_cons = data[,mean(ind1=="Construction etc.")]
# between firm variance
rr$between_firm_wage_var = data[, var(fmw)]
rr$worker_share_age_0_30 = data[,mean(age<=30)]
rr$worker_share_age_31_50 = data[,mean(age>=31 & age<=50)]
rr$worker_share_age_51_inf = data[,mean(age>=51)]
return(rr)
}
compute.mob.matrix <- function(model,nsim=1e7) {
# we simulate serveral time, and collect based on the realization of epsilon in period 2
NNm = model$NNm
NNs = model$NNs
NNm[!is.finite(NNm)]=0
NNs[!is.finite(NNs)]=0
stayer_share = sum(10*NNs)/(sum(10*NNs)+sum(NNm))
NNs = round(NNs*nsim*stayer_share/sum(NNs))
NNm = round(NNm*nsim*(1-stayer_share)/sum(NNm))
# we simulate from the model both movers and stayers
sdata.sim = m4.mixt.simulate.stayers(model,NNs)
jdata.sim = m4.mixt.simulate.movers(model,NNm)
data.sim = rbind(sdata.sim[,list(k,j1,j2=j1,y2,m=0)],jdata.sim[,list(k,j1,j2,y2,m=1)])
# make sure the cluster are ordered
#If = rank(data.sim[,mean(y2),j1][order(j1),V1])
#data.sim[,j1 := If[j1]][,j2:=If[j2]]
rm(sdata.sim,jdata.sim)
# compute the residuals
data.sim[,er:=rank(y2)/.N,list(j1,k)]
# aggregate clusters
ff = function(x) {
if (x<=3) return("k=1..3");
if (x<=7) return("k=4..7");
return("k=8..10");
}
data.sim[,j1g := ff(j1),j1]
data.sim[,j2g := ff(j2),j2]
# code j2=0 when m=0
data.sim[m==0,j2g:="0"]
M1 = acast(data.sim[(er<0.1),.N,list(j1g,j2g,m)],j1g~j2g,fill=0,value.var = "N")
M1 = M1/spread(rowSums(M1),2,4); M1[,1]=1-M1[,1]
M2 = acast(data.sim[,.N,list(j1g,j2g,m)],j1g~j2g,fill=0,value.var = "N")
M2 = M2/spread(rowSums(M2),2,4); M2[,1]=1-M2[,1]
M3 = acast(data.sim[(er>.9),.N,list(j1g,j2g,m)],j1g~j2g,fill=0,value.var = "N")
M3 = M3/spread(rowSums(M3),2,4); M3[,1]=1-M3[,1]
d1 = melt(M1,c('j1','j2'))
d1$cond=1
d2 = melt(M2,c('j1','j2'))
d2$cond=2
d3 = melt(M3,c('j1','j2'))
d3$cond=3
return(data.table(rbind(d1,d2,d3)))
}
analysis.dynamic.dec <- function(model) {
nsim=1e7
NNm = model$NNm
NNs = model$NNs
NNm[!is.finite(NNm)]=0
NNs[!is.finite(NNs)]=0
stayer_share = sum(10*NNs)/(sum(10*NNs)+sum(NNm))
NNs = round(NNs*nsim*stayer_share/sum(NNs))
NNm = round(NNm*nsim*(1-stayer_share)/sum(NNm))
# we only use movers here
jdata.sim = m4.mixt.simulate.movers(model,20*NNm)
# depdendence effect
res = list()
# Y2
jdata.sim[,v1 := var(y2),list(j1,j2,k)]
res$pe_y2_c2 = jdata.sim[,mean(v1,na.rm=T)]
jdata.sim[,m2 := mean(y2),list(j1,j2,k)]
jdata.sim[,v2 := var(m2),list(j1,k)]
res$pe_y2_c1 = jdata.sim[,mean(v2,na.rm=T)]
jdata.sim[,v3 := var(y2), list(j1,k)]
res$pe_y2_tot = jdata.sim[,mean(v3,na.rm=T)]
# Y1
jdata.sim[,v1 := var(y1),list(j1,j2,k)]
res$pe_y1_c2 = jdata.sim[,mean(v1,na.rm=T)]
jdata.sim[,m2 := mean(y1),list(j1,j2,k)]
jdata.sim[,v2 := var(m2),list(j1,k)]
res$pe_y1_c1 = jdata.sim[,mean(v2,na.rm=T)]
jdata.sim[,v3 := var(y1), list(j1,k)]
res$pe_y1_tot = jdata.sim[,mean(v3,na.rm=T)]
# Y3
jdata.sim[,v1 := var(y3),list(j1,j2,k)]
res$pe_y3_c2 = jdata.sim[,mean(v1,na.rm=T)]
jdata.sim[,m2 := mean(y3),list(j1,j2,k)]
jdata.sim[,v2 := var(m2),list(j2,k)]
res$pe_y3_c1 = jdata.sim[,mean(v2,na.rm=T)]
jdata.sim[,v3 := var(y3), list(j2,k)]
res$pe_y3_tot = jdata.sim[,mean(v3,na.rm=T)]
# also compute y1,y4
jdata.sim[,v1 := var(y4),list(j1,j2,k)]
res$pe_y4_c2 = jdata.sim[,mean(v1,na.rm=T)]
jdata.sim[,m2 := mean(y4),list(j1,j2,k)]
jdata.sim[,v2 := var(m2),list(j2,k)]
res$pe_y4_c1 = jdata.sim[,mean(v2,na.rm=T)]
jdata.sim[,v3 := var(y4), list(j2,k)]
res$pe_y4_tot = jdata.sim[,mean(v3,na.rm=T)]
# --------- firm effect ------------- #
jdata.sim[,m3 := mean(y2),list(j1,k)]
jdata.sim[,v3 := var(m3),list(k)]
res$fe_y2 = jdata.sim[,mean(v3)]
jdata.sim[,m3 := mean(y3),list(j2,k)]
jdata.sim[,v3 := var(m3),list(k)]
res$fe_y3 = jdata.sim[,mean(v3)]
jdata.sim[,m3 := mean(y1),list(j2,k)]
jdata.sim[,v3 := var(m3),list(k)]
res$fe_y1 = jdata.sim[,mean(v3)]
jdata.sim[,m3 := mean(y4),list(j2,k)]
jdata.sim[,v3 := var(m3),list(k)]
res$fe_y4 = jdata.sim[,mean(v3)]
# --------- network effect ------------- #
# we need to compute the following 2 terms
# sum_{k'}p(k'|k)E(Y3|k') + sum_{k'}p(k'|k)(E(Y3|k',k)-E(Y3|k'))
# first term
jdata.sim[, m1 := mean(y3), list(k,j2)]
jdata.sim[, a1 := mean(m1), list(k,j1)]
# last term
jdata.sim[, m2 := mean(y3), list(k,j1,j2)]
jdata.sim[, a2 := mean(m2-m1), list(k,j1)]
# total
jdata.sim[, m3 := mean(y3), list(k,j1)]
rr3 = jdata.sim[, { vv = cov(cbind(a1,a2)); data.frame(vot=var(m3),v1=vv[1,1],v2=vv[2,2],cc=vv[1,2],N=.N)},k]
rr3 = rr3[,list(v1=wt.mean(v1,N),v2=wt.mean(v2,N),cc=wt.mean(cc,N))]
res$ne_y3_v1 = rr3$v1
res$ne_y3_v2 = rr3$v2
res$ne_y3_cov = rr3$cc
# first term
jdata.sim[, m1 := mean(y4), list(k,j2)]
jdata.sim[, a1 := mean(m1), list(k,j1)]
# last term
jdata.sim[, m2 := mean(y4), list(k,j1,j2)]
jdata.sim[, a2 := mean(m2-m1), list(k,j1)]
# total
jdata.sim[, m3 := mean(y4), list(k,j1)]
rr4 = jdata.sim[, { vv = cov(cbind(a1,a2)); data.frame(vot=var(m3),v1=vv[1,1],v2=vv[2,2],cc=vv[1,2],N=.N)},k]
rr4 = rr4[,list(v1=wt.mean(v1,N),v2=wt.mean(v2,N),cc=wt.mean(cc,N))]
res$ne_y4_v1 = rr4$v1
res$ne_y4_v2 = rr4$v2
res$ne_y4_cov = rr4$cc
return(data.frame(res))
}
analysis.dynamic.dec.bis <- function(model,nsim=1e7) {
NNm = model$NNm
NNs = model$NNs
NNm[!is.finite(NNm)]=0
NNs[!is.finite(NNs)]=0
stayer_share = sum(10*NNs)/(sum(10*NNs)+sum(NNm))
NNs = round(NNs*nsim*stayer_share/sum(NNs))
NNm = round(NNm*nsim*(1-stayer_share)/sum(NNm))
NNm = round(nsim * NNm/sum(NNm))
# we only use movers here
jdata.sim = m4.mixt.simulate.movers(model,NNm)
#sdata.sim = m4.mixt.simulate.stayers(model,NNs)
# reorder
#If = rank(sdata.sim[,mean(y2),j1][order(j1),V1])
#jdata.sim[,j1 := If[j1]][,j2:=If[j2]]
# ----- Var(Y1), Y2, Y3, Y4 within alpha ------- #
res = list()
v_y1_unc = jdata.sim[,v_y1 := var(y1),k][,mean(v_y1)]
v_y2_unc = jdata.sim[,v_y2 := var(y2),k][,mean(v_y2)]
v_y3_unc = jdata.sim[,v_y3 := var(y3),k][,mean(v_y3)]
v_y4_unc = jdata.sim[,v_y4 := var(y4),k][,mean(v_y4)]
res$v_y1_unc=v_y1_unc
res$v_y2_unc=v_y2_unc
res$v_y3_unc=v_y3_unc
res$v_y4_unc=v_y4_unc
#jdata.sim[,v_y2 := var(y2),k]
#jdata.sim[,v_y3 := var(y3),k]
#jdata.sim[,v_y4 := var(y4),k]
# ----- Wa Bk of Y1, Y2 ----- #
jdata.sim[, m1 := mean(y1), list(k,j1)]
jdata.sim[, v1 := var(m1), list(k)]
res$y1_Wa_Bk1 = jdata.sim[, mean(v1/v_y1_unc)]
jdata.sim[, m1 := mean(y2), list(k,j1)]
jdata.sim[, v1 := var(m1), list(k)]
res$y2_Wa_Bk1 = jdata.sim[, mean(v1/v_y2_unc)]
# ------- Wa,k Bk' of Y1,Y2 ------- #
jdata.sim[, m1 := mean(y1), list(k,j1,j2)]
jdata.sim[, v1 := var(m1), list(k,j1)]
res$y1_Wak1_Bk2 = jdata.sim[, mean(v1/v_y1_unc)]
jdata.sim[, m1 := mean(y2), list(k,j1,j2)]
jdata.sim[, v1 := var(m1), list(k,j1)]
res$y2_Wak1_Bk2 = jdata.sim[, mean(v1/v_y2_unc)]
# ---------- Wa,k,k' of Y1,Y2 -------- #
jdata.sim[, v1 := var(y1), list(k,j1,j2)]
jdata.sim[is.na(v1),v1:=0]
res$y1_Wak1k2 = jdata.sim[, mean(v1/v_y1_unc)]
jdata.sim[, v1 := var(y2), list(k,j1,j2)]
jdata.sim[is.na(v1),v1:=0]
res$y2_Wak1k2 = jdata.sim[, mean(v1/v_y2_unc)]
# ----- Wa Bk' of Y3, Y4 ----- #
jdata.sim[, m1 := mean(y3), list(k,j2)]
jdata.sim[, v1 := var(m1), list(k)]
res$y3_Wa_Bk2 = jdata.sim[, mean(v1/v_y3_unc)]
jdata.sim[, m1 := mean(y4), list(k,j2)]
jdata.sim[, v1 := var(m1), list(k)]
res$y4_Wa_Bk2 = jdata.sim[, mean(v1/v_y4_unc)]
# ------- Wa,k' Bk of Y3,Y4 ------- #
jdata.sim[, m1 := mean(y3), list(k,j1,j2)]
jdata.sim[, v1 := var(m1), list(k,j2)]
res$y3_Wak2_Bk1 = jdata.sim[, mean(v1/v_y3_unc)]
jdata.sim[, m1 := mean(y4), list(k,j1,j2)]
jdata.sim[, v1 := var(m1), list(k,j2)]
res$y4_Wak2_Bk1 = jdata.sim[, mean(v1/v_y4_unc)]
# ---------- Wa,k,k' of Y3,Y4 -------- #
jdata.sim[, v1 := var(y3), list(k,j1,j2)]
jdata.sim[is.na(v1),v1:=0]
res$y3_Wak1k2 = jdata.sim[, mean(v1/v_y3_unc)]
jdata.sim[, v1 := var(y4), list(k,j1,j2)]
jdata.sim[is.na(v1),v1:=0]
res$y4_Wak1k2 = jdata.sim[, mean(v1/v_y4_unc)]
# ------- Wa Bk of Y3, Y4 -------- #
jdata.sim[, m1 := mean(y3), list(k,j1)]
jdata.sim[, v1 := var(m1), list(k)]
res$y3_Wa_Bk1 = jdata.sim[, mean(v1/v_y3_unc)]
jdata.sim[, m1 := mean(y4), list(k,j1)]
jdata.sim[, v1 := var(m1), list(k)]
res$y4_Wa_Bk1 = jdata.sim[, mean(v1/v_y4_unc)]
# -------- network --------
# first term
jdata.sim[, m1 := mean(y3), list(k,j2)]
jdata.sim[, a2 := mean(m1), list(k,j1)]
jdata.sim[, v1 := var(a2), list(k)]
res$y3net_Wa_Bk1 = jdata.sim[, mean(v1/v_y3_unc)]
jdata.sim[, m1 := mean(y4), list(k,j2)]
jdata.sim[, a2 := mean(m1), list(k,j1)]
jdata.sim[, v1 := var(a2), list(k)]
res$y4net_Wa_Bk1 = jdata.sim[, mean(v1/v_y4_unc)]
res$y3var_diff = res$y3_Wa_Bk1- res$y3net_Wa_Bk1
res$y4var_diff = res$y4_Wa_Bk1- res$y4net_Wa_Bk1
return(data.frame(res))
}
generate_simulated_data = function(force=FALSE) {
if ( (force==FALSE) & file.exists(sprintf("%s/data-tmp/data-static.dat",local_opts$wdir))) {
flog.info("data already exists, skipping simulation.")
} else {
load("inst/m2-mixt-y2003-main-fixb.rkiv")
res_main = value
model = res_main$model
sim = m2.mixt.simulate.sim(model,fsize = 50)
sdata = sim$sdata
jdata = sim$jdata
sdata$move=0
jdata$move=1
sdata[, birthyear := sample(1960:1980,.N,replace=T)]
jdata[, birthyear := sample(1960:1980,.N,replace=T)]
jdata[,x:=1][,j2true:=NULL]
sdata[,wid:=sprintf("W%i",1:.N)]
ns =sdata[,.N]
jdata[,wid:=sprintf("W%i", ns+ (1:.N))]
# draw industry for each firm
fdata = data.table(f1 = unique(c(sdata$f1,jdata$f1,jdata$f2)))
fdata [, ind := sample(c("Manufacturing","Services","Retail trade","Construction etc."),.N,replace=T)]
fdata [, va := exp(rnorm(.N)),f1]
setkey(fdata,f1)
setkey(sdata,f1)
sdata[, ind1:= fdata[sdata,ind]]
sdata[, va1 := fdata[sdata,va]]
setkey(sdata,f2)
sdata[, ind2:= fdata[sdata,ind]]
sdata[, va2 := fdata[sdata,va]]
setkey(jdata,f1)
jdata[, ind1:= fdata[jdata,ind]]
jdata[, va1 := fdata[jdata,va]]
setkey(jdata,f2)
jdata[, ind2:= fdata[jdata,ind]]
jdata[, va2 := fdata[jdata,va]]
sdata[, educ:= sample(1:3,.N,replace=T)]
jdata[, educ:= sample(1:3,.N,replace=T)]
sdata[,size1 := .N,f1]
jdata[,size1 := .N,f1]
sdata = rbind(jdata,sdata)
save(sdata,jdata,file=sprintf("%s/data-tmp/data-static.dat",local_opts$wdir))
flog.info("!!! Using simulated data for static")
}
if ( (force==FALSE) & file.exists(sprintf("%s/data-tmp/data-dynamic.dat",local_opts$wdir))) {
flog.info("data already exists, skipping simulation.")
} else {
load("inst/m4-mixt-d2003-main.rkiv")
res_main = value
model = res_main$model
sim = m4.mixt.simulate.sim(model,fsize = 50)
sdata = sim$sdata
jdata = sim$jdata
sdata$move=0
jdata$move=1
sdata[, birthyear := sample(1960:1980,.N,replace=T)]
jdata[, birthyear := sample(1960:1980,.N,replace=T)]
jdata[,x:=1][,j2true:=NULL][,j1true:=NULL]
sdata[,x:=1][,j1true:=NULL]
sdata[,wid:=sprintf("W%i",1:.N)]
ns =sdata[,.N]
jdata[,wid:=sprintf("W%i", ns+ (1:.N))]
# draw industry for each firm
fdata = data.table(f1 = unique(c(sdata$f1,jdata$f1,jdata$f2)))
fdata [, ind := sample(c("Manufacturing","Services","Retail trade","Construction etc."),.N,replace=T)]
fdata [, va := exp(rnorm(.N)),f1]
setkey(fdata,f1)
setkey(sdata,f1)
sdata[, ind1:= fdata[sdata,ind]]
sdata[, va1 := fdata[sdata,va]]
setkey(sdata,f2)
sdata[, ind2:= fdata[sdata,ind]]
sdata[, va2 := fdata[sdata,va]]
setkey(jdata,f1)
jdata[, ind1:= fdata[jdata,ind]]
jdata[, va1 := fdata[jdata,va]]
setkey(jdata,f2)
jdata[, ind2:= fdata[jdata,ind]]
jdata[, va2 := fdata[jdata,va]]
sdata[, educ:= sample(1:3,.N,replace=T)]
jdata[, educ:= sample(1:3,.N,replace=T)]
sdata[,size1 := .N,f1]
jdata[,size1 := .N,f1]
sdata[,va1 := exp(rnorm(.N)),f1]
jdata[,va1 :=exp(rnorm(.N)),f1]
sdata[,j2:=j1]
sdata = rbind(jdata,sdata)
save(sdata,jdata,file=sprintf("%s/data-tmp/data-dynamic.dat",local_opts$wdir))
flog.info("!!! Using simulated data for dynamic")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.