# This implements the probabilistic model (in stationary)
# ------------- Initiliazing functions ---------------------
# create a random model for EM with
# endogenous mobility with multinomial pr
#' @export
m2.proba.new <-function(nk,nf,fixb=F) {
model = list()
# model for Y1,Y2
model$A1 = array(0.9*(1 + 0.5*rnorm(nf*nk)),c(nf,nk))
model$S1 = 0.3*array(1+0.5*runif(nf*nk),c(nf,nk))
model$A2 = array(0.9*(1 + 0.5*rnorm(nf*nk)),c(nf,nk))
model$S2 = 0.3*array(1+0.5*runif(nf*nk),c(nf,nk))
# model for p(K | l ,l') for movers
model$g1 = rdim(rdirichlet(nf*nf,rep(1,nk)),nf,nf,nk)
# model for p(K | l ,l') for stayers
model$g0 = rdirichlet(nf,rep(1,nk))
# model for mobility
model$pm = array(0.1,c(nf,nk))
# distribution for k and l
model$pk = rdirichlet(1,rep(1,nk))
model$pl = rdirichlet(1,rep(1,nf))
model$nk = nk
model$nf = nf
for (l in 1:nf) {
model$A1[l,] = sort(model$A1[l,])
}
return(model)
}
#' create proba model from mixt2 model
#'
#' @export
m2.proba.new.from.mixt <-function(model0,sdata_subsample=0.1,mobility_smooth=0.001) {
model = list()
# wages are identical
model[c('A1','A2','S1','S2','nk','nf')] = model0[c('A1','A2','S1','S2','nk','nf')]
nf = model0$nf
nk = model0$nk
pk0 = model0$pk0[1,,]
pk1 = rdim(model0$pk1,model0$nf,model0$nf,model0$nk)
Ns = model0$NNs/sdata_subsample + as.numeric(rowSums(model0$NNm))
pr_joint = (pk0+mobility_smooth) * spread(Ns/sum(Ns),2,model0$nk)
# distribution for k
model$pk = colSums(pr_joint)
# model for choice of class in period 1
model$g0 = log(pr_joint / spread(model$pk,1,model0$nf))
# model for mobility
model$pm = array(0,c(nf,nk))
for (l in 1:nf) for (k in 1:nk) {
model$pm[l,k] = sum(model0$NNm[l,] * pk1[l,,k])/(model0$NNs[l]/sdata_subsample * pk0[l,k] + sum(model0$NNm[l,] * pk1[l,,k]))
}
# model for choice of class in period 2 conditional on moving
model$g1 = array(0,c(nf,nf,nk))
pr_joint1 = (pk1) * spread((model0$NNm+mobility_smooth)/sum(model0$NNm+mobility_smooth),3,model0$nk)
for (l1 in 1:nf) for (k in 1:k) {
model$g1[l1,,k] = log(pr_joint1[l1,,k]/sum(pr_joint1[l1,,k]))
}
return(model)
}
# ------------- Simulating functions ---------------------
#' impute wages on existing data
#' @export
m2.proba.impute.stayers <- function(sdatae,model) {
A1 = model$A1
S1 = model$S1
g0 = model$g0
A2 = model$A2
S2 = model$S2
nk = model$nk
nf = model$nf
# we compute the probability of k|j1
pk0 = exp(model$g0) * spread(as.numeric(model$pk),1,model$nf)
pk0 = pk0 / spread(rowSums(pk0),2,model$nk)
# ------ impute K, Y1, Y4 on jdata ------- #
sdatae.sim = copy(sdatae)
sdatae.sim[, c('k_imp','y1_imp') := {
ni = .N
Ki = sample.int(nk,.N,prob = pk0[j1,],replace=T)
# draw Y2, Y3
Y1 = A1[j1,Ki] + S1[j1,Ki] * rnorm(ni)
list(Ki,Y1)
},list(j1)]
return(sdatae.sim)
}
# -------------------- Estimating functions -----------------------------
#' computing the likelihood of stayers for a given group
#' of workers in a cluster
#'
#' @param Y1 vector of log-wages in period 1
#' @param J1s vector of firm classes in period 1
#' @param sj1 a vector a log-size of the firm when the worker is employed
#' @param model a probabilistic model
#' @param lik.den pre-computed denominators
#' @export
m2.proba.lik.stayers <- function(Y1,J1s,sj1,model,lik.den,include_j=T) {
lps = array(0,c(length(Y1),model$nk))
for (l1 in 1:model$nf) {
I = which(J1s==l1)
for (k in 1:model$nk) {
lpr_y1 = lognormpdf(Y1[I] , model$A1[l1,k], model$S1[l1,k]) # the wage
lpr_l1 = model$g0[l1,k] - lik.den$R0[k] # choosing the current cluster given the type k
lpr_j1 = sj1[I] - lik.den$Rj[l1] # choosing the current firm within the cluster
lps[I,k] = log(model$pk[k]) + lpr_l1+ lpr_j1 + lpr_y1 + log(1-model$pm[l1,k])
}
}
sum(logRowSumExp(lps))
}
#' computing the likelihood of stayers for a given group
#' of workers in a cluster
#'
#' @param Y1 vector of log-wages in period 1
#' @param Y2 vector of log-wages in period 2
#' @param J1m vector of firm types in period 1
#' @param J2m vector of firm types in period 2
#' @param sj1 a vector a log-size of the firm when the worker is employed in period 1
#' @param sj2 a vector a log-size of the firm when the worker is employed in period 2
#' @param R0 a vector of constants in the probabilistic model for choice of cluster in period 1
#' @param R1 a vector of constants in the probabilistic model for choice of cluster in period 2
#' @param Rj1 a vector of constants in the probabilistic model for choice of a firm within a cluster in period 1
#' @param Rj2 a vector of constants in the probabilistic model for choice of a firm within a cluster in period 2
#' @param model a probabilistic model
#' @export
m2.proba.lik.movers <- function(Y1m,Y2m,J1m,J2m,sj1,sj2,model,lik.den,include_j=T) {
lpm = array(0,c(length(Y1m),model$nk))
for (l1 in 1:model$nf) for (l2 in 1:model$nf) {
I = which( (J1m==l1) & (J2m==l2))
if (length(I)==0) next;
for (k in 1:model$nk) {
# worker mobility
lpr_k1 = model$g0[l1,k] - lik.den$R0[k]
lpr_move = log(model$pm[l1,k])
lpr_k2 = model$g1[l1,l2,k] - lik.den$R1[l1,k]
# picking the firms
lpr_j1 = sj1[I] - lik.den$Rj[l1]
# if same cluster, needs to remove the PR of the firm in period 1
if (l1==l2) {
#lpr_j2 = sj2[I] - log( exp(lik.den$Rj[l2]) - sj1[I])
lpr_j2 = sj2[I] - lik.den$Rj[l2] - log( 1 - exp(sj1[I]-lik.den$Rj[l2]))
} else {
lpr_j2 = sj2[I] - lik.den$Rj[l2]
}
# wages
lpr_y1 = lognormpdf(Y1m[I] , model$A1[l1,k], model$S1[l1,k])
lpr_y2 = lognormpdf(Y2m[I] , model$A2[l2,k], model$S2[l2,k])
lpm[I,k] = log(model$pk[k]) + lpr_k1 + lpr_move + lpr_k2 + lpr_y1 + lpr_j1 + lpr_j2 + lpr_y2
}
}
sum(logRowSumExp(lpm))
}
#' computes the total liklihood
#'
#' @param sim jdata and sdata
#' @param model is a model.proba list with A1,S1,A2,S2,pm,g0,g1,pk,nk,nf,Jall_l
#' @export
m2.prob.lik.all <- function(sim,model,ctrl) {
# ------- UPDATING SOME FIRMS ------- #
sdata =sim$sdata
jdata = sim$jdata
dprior = ctrl$dprior
model0 = ctrl$model0
taum = ctrl$tau
### ----- GET MODEL ---
nk = model$nk
nf = model$nf
A1 = model$A1
S1 = model$S1
A2 = model$A1
S2 = model$S1
g0 = model$g0
g1 = model$g1
pk = model$pk
pl = model$pl
pm = model$pm
# ----- GET DATA
# movers
Y1m = jdata$y1
Y2m = jdata$y2
J1m = jdata$j1
J2m = jdata$j2
F1m = jdata$f1
F2m = jdata$f2
# stayers
Y1s = sdata$y1
Y2s = sdata$y2
J1s = sdata$j1
F1s = sdata$f1
# create the depend variables
nfirms = sdata[,length(unique(f1))]
ns = sdata[,.N]
nm = jdata[,.N]
# current draw for firm types
ddtmp = unique(sdata[,.N,list(f1,j1)])
Jall_l = ddtmp$j1
Jall_f = ddtmp$f1
Jall_s = ddtmp$N
# extract realized size in cross-section
sdata[,n1:=.N,f1]
sizedata = unique(sdata[,list(n1,f1)])
setkey(sizedata,f1)
setkey(jdata,f1)
jdata[,n1:=sizedata[jdata,n1]]
setkey(jdata,f2)
jdata[,n2:=sizedata[jdata,n1]]
N1s = sdata$n1
N1m = jdata$n1
N2m = jdata$n2
## --- update firms --- #
## we prepare a set of constants that will be useful
## these are the liklihood in the current (k_1....k_J) for all firms
dd = list(Y1s=Y1s,J1s=J1s,J1m=J1m,J2m=J2m,Y1m=Y1m,Y2m=Y2m,F1s=F1s,F1m=F1m,F2m=F2m,N1m=N1m,N2m=N2m,N1s=N1s)
likden = m2.proba.likden(model,Jall_s,Jall_l)
rr = m2.prob.lik(dd,model,likden,dprior=dprior)
return(rr)
}
#' computes the liklihood of movers and stayers
#'
#' @param dd is a list with Y1s, J1s, Y1m, Y2m, J1m, J2m, Jall_f
#' @param model is list with A1,S1,A2,S2,pm,g0,g1,pk,nk,nf,Jall_l
#' @export
m2.prob.lik <- function(dd,model,likden,dprior=1.01) {
nk = model$nk
nf = model$nf
ns = length(dd$Y1s)
nm = length(dd$Y1m)
lik_stayers_l = array(0,nf)
lik_movers_ll = array(0,c(nf,nf))
## --- STAYERS --- #
lps = array(0,c(ns,nk))
for (l1 in 1:nf) {
I = which( (dd$J1s==l1))
if (length(I)==0) next;
for (k in 1:nk) {
lpr_y1 = lognormpdf(dd$Y1s[I] , model$A1[l1,k], model$S1[l1,k]) # the wage
lpr_k1 = model$g0[l1,k] - likden$R0[k]
lpr_j1 = log(dd$N1s[I]) - likden$Rj[l1]
lps[I,k] = log(model$pk[k]) + lpr_k1+ lpr_j1 + lpr_y1 + log(1-model$pm[l1,k])
}
lik_stayers_l[l1] = sum(logRowSumExp(lps[I,]))
}
liks = sum(logRowSumExp(lps))
taus = exp(lps - spread(logRowSumExp(lps),2,nk)) # normalize the k probabilities Pr(k|Y1,Y2,Y3,Y4,l)
# prior
liks_prior = (dprior-1) * sum(log(model$pk))
liks = liks
## --- MOVERS --- #
lpm = array(0,c(nm,nk))
for (l1 in 1:nf) for (l2 in 1:nf) {
I = which( (dd$J1m==l1) & (dd$J2m==l2))
if (length(I)==0) next;
for (k in 1:nk) {
lpr_y1 = lognormpdf(dd$Y1m[I] , model$A1[l1,k], model$S1[l1,k])
lpr_k1 = model$g0[l1,k] - likden$R0[k]
lpr_j1 = log(dd$N1m[I]) - likden$Rj[l1]
lpr_move = log(model$pm[l1,k])
lpr_k2 = model$g1[l1,l2,k] - likden$R1[l1,k]
if (l1==l2) {
lpr_j2 = log(dd$N2m[I]) - likden$Rj[l2] - log( 1 - exp(log(dd$N1m[I])-likden$Rj[l2]))
if (any(!is.finite(lpr_j2))) browser();
} else {
lpr_j2 = log(dd$N2m[I]) - likden$Rj[l2]
}
lpr_y2 = lognormpdf(dd$Y2m[I] , model$A2[l2,k], model$S2[l2,k])
# sum the log of all
lpm[I,k] = log(model$pk[k]) + lpr_y1 + lpr_j1 +
lpr_k1 + lpr_move + lpr_k2 + lpr_j2 + lpr_y2
}
lik_movers_ll[l1,l2] = sum(logRowSumExp(lpm[I,]))
}
likm = sum(logRowSumExp(lpm))
taum = exp(lpm - spread(logRowSumExp(lpm),2,nk)) # normalize the k probabilities Pr(k|Y1,Y2,Y3,Y4,l)
# prior
likm_prior = (dprior-1) * sum(log(model$pk))
likm = likm
lik = likm + liks + likm_prior + liks_prior
return(list(lik=lik,liks=liks,likm=likm,taum=taum,taus=taus,lik_stayers_l=lik_stayers_l,lik_movers_ll=lik_movers_ll))
}
#' precompute the likelihood denominator constants
#' @export
m2.proba.likden <- function(model,Jall_s,Jall_l) {
## prepapre denominator constants for cluster choices
R0 = array(0,c(model$nk))
for (k in 1:model$nk) {
R0[k] = logsumexp(model$g0[,k])
}
R1 = array(0,c(model$nf,model$nk))
for (k in 1:model$nk) for (l1 in 1:model$nf) {
R1[l1,k] = logsumexp(model$g1[l1,,k])
}
## prepapre denominator constants for firm choice within cluster
Rj = array(0,c(model$nf))
for (l1 in 1:model$nf) {
Rj[l1] = logsumexp(log(Jall_s[Jall_l==l1]))
}
## prepapre denominator constants for firm choice within cluster for movers
#Rjm = array(0,c(model$nf))
#for (l2 in 1:model$nf) {
# Rjm[l2] = logsumexp(log(Jall_s2[Jall_l2==l2]))
#}
if (any(!is.finite(Rj))) browser();
#return(list(R0=R0,R1=R1,Rj=Rj,Rjm=Rjm))
return(list(R0=R0,R1=R1,Rj=Rj))
}
#' update the classification using probabilistic model
#'
#' We take a current classification, and value of the model. We then
#' draw new firm classes according to a full probabilistic model.
#' @export
m2.proba.updateclasses <- function(sim,model0,pl,ctrl,debug=FALSE,mobility_smooth=0.0001) {
# what we need here is to be able to update the classification according
# to the probabilistic model. This means that I need to compute the posterior
# probabilities for each firm and each type.
# this then firest requires to map the BLM model into the probabilistic model.
model = m2.proba.new.from.mixt(model0,mobility_smooth=mobility_smooth)
model$pl = pl
# we aslo need to extract firm size info (required in the probabilistic model)
# ------- UPDATING SOME FIRMS ------- #
sdata =sim$sdata
jdata = sim$jdata
dprior = ctrl$dprior
model0 = ctrl$model0
taum = ctrl$tau
### ----- GET MODEL ---
nk = model$nk
nf = model$nf
A1 = model$A1
S1 = model$S1
A2 = model$A1
S2 = model$S1
g0 = model$g0
g1 = model$g1
pk = model$pk
pl = model$pl
pm = model$pm
# ----- GET DATA
# movers
Y1m = jdata$y1
Y2m = jdata$y2
J1m = jdata$j1
J2m = jdata$j2
F1m = jdata$f1
F2m = jdata$f2
# stayers
Y1s = sdata$y1
Y2s = sdata$y2
J1s = sdata$j1
F1s = sdata$f1
# create the depend variables
nfirms = sdata[,length(unique(f1))]
ns = sdata[,.N]
nm = jdata[,.N]
# current draw for firm types
ddtmp = unique(sdata[,.N,list(f1,j1)])
Jall_l = ddtmp$j1
Jall_f = ddtmp$f1
Jall_s = ddtmp$N
# extract realized size in cross-section
sdata[,n1:=.N,f1]
sizedata = unique(sdata[,list(n1,f1)])
setkey(sizedata,f1)
setkey(jdata,f1)
jdata[,n1:=sizedata[jdata,n1]]
setkey(jdata,f2)
jdata[,n2:=sizedata[jdata,n1]]
N1s = sdata$n1
N1m = jdata$n1
N2m = jdata$n2
# extract lieklihood of this classification in this model using pl
lik_before_firm_update_gpr = sim$sdata[,length(unique(f1)),j1][,sum(V1*log(pl[j1]))]
## --- update firms --- #
## we prepare a set of constants that will be useful
## these are the liklihood in the current (k_1....k_J) for all firms
dd = list(Y1s=Y1s,J1s=J1s,J1m=J1m,J2m=J2m,Y1m=Y1m,Y2m=Y2m,F1s=F1s,F1m=F1m,F2m=F2m,N1m=N1m,N2m=N2m,N1s=N1s)
likden = m2.proba.likden(model,Jall_s,Jall_l)
rr = m2.prob.lik(dd,model,likden,dprior=dprior)
lik_before_firm_update = rr$lik
lik_before_firm_update_movers = rr$likm
lik_before_firm_update_stayers = rr$liks
posterior_pr = array(0,c(ctrl$nfirms_to_update,nf))
all_res = data.frame()
# we draw some firms to update
firms_to_update_indices = sample.int(nfirms,ctrl$nfirms_to_update)
firms_to_update_new_types = rep(0,ctrl$nfirms_to_update)
for (ii in 1:(ctrl$nfirms_to_update)) {
findex = firms_to_update_indices[[ii]]
fidcur = Jall_f[[findex]]
lprs = rep(0,nf) # likelihood stayers
lprm = rep(0,nf) # likelihood movers
lpr = rep(0,nf) # likelihood total
l1cur = Jall_l[findex] # the current class of the firm
# next, we loop other clusters
for (l1 in 1:nf) {
# update constants for the computation of the likelihood
likden_guess = copy(likden)
likden_guess$Rj[l1cur] = log(exp(likden_guess$Rj[l1cur]) - Jall_s[findex])
likden_guess$Rj[l1] = log(exp(likden_guess$Rj[l1]) + Jall_s[findex])
ll_affected = union(l1,l1cur)
# ------------ STAYERS ------------ #
# finding all stayers in clusters (l1_cur,l1_guess)
Is_affected = which( (J1s == l1) | (J1s == l1cur))
if (ctrl$proba_include[2]==0) Is_affected = which(F1s == fidcur);
J1s_guess = J1s[Is_affected]
# update the class for the current firm
J1s_guess[ F1s[Is_affected] == fidcur ] = l1 # update workers in cur frim
# compute the likelihood for this group
lprs[l1] = (ctrl$proba_include[1]==TRUE)*m2.proba.lik.stayers(Y1s[Is_affected],J1s_guess,log(N1s[Is_affected]),model,likden_guess)
# add the likelihood from the other stayers
lprs[l1] = lprs[l1] + (ctrl$proba_include[2]==TRUE)*(rr$liks - sum(rr$lik_stayers[ll_affected]))
# ------------ MOVERS ------------ #
# finding all movers coming and going to clsuters (l1_cur,l1_guess) and
Im_affected = which( (J1m == l1) | (J1m == l1cur) | (J2m == l1) | (J2m == l1cur))
if (ctrl$proba_include[4]==0) Im_affected = which( (F1m == fidcur) | ( F2m== fidcur));
J1m_guess = J1m[Im_affected]
J2m_guess = J2m[Im_affected]
# update the class for the current firm
J1m_guess[ F1m[Im_affected] == fidcur ] = l1 # update workers moving from current firm
J2m_guess[ F2m[Im_affected] == fidcur ] = l1 # update workers moving to current firm
# compute the likelihood
lprm[l1] = lprm[l1] + (ctrl$proba_include[3]==TRUE)* m2.proba.lik.movers(Y1m[Im_affected],Y2m[Im_affected],J1m_guess,J2m_guess,log(N1m[Im_affected]),log(N2m[Im_affected]),model,likden_guess)
lprm[l1] = lprm[l1] + (ctrl$proba_include[4]==TRUE)*(rr$likm - sum(rr$lik_movers_ll[,ll_affected]) - sum(rr$lik_movers_ll[ll_affected,]) +
sum(rr$lik_movers_ll[ll_affected,ll_affected])) # the last is to not remove the intersection twice
# combine the two
lpr[l1] = lprs[l1] + lprm[l1]
posterior_pr[ii,l1] = lpr[l1]
}
lpr = lpr + log(pl) # adding the probability of each class among firms
rownames(posterior_pr) = Jall_f[firms_to_update_indices]
firms_to_update_new_types[[ii]] = sample.int(nf,1,prob=exp(lpr-max(lpr)))
if (debug==TRUE) {
class_true = sdata[f1==Jall_f[[findex]]][,j1true[1]]
class_map = which.max(exp(lpr-max(lpr)))
class_pre = Jall_l[[findex]]
pr_map = max( exp(lpr-max(lpr))/(sum(exp(lpr-max(lpr)))) )
str1 = ifelse(class_true==firms_to_update_new_types[[ii]],"*","")
str2 = ifelse(class_pre!=firms_to_update_new_types[[ii]],"!","")
flog.info("j1true=%2i j1pre=%2i j1map=%2i j1new=%2i pr_map=%2f %s%s",class_true,class_pre,class_map, firms_to_update_new_types[[ii]],pr_map,str2,str1)
tmp=0
all_res = rbind(all_res,data.frame(f1=fidcur,l1=1:model$nf,liks=lprs,likm=lprm,l1pre=Jall_l[[findex]],l1true=class_true))
} else {
class_map = which.max(exp(lpr-max(lpr)))
class_pre = Jall_l[[findex]]
pr_map = max( exp(lpr-max(lpr))/(sum(exp(lpr-max(lpr)))) )
str2 = ifelse(class_pre!=firms_to_update_new_types[[ii]],"!","")
flog.info("j1pre=%2i j1map=%2i j1new=%2i pr_map=%2f %s",class_pre,class_map, firms_to_update_new_types[[ii]],pr_map,str2)
all_res = rbind(all_res,data.frame(f1=fidcur,l1=1:model$nf,liks=lprs,likm=lprm,l1pre=Jall_l[[findex]]))
}
if (ii%%50==0) flog.info("computing firms posterior %i/%i",ii,ctrl$nfirms_to_update);
}
# construct the updated set of firms ID
Jall_l_new = copy(Jall_l)
for (ii in 1:(ctrl$nfirms_to_update)) {
findex = firms_to_update_indices[[ii]] # index of the firm to update
Jall_l_new[[findex]] = firms_to_update_new_types[[ii]]
}
names(Jall_l_new) = Jall_f
flog.info("done updating firms (%i/%i changed)", sum(Jall_l_new!=Jall_l), ctrl$nfirms_to_update)
return(list(grps=Jall_l_new,all=all_res,updates=sum(Jall_l_new!=Jall_l),
lik_before_firm_update=lik_before_firm_update,posterior_pr=posterior_pr,lik_before_firm_update_movers=lik_before_firm_update_movers,lik_before_firm_update_stayers=lik_before_firm_update_stayers,lik_before_firm_update_gpr=lik_before_firm_update_gpr))
}
#' computes simple posterior probabilities
#' @export
m2.prob.get_firm_posteriors <- function(sim,model) {
nk = model$nk
nf = model$nf
lik_stayers_l = array(0,nf)
lik_movers_ll = array(0,c(nf,nf))
R0 = array(0,c(model$nk))
for (k in 1:model$nk) {
R0[k] = logsumexp(model$g0[,k])
}
## we can just loop over l1 and k
Y1s = sim$sdata$y1
ns = sim$sdata[,.N]
dd = sim$sdata[,list(f1)]
nfids = length(unique(sim$sdata$f1))
lpr_post = array(0,c(nfids,nf))
## --- compute posterior for each i,l1 --- #
lps = array(0,c(ns,nk))
lps_post = array(0,c(ns,nf))
for (l1 in 1:nf) {
for (k in 1:nk) {
lpr_y1 = lognormpdf(Y1s , model$A1[l1,k], model$S1[l1,k]) # the wage
lpr_k1 = model$g0[l1,k] - R0[k]
lps[,k] = log(model$pk[k]) + lpr_k1 + lpr_y1 + log(1-model$pm[l1,k])
}
lps_post[,l1] = logRowSumExp(lps)
# aggregate across firms
dd[, lpost := lps_post[,l1]]
lpr_post[,l1] = dd[,sum(lpost),f1][,V1]
}
rownames(lpr_post) = dd[,sum(lpost),f1][,f1]
#lpr_post = lpr_post - spread(logRowSumExp(lpr_post),2,nf)
return(lpr_post)
}
#' precompute the likelihood denominator constants
#' @export
m2.proba.gibbs <- function(sim,model,pl,eps_mix=0.1,mobility_smooth=1e-10,nfirms_to_update=1,maxiter=100,grp_start=NA,appendto=NA,debug=F) {
fids = sim$sdata[,unique(f1)]
ctrl = em.control(tol=1e-7,check_lik=F,dprior=1.01,fixb=T)
ctrl$proba_include =c(1,1,1,1) #use full likelihood to upate
ctrl$nfirms_to_update = nfirms_to_update
# USING RANDOM CLASSIFICATION AS STARTING POINT
if (any(is.na(grp_start))) {
grps_random = sample(model$nf,length(fids),replace=T)
names(grps_random) = fids
sim = grouping.append(sim,grps_random)
} else {
sim = grouping.append(sim,grp_start)
}
# check if we should append
if (is.na(appendto)) {
rr = data.frame()
istart=1
} else{
rr = appendto
istart = max(appendto$i)+1
maxiter = istart + maxiter
}
# run the GIBBS SAMPLER
for (i in istart:maxiter) {
grps_r = m2.proba.updateclasses(sim,model,pl,ctrl,debug=debug,mobility_smooth=mobility_smooth)
sim = grouping.append(sim,grps_r$grps,sort=FALSE)
#print(AA)
if (debug==T) {
AA = acast(sim$sdata[,.N,list(j1,j1true)],j1~j1true,fill=0,value.var = "N")
rr = rbind(rr,data.frame(i=i,step = i * ctrl$nfirms_to_update, match = sum(diag(AA))/sum(AA), mse = sim$sdata[,mean( (j1-j1true)^2)], fsize= fsize, lik_pre=grps_r$lik_before_firm_update))
flog.info("iter=%i mse=%f match=%f lik_pre=%4.4f",i * ctrl$nfirms_to_update,sim$sdata[,mean( (j1-j1true)^2)],sum(diag(AA))/sum(AA),grps_r$lik_before_firm_update)
} else {
rr = rbind(rr,data.frame(i=i,step = i * ctrl$nfirms_to_update, lik=grps_r$lik_before_firm_update, likm=grps_r$lik_before_firm_update_movers,liks=grps_r$lik_before_firm_update_stayers,likg=grps_r$lik_before_firm_update_gpr,updates=grps_r$updates))
flog.info("iter=%i lik_pre=%4.4e movers=%4.4e stayers=%4.4e grps=%4.4e",i * ctrl$nfirms_to_update,grps_r$lik_before_firm_update,grps_r$lik_before_firm_update_movers,grps_r$lik_before_firm_update_stayers,grps_r$lik_before_firm_update_gpr)
}
}
return(list(rr=rr,last_grp = grps_r$grps))
}
#' extract the proportions from a classification
#' @export
m2.proba.getlcasspr <- function(sim,nf) {
V = rep(0,nf)
# get the number of firms per class
for (i in 1:nf) {
V[i] = sim$sdata[j1==i,length(unique(f1))]
}
class_pr = V/sum(V)
lik = sum(log(class_pr)*V)
return(list(class_pr = class_pr,lik=lik,NNf=V))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.