#' Use this function to estimate Multivariate Switch model
#' @param data data frame containing the variables in the model
#' @param outcome main (continious) equation formula
#' @param selection1 the first selecton equation formula
#' @param selection2 the second selecton equation formula
#' @param selection3 the third selecton equation formula
#' @param selection4 the fourth selecton equation formula
#' @param selection5 the fifth selecton equation formula
#' @param groups vector which determines each groups outcome
#' @param rules matrix which rows correspond to possible combination of selection equations values.
#' @param show_info shows likelihood function optimization info
#' @param only_twostep if true then only two-step procedure used
#' @param opts options that are passed to nlopt
#' @param x0 optional initial values to be used while solving optimization task
#' @param remove_zero_columns set true for switch regression (more then 1 groups) if your dependent variable is a part of dummy variable
#' @details This function estimates Multivariate Switch-Selection model via maximum-likelihood and two-step procedures.
#' This model was developed by E.V. Kossova and B.S. Potanin
#' Please cite as follows:
#' Kossova, Elena & Potanin, Bogdan, 2018. 'Heckman method and switching regression model multivariate
#' generalization,' Applied Econometrics, Publishing House 'SINERGIA PRESS', vol. 50, pages 114-143.
#' Dependent variables in selection equations should have values -1 or 1.
#' Also there is a special value 0 which indicates that this observation is unobservable but it is necessary
#' to take into consideration other selection equation information while calculating likelihood function.
#' The i-th row of rules corresponds to i-th element of groups. rules rows should contain information regarding
#' possible combinations of selection equations values. While groups determines the outcome for each of this
#' possible combinations. Special 0 value for groups responsible for sample selection.
gheckman<-function(data, outcome, selection1=NULL, selection2=NULL, selection3=NULL,
selection4=NULL, selection5=NULL, groups=NULL, rules=NULL,
selection_equations_names=rep(NA,5),
show_info=FALSE, only_twostep=FALSE,
opts=list("algorithm" = "NLOPT_LD_TNEWTON", "xtol_rel" = 1e-16, "print_level" = 1, maxeval = 1000000),
x1=NULL, remove_zero_columns=FALSE)
{
#PHASE 0: Extracting data from formulas
y_variables=model.frame(formula = outcome, data=data, na.action = NULL);#data for main equation
n_observations_total=length(y_variables[,1]);#total number of observations
y_variables[rowSums(is.na(y_variables))>0,]=NA;#remove y that could not be calculated
y=y_variables[,1];#dependend variable
names(y)=names(y_variables)[1];#remebmer variable name
y_variables[,1]=1;#intercept
colnames(y_variables)[1]="intercept";
#Data for selection equations
z_variables=matrix(list(),5,1)#selection equation independent variables
z=matrix(NA,n_observations_total,5);#selection equation dedepndent variables
for (i in 1:5)#for each possible selection equation
{
selection=paste("selection",toString(i),sep="");#name of current selection equation
if (!is.null(get(selection)))#if this selection equation assigned
{
z_variables[[i]]=model.frame(formula = as.formula(get(selection)), data=data, na.action = NULL)
z_variables[[i]][rowSums(is.na(z_variables[[i]]))>0,]=NA;#remove z that could not be calculated
z[,i]=z_variables[[i]][,1];
if(is.na(selection_equations_names[i]))
{
selection_equations_names[i]=colnames(z_variables[[i]])[1];
}
z_variables[[i]][,1]=rep(1,n_observations_total);#intercept
colnames(z_variables[[i]])[1]="intercept";
z_variables[[i]][is.na(z_variables[[i]])]=0;
}
else#if it is no such selection equation
{
z_variables=z_variables[1:(i-1)];#preserve only existing selection equation
z=z[,1:(i-1)];
break
}
}
#PHASE 1: Preparing data
sort_list<-gheckmanSort(y, y_variables, z, z_variables, groups, rules, remove_zero_columns);
sort_list$selection_equations_names=selection_equations_names;
#Make sort_list elements variables
for (i in 1:length(sort_list))
{
if(names(sort_list)[[i]]!="z" & names(sort_list)[[i]]!="z_variables")
{
do.call("<-",list(names(sort_list)[[i]], sort_list[[i]]));
}
}
opts = opts;#setting optimization options max(maxeval/15,n_selection_equations_max*50)
#PHASE 2: Two-step method
n_parameters=coef_z[[n_selection_equations_max]][dim(coef_z[[n_selection_equations_max]])[1]]
n_parameters_y=coef_y[[n_outcome]][dim(coef_y[[n_outcome]])[1]]
x0=matrix(0,n_parameters);
x0_names=vector(length = length(x0));#Store variables names
#Get initial values
if (n_selection_equations_max==1)
{
z=matrix(z,ncol=1);
}
for (i in 1:n_selection_equations_max)
{
#-1 in order to exclude constant
x0[coef_z[[i]]]=coef(probit_simple<-glm(I((z[z[,i]!=0,i]+1)/2)~.-1,
data=data.frame(z_variables[[i]][z[,i]!=0,]),family=binomial(link="probit")));
x0_names[coef_z[[i]]]=all.vars(formula(probit_simple)[-2]);#get z_variables variables names
}
z_variables_names=matrix(list(),n_selection_equations_max);#z_variables_names
for (i in 1:n_selection_equations_max) {z_variables_names[[i]]=colnames(z_variables[[i]]);}
#Load z data from sort_list
z_variables=sort_list$z_variables;#Only now we can load it from sort method result
z=sort_list$z;#Only now we can load it from sort method result
#Set lower and upper bound constraints for x0_names
lb=c(rep(-0.9999999,rho_z_n),rep(0,n_parameters_y-rho_z_n),rep(-Inf,n_parameters-coef_z[[1]][1]+1));
ub=c(rep(0.9999999,rho_z_n),rep(0,n_parameters_y-rho_z_n),rep(Inf,n_parameters-coef_z[[1]][1]+1));
#Estimate coefficients and store them to x0
f<-nloptr(x0=x0, eval_f=gheckmanLikelihood,opts=opts, lb=lb, ub=ub, y=y, z_variables=z_variables,
y_variables=y_variables, rules_no_ommit=rules_no_ommit, n_selection_equations=n_selection_equations,
coef_z=coef_z, sigma_last_index=sigma_last_index, coef_y=coef_y, groups=groups*0, n_groups=n_groups,
n_selection_equations_max=n_selection_equations_max, rules_converter=rules_converter,
n_outcome=n_outcome, rules=rules, groups_observations=groups_observations,
rho_y_indices=rho_y_indices, show_info=show_info, maximization=FALSE);
x0=f$solution;
#Storing covariance matrix
covmatrix=jacobian(func = gheckmanGradient, x = x0, y=y, z_variables=z_variables, y_variables=y_variables, rules_no_ommit=rules_no_ommit, n_selection_equations=n_selection_equations, coef_z=coef_z, sigma_last_index=sigma_last_index, coef_y=coef_y, groups=groups*0, n_groups=n_groups, n_selection_equations_max=n_selection_equations_max, rules_converter=rules_converter, n_outcome=n_outcome, rules=rules, groups_observations=groups_observations, rho_y_indices=rho_y_indices, show_info=FALSE);
covmatrix_gamma=solve(covmatrix[coef_z[[1]][1]:length(x0),coef_z[[1]][1]:length(x0)]);
sort_list$covmatrix_gamma=covmatrix_gamma;
#Calculating lambda
#Storing variables names to sort_list
sort_list$x0_names=x0_names
#One dimensional
sort_list=lambdaCalculate(x0=x0, sort_list=sort_list, is_2D = TRUE);
#Twostep LS
sort_list=gheckmanLS(x0, sort_list);
sort_list=gheckmanLSAdjustCovariance(sort_list);
#Update variables
for (i in 1:length(sort_list))
{
do.call("<-",list(names(sort_list)[[i]], sort_list[[i]]));
}
x0[(sigma_last_index-n_outcome+1):sigma_last_index]=sigma_twostep;
#Return the result for twostep
if (only_twostep)
{
return(list('model'=twostep_LS, 'covmatrix'=Cov_B,'sigma'=sigma_twostep,
'model_unadjusted'=twostep_LS_unadjusted, 'sort_list'=sort_list));
}
#PAHSE 3: MLE with Two-step initial values
#Set lower and upper bound constraints for x0_names
rho_n=sigma_last_index-n_outcome;
no_rho_n=length(x0)-rho_n;
lb=(c(rep(-1,rho_n),rep(-Inf,no_rho_n)));
ub=(c(rep(1,rho_n),rep(Inf,no_rho_n)));
x0[is.na(x0)]=0;
sort_list$x0=x0
#Estimate coefficients and store them to MLE
if (!is.null(x1)) {x0<-x1;};#substitute some initial value
f<-nloptr(x0=x0, eval_f=gheckmanLikelihood,opts=opts, lb=lb, ub=ub, y=y, z_variables=z_variables, y_variables=y_variables, rules_no_ommit=rules_no_ommit, n_selection_equations=n_selection_equations, coef_z=coef_z, sigma_last_index=sigma_last_index, coef_y=coef_y, groups=groups, n_groups=n_groups, n_selection_equations_max=n_selection_equations_max, rules_converter=rules_converter, n_outcome=n_outcome, rules=rules, groups_observations=groups_observations, rho_y_indices=rho_y_indices, show_info=show_info, maximization=FALSE);
#Standard deviations estimation
st_dev=jacobian(func = gheckmanGradient, x = f$solution, y=y, z_variables=z_variables, y_variables=y_variables, rules_no_ommit=rules_no_ommit, n_selection_equations=n_selection_equations, coef_z=coef_z, sigma_last_index=sigma_last_index, coef_y=coef_y, groups=groups, n_groups=n_groups, n_selection_equations_max=n_selection_equations_max, rules_converter=rules_converter, n_outcome=n_outcome, rules=rules, groups_observations=groups_observations, rho_y_indices=rho_y_indices, show_info=FALSE);
Cov_M=solve(st_dev)
sort_list$Cov_M=Cov_M;#save Covariance matrix to sort_list list
st_dev=sqrt(diag(Cov_M));
x0=f$solution;
aSTD=pnorm(x0/st_dev);
bSTD=pnorm(-x0/st_dev);
p_value=x0*0;
for (i in (1:length(x0)))
{
p_value[i]=1-max(aSTD[i],bSTD[i])+min(aSTD[i],bSTD[i]);
}
#Recalculating lambda
sort_list=lambdaCalculate(x0=x0, sort_list=sort_list, is_2D = TRUE);
for (i in 1:length(sort_list))
{
do.call("<-",list(names(sort_list)[[i]], sort_list[[i]]));
}
sort_list$x0=f$solution;
x0=sort_list$x0;
#Orginizing output
counter=0;
for (i in 1:n_selection_equations_max)
{
for (j in (i+1):n_selection_equations_max)
{
if (j>i & j<=n_selection_equations_max)
{
counter=counter+1;
x0_names[counter]=paste(c('rho Z',i,j), collapse = " ");
}
}
}
for (i in 1:n_outcome)
{
for (j in 1:n_selection_equations_max)
{
counter=counter+1;
x0_names[counter]=paste(c('rho Y',i,j), collapse = " ");
}
}
for (i in 1:n_outcome)
{
counter=counter+1;
x0_names[counter]=paste(c('sigma',i), collapse = " ");
}
#MLE result table
result=noquote(cbind(x0_names,x0,st_dev,p_value));
colnames(result)=c("Coefficients:","Estimate","Std. Error","p-value");
result=as.table(result)
rownames(result)<-rep("",length(x0_names));
#Citation
citation="Kossova, Elena & Potanin, Bogdan, 2018. 'Heckman method and switching regression model multivariate generalization,' Applied Econometrics, Publishing House 'SINERGIA PRESS', vol. 50, pages 114-143."
print(noquote(paste("Please cite as :",citation)));
return(list("mle"=list("result" = result, "coefficients"=x0,"st_dev"=st_dev,"p-value"=p_value), "twostep"=list("model"=twostep_LS,"covmatrix"=Cov_B,"sigma"=sigma_twostep, "twostep_LS"=twostep_LS_unadjusted, "x0"=sort_list$x0), "logLikelihood"=-f$objective, "x0"=x0, "sort_list"=sort_list, "Cov_M"=Cov_M, "citation"=citation))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.