#' Use InteractiveFixedEffectModels.jl to run large fixed effect models in julia
#'
#' \code{FixedEffectInteract} returns the results of a 2 stage linear fixed effect regression
#'
#' @param dt dataset of interest
#' @param lhs String: lhs variable
#' @param rhs String: rhs variables
#' @param ife String of interacted fixed effects (id1 + id2)
#' @param rank_ife Integer: number of factors to be estimated
#' @param fe expression of fixed effects id1 + id2:id3
#' @param weights expression of weights
#' @param vcov Specification for the error
#' @param maxiter Parameter that passes to julia the solver max iteration
#' @param timing Add timing
#' @param save_res Save the results of the model
#' @param print do we print output or not
#'
#' @return The return value will be a list which contains two elements at this point
#' results: includes most of the observation from the julia call
#' summary: includes information that is of importance to write a table
#'
#' @examples
#' # See vignettes and readme
#' # Short example
#' \dontrun{
#' df = Ecdat::Cigar
#' FixedEffectInteract(df, "sales", "price", "state+year", 2, "state", vcov = "robust")
#' }
#'
#' @export
#####################################################################################################################
FixedEffectInteract <- function(
dt,
lhs,
rhs,
ife,
rank_ife,
fe = NULL,
weights = NULL,
vcov = NULL,
maxiter = 10000L,
timing = TRUE,
save_res = TRUE, # do we save PCs, loadings and fixed effects
print = TRUE
){
# initialize
setDT(dt)
# parse the rhs, fe and cluster
if (is.null(rhs)){ rhs <- "0" }
rhs_split <- unlist( stringr::str_split(rhs, "\\+") )
rhs_split <- unique( stringr::str_replace_all(rhs_split, " ", "") )
if (is.null(fe)){ fe <- "0" }
fe_split <- unlist( stringr::str_split(fe, "\\+") )
n_fe = length(fe_split)
fe_split <- unlist( stringr::str_split(fe_split, "\\:") )
fe_split <- unique( stringr::str_replace_all(fe_split, " ", "") )
ife_split <- unlist( stringr::str_split(ife, "\\+") )
n_ife = length(ife_split)
ife_split <- unlist( stringr::str_split(ife_split, "\\:") )
ife_split <- unique( stringr::str_replace_all(ife_split, " ", "") )
cluster_split <- gsub("cluster", "",
gsub("[()]", "",
gsub("robust", "", vcov) ) )
cluster_split <- gsub(" ", "",
unlist( stringr::str_split(cluster_split, "\\+") ) )
# set the formula for julia
julia_formula = paste(lhs, "~", rhs)
# set the julia_ife
julia_ife = paste0("ife = ( ", ife, ", ", rank_ife, " )")
# set the julia_reg_fe
if (fe == "0"){ fe <- NULL }
if (!is.null(fe)){
julia_reg_fe = paste("fe =", stringr::str_replace_all(fe, "\\:", "\\&"))
} else if (is.null(fe)){
julia_reg_fe = ""
}
# set the weights
if (!is.null(weights)){
if (stringr::str_length(weights)>0){
julia_reg_fe = paste(julia_reg_fe, ", weights =", weights)
} }
# set vcov
if (is.null(vcov)){ # default to robust
vcov <- "robust"
} else if (!stringr::str_detect(vcov, "cluster")) {
vcov <- "robust"
}
julia_reg_vcov = paste("vcov = ", vcov)
julia_reg_save = paste("save = ", ifelse(save_res, "true", "false"))
julia_reg_opt = paste(c(julia_reg_fe, julia_reg_vcov, julia_reg_save),
collapse = ", ")
julia_regcall = paste("reg_res = regife(df_julia, @model(",
paste(c(julia_formula, julia_ife, julia_reg_opt), collapse = ", "),
"), maxiter =", as.integer(maxiter), ");")
julia_regcall <- gsub(", ,", ",", julia_regcall) # clean up because of missing instructions (e.g. when no fe are specified)
# ---------------------------------
# SPECIFY THE DATASET FOR REGRESSION
# move only the right amount of data into julia (faster)
col_keep = intersect(names(dt), c(lhs, rhs_split, ife_split, fe_split, cluster_split, weights))
dt <- data.table(dt)[, c(col_keep), with = F ]
# fill NAs on lhs, sometimes object passed onto julia have NaN instead of missing
for ( lhs_iter in seq(1, length(lhs)) ){
dt[ !is.finite(get(lhs[lhs_iter])), c(lhs[lhs_iter]) := NA ]
# dt_tmp[ !is.finite(dt_tmp[[lhs[lhs_iter]]]), c(lhs[lhs_iter]) := NA ] # Alternative writing in data.table
}
dt[, temporary_id_key := seq(1, .N) ] # create an id to recognize rows later on
dt_missing <- dt[, lapply(.SD, as.numeric) ]
dt_missing <- dt_missing[, .(missing_tag = is.na(rowSums(.SD)), temporary_id_key) ]
dt_reg <- dt[!dt_missing[missing_tag==T], on = c("temporary_id_key")]
julia_assign("df_julia", dt_reg)
# create pooled fe variables in the dataset
for (iter in seq(1, length(ife_split))){
pool_cmd = paste0("df_julia[:", ife_split[iter], "]",
" = categorical(df_julia[:",
ife_split[iter], "]);")
julia_command(pool_cmd)
}
# create pooled fe variables in the dataset (if there is something to do)
if (!is.null(fe)){
for (iter in seq(1, length(fe_split))){
pool_cmd = paste0("df_julia[:", fe_split[iter], "]",
" = categorical(df_julia[:",
fe_split[iter], "]);")
julia_command(pool_cmd)
}
}
# and clustering variables
if ( stringr::str_length(paste(cluster_split, collapse="")) > 0 ){
for (iter in seq(1, length(cluster_split))){
pool_cmd = paste0("df_julia[:", cluster_split[iter], "]",
" = categorical(df_julia[:",
cluster_split[iter], "]);")
julia_command(pool_cmd)
}
}
# ----------------------------
# Managing exceptions in julia
# initialize:
julia_command("reg_res = (augmentdf=0, coefnames=0);")
julia_regcall_exception = gsub("reg_res = ", "", julia_regcall)
julia_regcall_exception = paste0("reg_res = try ", julia_regcall_exception, "catch; (augmentdf=0, coefnames=0) end;")
julia_regcall = julia_regcall_exception;
if (timing == TRUE){
julia_regcall = paste("@time", julia_regcall);
}
# ----------------------------
# ----------------------------
if (print==TRUE){
message(julia_regcall)
}
# Run the regression
try(julia_command(julia_regcall))
#tryCatch(julia_command(julia_regcall),
# error = function(c) "IFE error",
# warning = function(c) "IFE warning",
# message = function(c) "IFE message"
#)
# ----------------------------
# ----------------------------
# BRANCH OUT IF REGRESSION FAILS
augment <- julia_eval("getfield(reg_res, :augmentdf);")
results <- nrow(as.matrix(augment))
if (results <= 1){
if (print==TRUE){
message("IFE regression has failed\n")
}
return(list(statistics = NA,
dt_augment = NA))
}
# ----------------------------
# ----------------------------
# return results in R: coefficient results if there is a rhs
list_tmp <- list() # initialize result is a list
list_tmp$julia_call = julia_regcall;
if (rhs != "0"){
jl_coefficients = julia_eval(paste0("reg_res.coef"))
names(jl_coefficients) = julia_eval(paste0("reg_res.coefnames"))
list_tmp$coefficients = jl_coefficients
list_tmp$lhs = julia_eval(paste0("reg_res.yname"))
list_tmp$julia_call = julia_regcall
list_tmp$se = julia_eval(paste0("stderror(reg_res)"))
list_tmp$tt = julia_eval(paste0("coef(reg_res) ./ stderror(reg_res)"))
list_tmp$pvalues = julia_eval(paste0("coeftable(reg_res).mat[:,4]"))
list_tmp$ci = julia_eval(paste0("confint(reg_res)"))
list_tmp$coefnms = julia_eval(paste0("coefnames(reg_res)"))
list_tmp$nobs = julia_eval(paste0("reg_res.nobs")) # number of observations
list_tmp$r2 = list(r2 = julia_eval(paste0("reg_res.r2")),
r2_adjusted = julia_eval(paste0("reg_res.r2_a")),
r2_within = julia_eval(paste0("reg_res.r2_within")) )
list_tmp$statistics = list(F_stat = NA,
pvalue = list_tmp$pvalues)
# Coeftest class is easy: only need coefficients
Rcoef = matrix(c(jl_coefficients, list_tmp$se, list_tmp$tt, list_tmp$pvalues),
nrow=length(list_tmp$coefnms), ncol=4, byrow = F)
colnames(Rcoef) = c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
rownames(Rcoef) = list_tmp$coefnms
class(Rcoef) = "coeftest"
list_tmp$coeftest = Rcoef
}
coef_list <- list_tmp
# output dataset (that keeps loading and PCs)
# augment <- julia_eval("getfield(reg_res, :augmentdf);")
setDT(augment)
if (!is.null(fe)){
setnames(augment, fe_split, paste0("p", fe_split))
}
augment <- cbind(dt_reg[, ife_split, with=F], augment)
# fill back the missing
augment <- rbind(dt[dt_missing[missing_tag==T], on = "temporary_id_key" ][, ife_split, with=F],
augment, fill = TRUE)
# ----------------------------
# output results (nothing saved yet)
if (print == TRUE){
reg_list <- julia_eval(paste0("reg_res"))
message(reg_list)
}
# ----------------------------
# return both all of the regression and coefficient subset
if (rhs != "0"){
return(list(statistics = coef_list,
summary = coef_list$coeftest,
dt_augment = augment # NB empty is save_res = FALSE
))
} else if (rhs == "0"){
return(list(statistics = coef_list,
dt_augment = augment))
}
} # end of FixedEffectInteract
# ----------------------------------------------------------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.