#' @title Converts `lfe::felm()` commands into their `fixest::feols()` equivalents.
#'
#' @description Takes an R script with `lfe::felm()` commands, converts them
#' into their `fixest::feols` equivalents, and then exports the resulting
#' script to disk. Conversion is the only thing it does. Neither the input
#' nor the output script is run.
#' @param infile An R script containing `lfe::felm()` commands. Required.
#' @param outfile File or connection to write the resulting R script (i.e. with
#' `fixest::feols()` conversion) to. Can be the same as the input script, in
#' which case the latter will be overwritten. Can also be left
#' blank in which case nothing will be written to disk and the output will
#' simply be printed to screen.
#' @param verbose Logical. Should the result be printed to screen. Defaults to
#' `FALSE` unless `outfile` above is left blank.
#' @param robust Logical. By default, iid errors will be used unless cluster
#' variables have been specified in the `felm()` formula(s). If users would
#' like HC-robust standard errors, they should specify `TRUE`. Will be ignored
#' if the `felm` formula contains cluster variables, since the errors will
#' then default to cluster-robust.
#' @details `lfe::felm()` and `fixest::feols()` provide "fixed-effects"
#' estimation routines for high-dimensional data. Both methods are highly
#' optimised, although `feols()` is newer and tends to be quite a bit faster.
#' The syntax between these two methods is similar, if not quite offering
#' drop-in replacement. This function aims to automate the conversion process;
#' ignoring non-relevant arguments and differing options between the two,
#' while doing its best to ensure that the resulting scripts will produce the
#' same output.
#'
#' Note that the conversion only handles (or attempts to handle) the actual
#' model calls. No attempt is made to convert downstream objects or functions
#' like regression table construction. Although, you will probably be okay if
#' you use a modern table-generating package like `modelsummary`.
#'
#' Other limitations include: (1) The function more or less implements a
#' literal translation of the relevant `felm` model. It doesn't support
#' translation for some of the specialised syntax that `feols()` offers, e.g.
#' multiple estimation and varying slopes. Everything should still work even
#' if the literal translation doesn't yield all of the additional performance
#' boosts and tricks that `feols()` offers. (2) The function assumes that
#' users always provide a dataset in their model calls; i.e. regressions with
#' global variables are not supported. (3) Similarly, models that are
#' constructed programmatically (e.g. with `Formula()`) are not supported.
#'
#' @seealso \code{\link[lfe]{felm}}, \code{\link[fixest]{feols}}.
#' @return An R script.
#' @export
#' @examples
#' \dontrun{
#' ## Write a (deliberately messy) lfe script
#' lfe_string = "
#' library(lfe)
#' library(modelsummary)
#'
#' ## Our toy dataset
#' aq = airquality
#' names(aq) = c('y', 'x1', 'x2', 'x3', 'mnth', 'dy')
#'
#' ## Simple OLS (no FEs)
#' mod1 = felm(y ~ x1 + x2, aq)
#'
#' ## Add a FE and cluster variable
#' mod2 = felm(y ~ x1 + x2 |
#' dy |
#' 0 |
#' mnth, aq)
#'
#' ## Add a second cluster variable and some estimation options
#' mod3 = felm(y ~ x1 + x2 |
#' dy |
#' 0 |
#' dy + mnth,
#' cmethod = 'reghdfe',
#' exactDOF = TRUE,
#' aq)
#'
#' ## IV reg with weights
#' mod4 = felm(y ~ 1 |
#' dy |
#' (x1 ~ x3) |
#' mnth,
#' weights = aq$x2,
#' data = aq
#' )
#'
#' ## Regression table
#' mods = list(mod1, mod2, mod3, mod4)
#' msummary(mods, gof_omit = 'Pseudo|Within|Log|IC', output = 'markdown')
#' "
#' writeLines(lfe_string, 'lfe_script.R')
#'
#' ## Convert to fixest equivalents
#' lfe2fixest('lfe_script.R') ## no output file provided, will print to screen
#' lfe2fixest('lfe_script.R', 'fixest_script.R') ## write converted script to disk
#'
#' ## Check equivalence
#'
#' ## First the lfe version
#' source('lfe_script.R', print.eval = TRUE)
#'
#' ## Then the fixest conversion
#' source('fixest_script.R', print.eval = TRUE)
#'
#' ## Clean up
#' file.remove(c('lfe_script.R', 'fixest_script.R'))
#' }
lfe2fixest =
  function(infile = NULL, outfile = NULL, verbose = FALSE, robust = FALSE) {

    if (is.null(infile)) stop('Input file required.')

    lfe_script = readLines(infile)

    ## Comment-free, whitespace-trimmed copy of the script. All detection and
    ## parsing is done on this copy so that trailing comments or stray
    ## whitespace cannot confuse the search for the start/end of a call.
    code_only = trimws(gsub('#.*', '', lfe_script))

    ## Net parenthesis depth contributed by a string (opens minus closes).
    paren_depth = function(x) {
      n_open = sum(gregexpr('(', x, fixed = TRUE)[[1]] > 0)
      n_close = sum(gregexpr(')', x, fixed = TRUE)[[1]] > 0)
      n_open - n_close
    }

    ## Locate every felm() call: it starts on a line containing 'felm(' and
    ## ends on the line where the parenthesis opened by 'felm(' is closed.
    start_felm_lines = integer(0)
    end_felm_lines = integer(0)
    for (s in grep('felm(', code_only, fixed = TRUE)) {
      ## Skip matches that sit inside a call we have already delimited
      if (length(end_felm_lines) > 0 && s <= max(end_felm_lines)) next
      first = code_only[s]
      ## Only count parentheses from 'felm(' onwards on the first line
      depth = paren_depth(substring(first, regexpr('felm(', first, fixed = TRUE)))
      e = s
      while (depth > 0) {
        e = e + 1L
        if (e > length(code_only)) {
          stop('Unterminated felm() call starting on line ', s,
               ' of the input script.')
        }
        depth = depth + paren_depth(code_only[e])
      }
      start_felm_lines = c(start_felm_lines, s)
      end_felm_lines = c(end_felm_lines, e)
    }

    ## Translate one felm() call (given as its comment-free lines) into a
    ## single-line feols() call.
    convert_call = function(call_lines) {
      felm_call = paste0(call_lines, collapse = ' ')
      felm_call = gsub('\t', '', felm_call)

      ## Split the call into: prefix (up to 'felm('), formula (up to the
      ## first comma) and suffix (remaining arguments plus closing paren).
      pref = gsub('felm\\(.*', 'felm\\(', felm_call)
      fml = gsub(',.*', '', felm_call)
      fml = gsub(pref, '', fml, fixed = TRUE)
      suff = gsub(paste0(pref, fml), '', felm_call, fixed = TRUE)

      fml_split = strsplit(fml, '\\|')[[1]]

      ## Multiple endogenous variables, e.g. (x1 | x2 ~ z1 + z2), contain a
      ## '|' themselves and so get split over several slots. Stitch them
      ## back together into the single IV slot (slot 3).
      tildas = grep('~', fml_split)
      tildas = tildas[tildas > 1]
      if (length(tildas) > 0 && tildas[1] > 3) {
        iv_end = tildas[1]
        ne = iv_end - 3 + 1 ## how many endogenous variables there are
        pre_iv = fml_split[1:(iv_end - ne)]
        iv_part = trimws(paste(fml_split[(iv_end - ne + 1):iv_end],
                               collapse = ' + '))
        iv_part = sub('\\)$', '', iv_part)
        if (length(fml_split) >= iv_end + 1) {
          post_iv = fml_split[(iv_end + 1):length(fml_split)]
        } else {
          post_iv = NULL
        }
        fml_split = c(pre_iv, iv_part, post_iv)
      }

      main = trimws(fml_split[1])
      add_fes = FALSE
      add_iv = FALSE
      add_cluster = FALSE
      if (length(fml_split) >= 2) {
        fes = trimws(fml_split[2])
        add_fes = fes != '0'
      }
      if (length(fml_split) >= 3) {
        iv = gsub('\\(|\\)', '', trimws(fml_split[3]))
        iv = gsub('\\s+', ' ', iv)
        add_iv = iv != '0'
      }
      if (length(fml_split) >= 4) {
        cluster_slot = trimws(fml_split[4])
        cluster_vars = paste0('~', cluster_slot)
        ## A '0' in the cluster slot means "no clustering"
        add_cluster = cluster_slot != '0'
      }

      fixest_fml = main
      if (add_fes) fixest_fml = paste0(fixest_fml, ' | ', fes)
      if (add_iv) fixest_fml = paste0(fixest_fml, ' | ', iv)

      ## Only rename the function itself (and its namespace, if given); leave
      ## any object names in the prefix that happen to contain 'felm' alone.
      fixest_pref = sub('lfe::felm\\($', 'fixest::feols(', pref)
      fixest_pref = sub('felm\\($', 'feols(', fixest_pref)

      ## Peel the closing parenthesis off the suffix so that it survives even
      ## when the last felm argument is one we drop. It is re-attached below.
      closer = ''
      if (grepl('\\)\\s*$', suff)) {
        closer = ')'
        suff = sub('\\)\\s*$', '', suff)
      }
      args = trimws(strsplit(suff, ',', fixed = TRUE)[[1]])
      args = args[nzchar(args)]

      ## Keep only the arguments that feols() shares with felm()
      is_named = grepl('^[[:alnum:]._]+\\s*=([^=]|$)', args)
      kept_args = args[grepl('^(data|subset|weights)\\s*=([^=]|$)', args)]

      ## Catch if 'data' arg not specified explicitly: the first unnamed
      ## argument after the formula is matched to 'data' by position.
      if (!any(grepl('^data\\s*=', args)) && any(!is_named)) {
        kept_args = c(kept_args, paste('data =', args[!is_named][1]))
      }

      ## Standard errors: cluster variables take precedence over 'robust'
      se_arg = character(0)
      if (add_cluster) {
        se_arg = paste0('cluster = ', cluster_vars)
      } else if (isTRUE(robust)) {
        se_arg = "se = 'hetero'"
      }

      all_args = c(se_arg, kept_args)
      fixest_suff = closer
      if (length(all_args) > 0) {
        fixest_suff = paste0(', ', paste(all_args, collapse = ', '), closer)
      }

      paste0(fixest_pref, fixest_fml, fixest_suff)
    }

    fixest_fmls =
      vapply(
        seq_along(start_felm_lines),
        function(i) convert_call(code_only[start_felm_lines[i]:end_felm_lines[i]]),
        character(1)
      )

    ## Splice the converted calls back in. Working from the bottom of the
    ## script upwards means earlier line numbers are never shifted.
    fixest_script = lfe_script
    for (i in rev(seq_along(start_felm_lines))) {
      fixest_script = append(
        fixest_script[-(start_felm_lines[i]:end_felm_lines[i])],
        fixest_fmls[i],
        after = start_felm_lines[i] - 1L
      )
    }

    fixest_script = gsub('library(lfe)', 'library(fixest)', fixest_script,
                         fixed = TRUE)

    if (is.null(outfile) || isTRUE(verbose)) {
      cat(paste(fixest_script, collapse = '\n'))
    }
    if (!is.null(outfile)) writeLines(fixest_script, outfile)

    ## Hand the converted script back (invisibly) so callers can inspect it
    invisible(fixest_script)
  }
## `felm2feols` is an alias for `lfe2fixest`: both names are bound to the same
## function, and the alias is documented on the same help page via @rdname.
#' @rdname lfe2fixest
#' @examples
#' \dontrun{
#' ## For people that like options, there's the felm2feols() alias...
#' felm2feols('another_felm_script.R')
#' }
#' @export
felm2feols = lfe2fixest
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.