R/methods.R

Defines functions bread.fixest estfun.fixest df.residual.fixest hatvalues.fixest deviance.fixest sigma.fixest weights.fixest terms.fixest model.matrix.fixest formula.fixest update.fixest_multi update.fixest confint.fixest predict.fixest residuals.fixest fitted.values.fixest coefficients.fixest logLik.fixest BIC.fixest AIC.fixest nobs.fixest pvalue.fixest tstat.fixest se.fixest coeftable.fixest se.matrix pvalue.default tstat.default se.default coeftable.default tstat pvalue se coeftable plot.fixest.fixef fixef.fixest summary.fixest.fixef summary.fixest_list summary.fixest print.fixest

Documented in AIC.fixest BIC.fixest bread.fixest coefficients.fixest coeftable coeftable.default coeftable.fixest confint.fixest deviance.fixest df.residual.fixest estfun.fixest fitted.values.fixest fixef.fixest formula.fixest hatvalues.fixest logLik.fixest model.matrix.fixest nobs.fixest plot.fixest.fixef predict.fixest print.fixest pvalue pvalue.default pvalue.fixest residuals.fixest se se.default se.fixest se.matrix sigma.fixest summary.fixest summary.fixest.fixef summary.fixest_list terms.fixest tstat tstat.default tstat.fixest update.fixest update.fixest_multi weights.fixest

#----------------------------------------------#
# Author: Laurent Berge
# Date creation: Mon Jun 01 08:21:26 2020
# ~: Most methods are here
#----------------------------------------------#


####
#### print/summary ####
####



#' A print facility for `fixest` objects.
#'
#' This function is very similar to usual `summary` functions as it
#' provides the table of coefficients along with other information on the fit of
#' the estimation. The type of output can be customized by the user (using
#' function `setFixest_print`).
#'
#' @method print fixest
#'
#' @param x A `fixest` object. Obtained using the methods
#'   [`femlm`], [`feols`] or [`feglm`].
#' @param n Integer, number of coefficients to display. By default, only the
#'   first 8 coefficients are displayed if `x` does not come from  [`summary.fixest`].
#' @param type Either `"table"` (default) to display the coefficients table
#'   or `"coef"` to display only the coefficients.
#' @param fitstat A formula or a character vector representing which fit
#'   statistic to display. The types must be valid types of the function
#'   [`fitstat`]. The default fit statistics depend on the
#'   type of estimation (OLS, GLM, IV, with/without fixed-effect). Providing the
#'   argument `fitstat` overrides the default fit statistics, you can
#'   however use the point "." to summon them back. Ex 1: `fitstat = ~ . + ll` adds the log-likelihood
#'   to the default values. Ex 2: `fitstat = ~ ll + pr2` only displays the log-likelihood and the pseudo-R2.
#' @param ... Other arguments to be passed to [`vcov.fixest`].
#'
#' @details It is possible to set the default values for the arguments
#' `type` and `fitstat` by using the function `setFixest_print`.
#'
#' @seealso See also the main estimation functions [`femlm`],
#' [`feols`] or [`feglm`]. Use
#' [`summary.fixest`] to see the results with the appropriate
#' standard-errors, [`fixef.fixest`] to extract the
#' fixed-effects coefficients, and the function [`etable`] to
#' visualize the results of multiple estimations.
#'
#' @author Laurent Berge
#'
#' @examples
#'
#' # Load trade data
#' data(trade)
#'
#' # We estimate the effect of distance on trade
#' #   => we account for 3 fixed-effects (FEs)
#' est_pois = fepois(Euros ~ log(dist_km)|Origin+Destination+Product, trade)
#'
#' # displaying the results
#' #  (by default SEs are clustered if FEs are used)
#' print(est_pois)
#'
#' # By default the coefficient table is displayed.
#' #  If the user wished to display only the coefficents, use option type:
#' print(est_pois, type = "coef")
#'
#' # To permanently display coef. only, use setFixest_print:
#' setFixest_print(type = "coef")
#' est_pois
#' # back to default:
#' setFixest_print(type = "table")
#'
#' #
#' # fitstat
#' #
#'
#' # We modify which fit statistic to display
#' print(est_pois, fitstat = ~ . + lr)
#'
#' # We add the LR test to the default (represented by the ".")
#'
#' # to show only the LR stat:
#' print(est_pois, fitstat = ~ . + lr.stat)
#'
#' # To modify the defaults:
#' setFixest_print(fitstat = ~ . + lr.stat + rmse)
#' est_pois
#'
#' # Back to default (NULL == default)
#' setFixest_print(fitstat = NULL)
#'
#'
print.fixest = function(x, n, type = "table", fitstat = NULL, ...){
  
  if(inherits(x, "dummy_print")){
    # This is a hack to avoid print when lags are created in data.tables
    # ex: pdat_dt[, x1_l1 := d(x1)]
    return(invisible())
  }
  
  # checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = c("n", "type", "vcov"),
                  valid_args = c("vcov", "se", "cluster", "ssc", "forceCovariance", "keepBounded"))
  }

  # The objects from the estimation and the summary are identical, except regarding the vcov
  from_summary = isTRUE(x$summary)

  if(!missnull(fitstat)){
    fitstat = fitstat_validate(fitstat, TRUE)
  }

  # User options
  set_defaults("fixest_print")

  # if NOT from summary, we consider the argument 'type'
  if(!from_summary){
    # checking argument type
    check_set_arg(type, "match(coef, table)")

    if(type == "coef"){
      print(coef(x))
      return(invisible())
    }
  }

  isNegbin = x$method == "fenegbin" || (x$method_type == "feNmlm" && x$family == "negbin")

  x = summary(x, fromPrint = TRUE, ...)

  check_arg(n, "integer scalar GE{1}")

  msgRemaining = ""
  nb_coef = length(coef(x)) - isNegbin
  if(missing(n) && is.null(x$n_print)){
    if(from_summary && !isTRUE(x$summary_from_fit)){
      n = Inf
    } else {
      if(nb_coef <= 15){
        n = 15
      } else {
        n = 8
        msgRemaining = sma("... {nb_coef - n} coefficients remaining (display them with summary() or use argument n)\n")
      }
    }

  } else {
    if(!is.null(x$n_print)) n = x$n_print

    if(n < nb_coef){
      msgRemaining = sma("... {nb_coef - n} coefficients remaining\n")
    }
  }

  # We also add the collinearity message
  collinearity_msg = ""
  if(!is.null(x$collin.var)){
    n_collin = length(x$collin.var)
    collinearity_msg = sma("... {n_collin} variable{#s, were} removed because of collinearity ({enum.3 ? x$collin.var}{&n_collin>3; [full set in $collin.var]})\n")
    if(isTRUE(x$iv) && any(grepl("^fit_", x$collin.var))){
      if(!any(grepl("^fit_", names(x$coefficients)))){
        iv_msg = "NOTE: all endogenous regressors were removed.\n"
      } else {
        n_rm = sum(grepl("^fit_", x$collin.var))
        iv_msg = sma("Important note: {N?n_rm} endogenous regressor{#s, were} removed => IV estimation not valid.\n")
      }

      collinearity_msg = paste0(collinearity_msg, iv_msg)
    }
  }

  if(isFALSE(x$convStatus)){
    last_warn = getOption("fixest_last_warning")
    if(is.null(last_warn) || (proc.time() - last_warn)[3] > 1){
      if(x$method_type == "feNmlm"){
        warning("The optimization algorithm did not converge, the results are not reliable. (", x$message, ")", call. = FALSE)
      } else if(x$method_type == "feols"){
        warning("The demeaning algorithm did not converge, the results are not reliable. (", x$message, ")", call. = FALSE)
      } else {
        warning("The GLM algorithm did not converge, the results are not reliable. (", x$message, ")", call. = FALSE)
      }
    }

  }

  coeftable = x$coeftable

  # The type of SE
  se.type = attr(coeftable, "type")
  if(is.null(se.type)) se.type = "Custom"

  if(x$method_type == "feNmlm"){
    family_format = c(poisson = "Poisson", negbin = "Negative Binomial", 
                      logit = "Logit", gaussian = "Gaussian")
    msg = ifelse(is.null(x$call$NL.fml), "", "Non-linear ")
    half_line = paste0(msg, "ML estimation, family = ", family_format[x$family])
    
  } else if(x$method %in% c("feglm", "feglm.fit")) {
    fam_call = x$call$family
    if(is.null(names(fam_call))){
      half_line = paste0("GLM estimation, family = ", x$family$family)
    } else {
      half_line = paste0("GLM estimation, family = ", deparse_long(fam_call))
    }
  } else if(x$method == "fepois") {
    half_line = "Poisson estimation"
  } else if(x$method == "fenegbin") {
    half_line = "Negative Binomial ML estimation"
  } else {
    half_line = "OLS estimation"
  }

  if(isTRUE(x$iv)){
    first_line = sma("TSLS estimation - Dep. Var.: {as.character(x$fml)[[2]]}\n",
                     "                  Endo.    : {', 'c ? get_vars(x$iv_endo_fml)}\n",
                     "                  Instr.   : {', 'c ? x$iv_inst_names}\n")
    second_line = sma("{Nth.upper ? x$iv_stage} stage: Dep. Var.: ", as.character(x$fml)[[2]], "\n")
    catma(first_line, second_line)
  } else {
    depvar_name = as.character(x$fml)[[2]]
    sep_value = if(nchar(half_line) + nchar(depvar_name) <= 60) ", " else "\n"
    cat(half_line, sep_value, "Dep. Var.: ", depvar_name, "\n", sep="")
  }

  catma("Observations: {n ? x$nobs}\n")

  extra_info = c("subset", "sample", "offset", "weights")
  for(i in seq_along(extra_info)){
    nm = extra_info[i]
    if(nm %in% names(x$model_info)){
      xtra = x$model_info[[extra_info[i]]]

      if(length(xtra) == 1){
        catma("{upper.first ? nm}: {xtra}\n")
      } else {
        if(xtra$value != "Full sample"){
          catma("{upper.first ? nm} ({xtra$var}): {xtra$value}\n")
        }
      }
    }
  }

  if(!is.null(x$fixef_terms)){
    terms_full = extract_fe_slope(x$fixef_terms)
    fixef_vars = terms_full$fixef_vars

    if(length(fixef_vars) > 0){
      catma("Fixed-effects: {',  'c ! {fixef_vars}: {n ? x$fixef_sizes[fixef_vars]}}\n")
    }

    catma("Varying slopes: {',  'c ! {terms_full$slope_vars} ({terms_full$slope_fe}): {n ? x$fixef_sizes[terms_full$slope_fe]}}\n")

  } else {
    if(!is.null(x$fixef_sizes)){
      catma("Fixed-effects: {',  'c ! {x$fixef_vars}: {n ? x$fixef_sizes}}\n")
    } 
  }


  if(is.null(x$onlyFixef)){

    cat("Standard-errors:", se.type, "\n")

    last_line = paste0(msgRemaining, collinearity_msg)

    # The matrix of coefficients
    if(isNegbin){
      if(nrow(coeftable) == 2){
        new_table = coeftable[1, , drop = FALSE]
      } else {
        new_table = coeftable[-nrow(coeftable), ]
      }

      print_coeftable(head(new_table, n), lastLine = last_line)

      theta = coeftable[".theta", 1]
      noDispInfo = ifelse(theta > 1000, "(theta >> 0, no sign of overdispersion, you may consider a Poisson model)", "")
      cat("Over-dispersion parameter: theta =", theta, noDispInfo, "\n")
    } else {
      print_coeftable(head(coeftable, n), lastLine = last_line)
    }
  }

  if(isTRUE(x$NA_model)){
    return(invisible())
  }

  if(!is.null(fitstat) && identical(fitstat, NA)){
    # No fitstat

  } else {

    if(is.null(fitstat) || "." %in% fitstat){
      if(x$method_type == "feols"){
        default_fit = c("rmse", "ar2")

        if(!is.null(x$fixef_sizes) && is.null(x$onlyFixef)){
          default_fit = c(default_fit, "wr2")
        }

        if(isTRUE(x$iv)){
          default_fit = c(default_fit, "ivf1", "wh", "sargan")
        }

      } else {
        default_fit = c("ll", "apr2", "bic", "cor2")
      }

      if("." %in% fitstat){
        fitstat = setdiff(c(default_fit, fitstat), ".")
      } else {
        fitstat = default_fit
      }
    }

    print(fitstat(x, fitstat), na.rm = TRUE, group.solo = TRUE)
  }

  if(isFALSE(x$convStatus)){
    iter_format = x$iterations
    if(length(iter_format)== 1){
      iter_format = paste0("lhs: ", iter_format)
    } else {
      n_iter = length(iter_format)
      iter_format = sma("lhs: {iter_format[n_iter]}, rhs: {', 'c ? head(iter_format, min(n_iter - 1, n))}")
    }
    cat("# Evaluations:", iter_format, "--", x$message, "\n")
  }

}

##

#' Summary of a `fixest` object. Computes different types of standard errors.
#'
#' This function is similar to `print.fixest`. It provides the table of coefficients along with 
#' other information on the fit of the estimation. It can compute different types of standard 
#' errors. The new variance covariance matrix is an object returned.
#'
#' @inheritParams feNmlm
#' @inheritParams aggregate.fixest
#'
#' @method summary fixest
#' @param vcov Versatile argument to specify the VCOV. In general, it is either a character 
#' scalar equal to a VCOV type, either a formula of the form: `vcov_type ~ variables`. The 
#' VCOV types implemented are: "iid", "hetero" (or "HC1"), "cluster", "twoway", 
#' "NW" (or "newey_west"), "DK" (or "driscoll_kraay"), and "conley". It also accepts 
#' object from [`vcov_cluster`], [`vcov_NW`][fixest::vcov_hac], [`NW`][fixest::vcov_hac], 
#' [`vcov_DK`][fixest::vcov_hac], [`DK`][fixest::vcov_hac], [`vcov_conley`] and 
#' [`conley`][fixest::vcov_conley]. It also accepts covariance matrices computed externally. 
#' Finally it accepts functions to compute the covariances. See the `vcov` documentation 
#' in the [vignette](https://lrberge.github.io/fixest/articles/fixest_walkthrough.html#the-vcov-argument-1).
#' @param se Character scalar. Which kind of standard error should be computed: 
#' \dQuote{standard}, \dQuote{hetero}, \dQuote{cluster}, \dQuote{twoway}, \dQuote{threeway} 
#' or \dQuote{fourway}? By default if there are clusters in the estimation: 
#' `se = "cluster"`, otherwise `se = "iid"`. Note that this argument is deprecated, 
#' you should use `vcov` instead.
#' @param cluster Tells how to cluster the standard-errors (if clustering is requested). 
#' Can be either a list of vectors, a character vector of variable names, a formula or 
#' an integer vector. Assume we want to perform 2-way clustering over `var1` and `var2` 
#' contained in the data.frame `base` used for the estimation. All the following 
#' `cluster` arguments are valid and do the same thing: 
#' `cluster = base[, c("var1", "var2")]`, `cluster = c("var1", "var2")`, `cluster = ~var1+var2`. 
#' If the two variables were used as fixed-effects in the estimation, you can leave it 
#' blank with `vcov = "twoway"` (assuming `var1` \[resp. `var2`\] was 
#' the 1st \[resp. 2nd\] fixed-effect). You can interact two variables using `^` with 
#' the following syntax: `cluster = ~var1^var2` or `cluster = "var1^var2"`.
#' @param stage Can be equal to `2` (default), `1`, `1:2` or `2:1`. Only used if the object 
#' is an IV estimation: defines the stage to which `summary` should be applied. If `stage = 1` 
#' and there are multiple endogenous regressors or if `stage` is of length 2, then an 
#' object of class `fixest_multi` is returned.
#' @param object A `fixest` object. Obtained using the functions [`femlm`], [`feols`] or [`feglm`].
#' @param ssc An object of class `ssc.type` obtained with the function [`ssc`]. Represents 
#' how the degree of freedom correction should be done.You must use the function [`ssc`] 
#' for this argument. The arguments and defaults of the function [`ssc`] are: 
#' `adj = TRUE`, `fixef.K="nested"`, `cluster.adj = TRUE`, `cluster.df = "min"`, 
#' `t.df = "min"`, `fixef.force_exact=FALSE)`. See the help of the function [`ssc`] for details.
#' @param .vcov A user provided covariance matrix or a function computing this matrix. If a 
#' matrix, it must be a square matrix of the same number of rows as the number 
#' of variables estimated. If a function, it must return the previously mentioned matrix.
#' @param lean Logical, default is `FALSE`. Used to reduce the (memory) size of the summary object.
#'  If `TRUE`, then all objects of length N (the number of observations) are removed 
#' from the result. Note that some `fixest` methods may consequently not work when applied 
#' to the summary.
#' @param forceCovariance (Advanced users.) Logical, default is `FALSE`. In the peculiar case 
#' where the obtained Hessian is not invertible (usually because of collinearity of 
#' some variables), use this option to force the covariance matrix, by using a generalized 
#' inverse of the Hessian. This can be useful to spot where possible problems come from.
#' @param keepBounded (Advanced users -- `feNmlm` with non-linear part and bounded 
#' coefficients only.) Logical, default is `FALSE`. If `TRUE`, then the bounded coefficients 
#' (if any) are treated as unrestricted coefficients and their S.E. is computed (otherwise 
#' it is not).
#' @param vcov_fix Logical scalar, default is `TRUE`. If the VCOV ends up not being 
#' positive definite, whether to "fix" it using an eigenvalue decomposition 
#' (a la Cameron, Gelbach & Miller 2011).
#' @param n Integer, default is 1000. Number of coefficients to display when the print method 
#' is used.
#' @param ... Only used if the argument `.vcov` is provided and is a function: extra arguments 
#' to be passed to that function.
#'
#' @section Compatibility with \pkg{sandwich} package:
#' The VCOVs from `sandwich` can be used with `feols`, `feglm` and `fepois` estimations. 
#' If you want to have a `sandwich` VCOV when using `summary.fixest`, you can use 
#' the argument `vcov` to specify the VCOV function to use (see examples).
#' Note that if you do so and you use a formula in the `cluster` argument, an innocuous 
#' warning can pop up if you used several non-numeric fixed-effects in the estimation 
#' (this is due to the function [`expand.model.frame`] used in `sandwich`).
#'
#' @return
#' It returns a `fixest` object with:
#' \item{cov.scaled}{The new variance-covariance matrix (computed according to the argument `se`).}
#' \item{se}{The new standard-errors (computed according to the argument `se`).}
#' \item{coeftable}{The table of coefficients with the new standard errors.}
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. 
#' Use [`fixef.fixest`] to extract the fixed-effects coefficients, and the function [`etable`] 
#' to visualize the results of multiple estimations.
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # Load trade data
#' data(trade)
#'
#' # We estimate the effect of distance on trade (with 3 fixed-effects)
#' est_pois = fepois(Euros ~ log(dist_km)|Origin+Destination+Product, trade)
#'
#' # Comparing different types of standard errors
#' sum_standard = summary(est_pois, vcov = "iid")
#' sum_hetero   = summary(est_pois, vcov = "hetero")
#' sum_oneway   = summary(est_pois, vcov = "cluster")
#' sum_twoway   = summary(est_pois, vcov = "twoway")
#'
#' etable(sum_standard, sum_hetero, sum_oneway, sum_twoway)
#'
#' # Alternative ways to cluster the SE:
#' summary(est_pois, vcov = cluster ~ Product + Origin)
#' summary(est_pois, vcov = ~Product + Origin)
#' summary(est_pois, cluster = ~Product + Origin)
#'
#' # You can interact the clustering variables "live" using the var1 ^ var2 syntax.#'
#' summary(est_pois, vcov = ~Destination^Product)
#'
#' #
#' # Newey-West and Driscoll-Kraay SEs
#' #
#'
#' data(base_did)
#' # Simple estimation on a panel
#' est = feols(y ~ x1, base_did)
#'
#' # --
#' # Newey-West
#' # Use the syntax NW ~ unit + time
#' summary(est, NW ~ id + period)
#'
#' # Now take a lag of 3:
#' summary(est, NW(3) ~ id + period)
#'
#' # --
#' # Driscoll-Kraay
#' # Use the syntax DK ~ time
#' summary(est, DK ~ period)
#'
#' # Now take a lag of 3:
#' summary(est, DK(3) ~ period)
#'
#' #--
#' # Implicit deductions
#' # When the estimation is done with a panel.id, you don't need to
#' # specify these values.
#'
#' est_panel = feols(y ~ x1, base_did, panel.id = ~id + period)
#'
#' # Both methods, NM and DK, now work automatically
#' summary(est_panel, "NW")
#' summary(est_panel, "DK")
#'
#' #
#' # VCOVs robust to spatial correlation
#' #
#'
#' data(quakes)
#' est_geo = feols(depth ~ mag, quakes)
#'
#' # --
#' # Conley
#' # Use the syntax: conley(cutoff) ~ lat + lon
#' # with lat/lon the latitude/longitude variable names in the data set
#' summary(est_geo, conley(100) ~ lat + long)
#'
#' # Change the cutoff, and how the distance is computed
#' summary(est_geo, conley(200, distance = "spherical") ~ lat + long)
#'
#' # --
#' # Implicit deduction
#' # By default the latitude and longitude are directly fetched in the data based
#' # on pattern matching. So you don't have to specify them.
#' # Further an automatic cutoff is computed by default.
#'
#' # The following works
#' summary(est_geo, "conley")
#'
#'
#'
#' #
#' # Compatibility with sandwich
#' #
#'
#' # You can use the VCOVs from sandwich by using the argument .vcov:
#' library(sandwich)
#' summary(est_pois, .vcov = vcovCL, cluster = trade[, c("Destination", "Product")])
#'
#'
summary.fixest = function(object, vcov = NULL, cluster = NULL, ssc = NULL, .vcov = NULL,
                          stage = NULL, lean = FALSE, agg = NULL, forceCovariance = FALSE,
                          se = NULL, keepBounded = FALSE, n = 1000, vcov_fix = TRUE,
                          nthreads = getFixest_nthreads(), ...){

  # computes the clustered SEs and returns the modified vcov and coeftable
  # NOTA: if the object is already a summary

  if(isTRUE(object$onlyFixef) || isTRUE(object$NA_model)){
    # means that the estimation is done without variables
    return(object)
  }

  mc = match.call()

  dots = list(...)

  check_arg(n, "integer scalar GE{1}")
  if(!missing(n)){
    object$n_print = n
  }

  # we need this to save the summary flags
  # All three arguments se+cluster+.vcov are formatted into a valid vcov arg.
  vcov_in = vcov = oldargs_to_vcov(se, cluster, vcov, .vcov)

  check_arg(lean, "logical scalar")
  check_arg(stage, "NULL integer vector no na len(,2) GE{1} LE{2}")

  skip_vcov = FALSE
  if(isTRUE(object$summary)){

    if("fromPrint" %in% names(dots)){
      # From print
      return(object)

    } else if(is.null(vcov) && is.null(ssc)){
      # We return directly the object ONLY if not any other argument has been passed

      skip_vcov = TRUE
      if(is.null(agg) && is.null(stage)){
        if(!lean || (lean && isTRUE(object$lean))){
          # No modification required
          object$summary_from_fit = FALSE
          return(object)
        }
      }
    }

    assign_flags(object$summary_flags, vcov = vcov, ssc = ssc, agg = agg)
  }

  # Checking arguments in ...
  if(is_user_level_call()){
    if(!is_function_in_it(vcov)){
      validate_dots(suggest_args = c("se", "cluster", "ssc"), valid_args = "dof")
    }
  }

  if(is.null(stage)) stage = 2


  # IV
  if(isTRUE(object$iv) && !isTRUE(dots$iv)){
    stage = unique(stage)
    res = list()

    # if lean, we still compute the summary for the first stage,
    #  then we will inject it in the iv_first_stage object of the 2nd stage
    # => this is critical to get the right Wald stat (of the 1st stage),
    #  otherwise it won't be possible to get it.
    remove_stage_1 = FALSE
    if(lean && !1 %in% stage){
      remove_stage_1 = TRUE
      stage = 1:2
    }

    stage_names = c()

    for(s in seq_along(stage)){
      if(stage[s] == 1){
        for(i in seq_along(object$iv_first_stage)){
          res[[length(res) + 1]] = summary(object$iv_first_stage[[i]],
                                           vcov = vcov, ssc = ssc, lean = lean,
                                           forceCovariance = forceCovariance,
                                           vcov_fix = vcov_fix,
                                           n = n, nthreads = nthreads, iv = TRUE)

          stage_names[length(stage_names) + 1] = paste0("First stage: ", 
                                                        names(object$iv_first_stage)[i])
        }

      } else {
        # We keep the information on clustering => matters for wald tests of 1st stage
        my_res = summary(object, vcov = vcov, ssc = ssc, lean = lean,
                         forceCovariance = forceCovariance, vcov_fix = vcov_fix,
                         n = n, nthreads = nthreads, iv = TRUE)

        res[[length(res) + 1]] = my_res
        stage_names[length(stage_names) + 1] = "Second stage"
      }
    }

    if(lean && 2 %in% stage){
      # we inject the summary of the first stage into the iv_first_stage
      qui_1st = which(grepl("^First", stage_names))
      qui_2nd = which(stage_names == "Second stage")

      tmp_1st = res[qui_1st]
      names(tmp_1st) = names(object$iv_first_stage)

      res[[qui_2nd]][["iv_first_stage"]] = tmp_1st
    }

    if(remove_stage_1){
      qui_2nd = which(stage_names == "Second stage")
      return(res[[qui_2nd]])
    }

    if(length(res) == 1){
      return(res[[1]])
    }

    values = list("iv" = stage_names)
    res_multi = setup_multi(res, values)
    attr(res_multi, "print_request") = "long"

    return(res_multi)
  }


  # The new VCOV
  if(skip_vcov){
    vcov = object$cov.scaled

  } else {
    vcov = vcov.fixest(object, vcov = vcov, ssc = ssc, forceCovariance = forceCovariance,
                       vcov_fix = vcov_fix,
                       keepBounded = keepBounded, nthreads = nthreads,
                       attr = TRUE, se = se, cluster = cluster, ...)
  }

  # NOTA:
  # I need to add se and cluster even if they're not needed only to ensure it
  # works fine when vcov is a function and cluster/se are arguments

  sd2 = diag(vcov)
  sd2[sd2 < 0] = NA
  se = sqrt(sd2)

  # used to handle the case of bounded parameters
  params = names(object$coefficients)
  if(length(se) != length(params)){
    se = se[params]
  }
  names(se) = params

  # The coeftable is modified accordingly
  coeftable = object$coeftable

  # th z & p values
  zvalue = object$coefficients/se
  pvalue = fixest_pvalue(object, zvalue, vcov)

  # update of se if bounded
  se_format = se
  isBounded = object$isBounded
  if(!is.null(isBounded) && any(isBounded)){
    if(!keepBounded){
      se_format[!isBounded] = decimalFormat(se_format[!isBounded])
      se_format[isBounded] = attr(isBounded, "type")
    }
  }

  # modifs of the table
  coeftable = cbind("Estimate" = object$coefficients, "Std. Error" = se_format,
                    "t value" = zvalue, "Pr(>|t|)" = pvalue)
  if(object$method != "feols"){
    colnames(coeftable) = colnames(object$coeftable)
  }

  attr(coeftable, "type") = attr(se, "type") = attr(vcov, "type")

  object$cov.scaled = vcov
  object$coeftable = coeftable
  object$se = se

  if(lean){
    var2clean = c("fixef_id", "data", "residuals", "fitted.values", "scores", "sumFE",
                  "slope_variables_reordered", "y", "weights", "irls_weights",
                  "obs_selection", "iv_residuals", "fitted.values_demean",
                  "working_residuals", "linear.predictors")

    object[var2clean] = NULL

    object$lean = TRUE

    # We also clean the environments from the formulas
    for(i in seq_along(object$fml_all)){
      attr(object$fml_all[[i]], ".Environment") = .GlobalEnv
    }
    attr(object$fml, ".Environment") = .GlobalEnv

  }

  object$summary = TRUE

  # We save the arguments used to construct the summary
  if("summary_flags" %in% names(dots)){
    # If here => this is a call from an estimation (=fit)
    object$summary_flags = dots$summary_flags
    object$summary_from_fit = TRUE
  } else {
    # build_flags does not accept missing arguments
    if(missing(ssc)) ssc = NULL

    if(lean){

      size_KB = as.numeric(object.size(vcov)) / 8 / 1000

      if(size_KB > 100){
        # Here => means the user has manually provided a cluster => will be of size N at least
        # To respect lean = TRUE we keep no memory of this choice
        vcov_in = NULL
      }

    }

    object$summary_flags = build_flags(mc, vcov = vcov_in, ssc = ssc)
    object$summary_from_fit = NULL
  }

  # agg
  if(!missnull(agg)){
    agg_result = aggregate(object, agg, full = TRUE, from_summary = TRUE)
    object$coeftable = agg_result$coeftable
    object$model_matrix_info = agg_result$model_matrix_info
    object$is_agg = TRUE
  }

  return(object)
}


#' @rdname summary.fixest
summary.fixest_list = function(object, se, cluster, ssc = getFixest_ssc(), .vcov, 
                               stage = 2, lean = FALSE, n, ...){

  dots = list(...)

  res = list()
  for(i in seq_along(object)){
    my_res = summary(object[[i]], se = se, cluster = cluster, ssc = ssc, 
                     .vcov = .vcov, stage = stage, lean = lean, n = n)

    # we unroll in case of IV
    if("fixest_multi" %in% class(my_res)){
      for(j in seq_along(my_res)){
        res[[length(res) + 1]] = my_res[[j]]
      }
    } else {
      res[[length(res) + 1]] = my_res
    }
  }

  # We return a simple list
  class(res) = NULL

  res
}

#' Summary method for fixed-effects coefficients
#'
#' This function summarizes the main characteristics of the fixed-effects coefficients. 
#' It shows the number of fixed-effects that have been set as references and the first 
#' elements of the fixed-effects.
#'
#' @method summary fixest.fixef
#'
#' @param object An object returned by the function [`fixef.fixest`].
#' @param n Positive integer, defaults to 5. The `n` first fixed-effects for each 
#' fixed-effect dimension are reported.
#' @param ... Not currently used.
#'
#' @return
#' It prints the number of fixed-effect coefficients per fixed-effect dimension, as well as 
#' the number of fixed-effects used as references for each dimension, and the mean and variance 
#' of the fixed-effect coefficients. Finally, it reports the first 5 (arg. `n`) elements of 
#' each fixed-effect.
#'
#' @author
#' Laurent Berge
#'
#' @seealso
#' [`femlm`], [`fixef.fixest`], [`plot.fixest.fixef`].
#'
#' @examples
#'
#' data(trade)
#'
#' # We estimate the effect of distance on trade
#' # => we account for 3 fixed-effects effects
#' est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination+Product, trade)
#'
#' # obtaining the fixed-effects coefficients
#' fe_trade = fixef(est_pois)
#'
#' # printing some summary information on the fixed-effects coefficients:
#' summary(fe_trade)
#'
#'
summary.fixest.fixef = function(object, n = 5, ...){
  # This function shows some generic information on the fixed-effect coefficients

  # checking arguments in dots
  if(is_user_level_call()){
    validate_dots(suggest_args = "n")
  }

  Q = length(object)
  fixef_names = names(object)
  slope_flag = grepl("\\[", fixef_names)
  fe = gsub("\\[.+", "", fixef_names)
  slope = gsub(".+\\[|\\].+", "", fixef_names)

  isSlope = any(slope_flag)
  isFE = any(!slope_flag)
  info = as.character(10*isFE + isSlope)

  # we rework the names
  fixef_names[slope_flag] = paste0(slope[slope_flag], " (slopes: ", fe[slope_flag], ")")

  isRegular = TRUE
  if(Q > 1){
    nb_ref = attr(object, "references")
    nb_per_cluster = sapply(object, length)
    mean_per_cluster = sd_per_cluster = c()
    for(i in 1:Q){
      mean_per_cluster[i] = as.character(signif(mean(object[[i]]), 3))
      sd_per_cluster[i] = as.character(signif(sd(object[[i]]), 3))
    }
    res = as.data.frame(rbind(nb_per_cluster, nb_ref, mean_per_cluster, sd_per_cluster))

    row_1 = paste0("Number of ", switch(info, 
                                        "11" = "fixed-effects/slopes", 
                                        "10"="fixed-effects", 
                                        "1"="slopes"))

    rownames(res) = c(row_1, "Number of references", "Mean", "Standard-deviation")

    colnames(res) = fixef_names

    if(sum(nb_ref) > Q-1){
      isRegular = FALSE
    }
  }

  # The message

  my_title = paste0(switch(info, 
                           "11" = "Fixed-effects/Slope", 
                           "10"="Fixed_effects", 
                           "1"="Slope"), 
                    " coefficients\n")
  cat(my_title)
  if(Q == 1){
    x1 = object[[1]]
    if(slope_flag){
      catma("Number of slope coefficients for variable {slope} (slope: {fe}) is {len ? x1}.\n")
    } else {
      catma("Number of fixed-effects for variable {fixef_names} is {len ? x1}.\n")
    }

    catma("\tMean = ", signif(mean(x1), 3), "\tVariance = ", signif(var(x1), 3), "\n")
  } else {
    print(res)
  }

  # We print the first 5 elements of each fixed-effect
  cat("\nCOEFFICIENTS:\n")
  for(i in 1:Q){
    m = head(object[[i]], n)

    m_char = as.data.frame(t(as.data.frame(c("", as.character(signif(m, 4))))))
    names(m_char) = c(paste0(fixef_names[i], ":"), names(m))
    rownames(m_char) = " "

    n_cluster = length(object[[i]])
    if(n_cluster > n){
      m_char[["   "]] = sma("... {n ? n_cluster - n} remaining")
    }

    print(m_char)
    if(i != Q) cat("-----\n")
  }

}

####
#### fixef ####
####


#' Extract the Fixed-Effects from a `fixest` estimation.
#'
#' This function retrieves the fixed effects from a `fixest` estimation. It is useful only 
#' when there are one or more fixed-effect dimensions.
#'
#' @inheritParams feNmlm
#'
#' @param object A `fixest` estimation (e.g. obtained using [`feols`] or [`feglm`]).
#' @param notes Logical. Whether to display a note when the fixed-effects coefficients are 
#' not regular.
#' @param sorted Logical, default is `TRUE`. Whether to order the fixed-effects by their names. 
#' If `FALSE`, then the order used in the demeaning algorithm is used.
#'
#' @details
#' If the fixed-effect coefficients are not regular, then several reference points need to 
#' be set: this means that the fixed-effects coefficients cannot be directly interpreted. 
#' If this is the case, then a warning is raised.
#'
#' @return
#' A list containing the vectors of the fixed effects.
#'
#' If there is more than 1 fixed-effect, then the attribute \dQuote{references} is created. 
#' This is a vector of length the number of fixed-effects, each element contains the number 
#' of coefficients set as references. By construction, the elements of the first 
#' fixed-effect dimension are never set as references. In the presence of regular 
#' fixed-effects, there should be Q-1 references (with Q the number of fixed-effects).
#'
#' @seealso
#' [`plot.fixest.fixef`]. See also the main estimation functions [`femlm`], [`feols`] 
#' or [`feglm`]. Use [`summary.fixest`] to see the results with the appropriate 
#' standard-errors, [`fixef.fixest`] to extract the fixed-effect coefficients, and 
#' the function [`etable`] to visualize the results of multiple estimations.
#'
#' @author
#' Laurent Berge
#'
#'
#' @examples
#'
#' data(trade)
#'
#' # We estimate the effect of distance on trade => we account for 3 fixed-effects
#' est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination+Product, trade)
#'
#' # Obtaining the fixed-effects coefficients:
#' fe_trade = fixef(est_pois)
#'
#' # The fixed-effects of the first fixed-effect dimension:
#' head(fe_trade$Origin)
#'
#' # Summary information:
#' summary(fe_trade)
#'
#' # Plotting them:
#' plot(fe_trade)
#'
fixef.fixest = function(object, notes = getFixest_notes(), sorted = TRUE, 
                        nthreads = getFixest_nthreads(), 
                        fixef.tol = 1e-5, fixef.iter = 10000, ...){

  # object is a fixest object
  # This function retrieves the dummies

  check_arg(notes, sorted, "logical scalar")

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots()
  }

  check_value(fixef.tol, "numeric scalar GT{0} LT{1}")
  check_value(fixef.iter, "strict integer scalar GT{0}")

  if(isTRUE(object$lean)){
    # LATER: recompute the FEs by extracting them from the data
    stop("Fixed-effects from 'lean' fixest objects cannot be extracted. Please re-estimate with 'lean = FALSE'.")
  }

  # Preliminary stuff
  S = object$sumFE

  if(is.null(S)){
    stop("The estimation was done without fixed-effects (FE). The FE coefficients cannot be retrieved.")
  }

  family = object$family
  fixef_names = object$fixef_vars
  
  fixef_algo = object$fixef.algo

  fixef_id = object$fixef_id

  Q = length(fixef_id)
  N = length(S)

  # either (we need to clean its attributes for unlist to be efficient)
  id_dummies_vect = list()
  for(i in 1:Q) id_dummies_vect[[i]] = as.vector(fixef_id[[i]])

  is_ref_approx = FALSE
  isSlope = FALSE
  if(!is.null(object$fixef_terms)){
    isSlope = TRUE
    # This is an estimation with slopes
    # we apply another method => we use the demeaning function

    slope_variables = object$slope_variables_reordered
    slope_flag = object$slope_flag_reordered

    new_order = object$fe.reorder
    fixef_vars = object$fixef_vars[new_order]
    fixef_sizes = as.integer(object$fixef_sizes[new_order])

    # We reconstruct the terms
    fixef_terms = c()
    start = c(0, cumsum(abs(slope_flag)))
    for(i in seq_along(slope_flag)){
      sf = slope_flag[i]
      if(sf >= 0){
        fixef_terms = c(fixef_terms, fixef_vars[i])
      }

      if(abs(sf) > 0){
        fixef_terms = c(fixef_terms, paste0(fixef_vars[i], "[[", names(slope_variables)[start[i] + 1:abs(sf)], "]]"))
      }
    }

    fe_id_list = object$fixef_id[new_order]

    #
    # STEP 2: demeaning
    #

    nthreads = check_set_nthreads(nthreads)

    table_id_I = as.integer(unlist(lapply(fe_id_list, tabulate), use.names = FALSE))

    S_demean = cpp_demean(y = S, X_raw = 0, r_weights = 0, iterMax = as.integer(fixef.iter),
                          diffMax = fixef.tol, r_nb_id_Q = fixef_sizes,
                          fe_id_list = fe_id_list, table_id_I = table_id_I,
                          slope_flag_Q = slope_flag, slope_vars_list = slope_variables,
                          r_init = 0, nthreads = nthreads, 
                          algo_extraProj = fixef_algo$extraProj, 
                          algo_iter_warmup = fixef_algo$iter_warmup, 
                          algo_iter_projAfterAcc = fixef_algo$iter_projAfterAcc, 
                          algo_iter_grandAcc = fixef_algo$iter_grandAcc, 
                          save_fixef = TRUE)

    fixef_coef = S_demean$fixef_coef

    names(fixef_sizes) = fixef_vars

    fe_all = c()
    for(i in seq_along(slope_flag)){
      fe_all = c(fe_all, rep(fixef_vars[i], 1 + abs(slope_flag[i]) - (slope_flag[i] < 0)))
    }

    start = 1
    i = 1
    fixef_values = list()
    for(q in seq_along(slope_flag)){
      sf = slope_flag[q]
      if(sf == 0){
        fixef_values[[i]] = fixef_coef[seq(start, length.out = fixef_sizes[q])]
        i = i + 1
        start = start + fixef_sizes[q]
      } else {
        nb = abs(sf) + (sf > 0)

        adj = 0
        if(sf > 0){
          # The fixed-effects is in the last position
          j_fe = nb - 1
          fixef_values[[i]] = fixef_coef[seq(start + j_fe, by = nb, length.out = fixef_sizes[q])]
          adj = 1
        }

        for(j in 0:(nb - 1 - adj)){
          fixef_values[[i + j + adj]] = fixef_coef[seq(start + j, by = nb, 
                                                       length.out = fixef_sizes[q])]
        }
        i = i + nb
        start = start + fixef_sizes[q] * nb
      }

    }

    #
    # Now the referenes
    #

    nb_ref = integer(length(fixef_terms))

    # FE references
    who_fe = slope_flag >= 0
    Q_fe = sum(who_fe)
    if(Q_fe >= 2){

      my_dum = fe_id_list[who_fe]

      dumMat = matrix(unlist(my_dum, use.names = FALSE), N, Q_fe) - 1
      orderCluster = matrix(unlist(lapply(my_dum, order), use.names = FALSE), N, Q_fe) - 1

      nbCluster = sapply(my_dum, max)

      fixef_values_tmp = cpp_get_fe_gnl(Q_fe, N, rep(1, N), dumMat, nbCluster, orderCluster)

      # the information on the references
      nb_ref_fe = fixef_values_tmp[[Q_fe+1]]
    } else {
      nb_ref_fe = integer(Q_fe)
    }

    # Slope references (if associated FE + constant)

    names(slope_flag) = fixef_vars

    Q_slope = sum(abs(slope_flag))
    nb_ref_slope = integer(Q_slope)
    i_noVS = 1
    for(i in seq_along(fixef_terms)){

      ft = fixef_terms[i]

      if(!grepl("[[", ft, fixed = TRUE)){
        # No slope => already computed
        nb_ref[i] = nb_ref_fe[i_noVS]
        i_noVS = i_noVS + 1

      } else {
        # Slope
        fe_name = gsub("\\[.+", "", ft)
        my_dum = fe_id_list[[fe_name]]

        my_order = order(my_dum)
        var_sorted = slope_variables[[gsub(".+\\[|\\]+", "", ft)]][my_order]

        # if no associated FE => we check only 0 values
        if(slope_flag[fe_name] < 0){
          nb_ref[i] = cpp_constant_dum(fixef_sizes[fe_name], var_sorted, my_dum[my_order], 
                                       only_0 = TRUE)
        } else {
          nb_ref[i] = cpp_constant_dum(fixef_sizes[fe_name], var_sorted, my_dum[my_order])
        }
      }
    }

    # we recreate that to avoid conditioning on isSlope later
    fixef_id = fixef_id[fe_all]
    fixef_names = fixef_terms

  } else if(Q == 1){
    # This is the simplest case
    id = id_dummies_vect[[1]]

    myOrder = order(id)
    myDiff = c(1, diff(id[myOrder]))

    select = myOrder[myDiff == 1]

    fixef_values = list(S[select])

    # There are no references => no need to set nb_ref
  } else {
    # We apply a Rcpp script to handle complicated cases (and we don't know beforehand if the input is one)

    dumMat = matrix(unlist(id_dummies_vect), N, Q) - 1
    orderCluster = matrix(unlist(lapply(id_dummies_vect, order)), N, Q) - 1

    nbCluster = sapply(fixef_id, max)

    fixef_values = cpp_get_fe_gnl(Q, N, S, dumMat, nbCluster, orderCluster)

    # The algorithm is fast but may fail on some instance. We need to check
    if(any(fixef_values[[Q + 1]] > 1) && Q >= 3){
      # we re-compute the "sumFE"
      sum_FE = fixef_values[[1]][1 + dumMat[, 1]]
      for(i in 2:Q){
        sum_FE = sum_FE + fixef_values[[i]][1 + dumMat[, i]]
      }

      if(max(abs(sum_FE - S)) > 1e-1){
        # divergence => we need to correct
        # we recompute the FEs

        is_ref_approx = TRUE

        fixef_sizes = as.integer(object$fixef_sizes)
        new_order = order(object$fixef_sizes, decreasing = TRUE)
        fixef_sizes = fixef_sizes[new_order]

        fe_id_list = object$fixef_id[new_order]
        table_id_I = as.integer(unlist(lapply(fe_id_list, tabulate), use.names = FALSE))

        slope_flag = rep(0L, Q)
        slope_variables = list()

        S_demean = cpp_demean(y = S, X_raw = 0, r_weights = 0, iterMax = as.integer(fixef.iter),
                              diffMax = fixef.tol, r_nb_id_Q = fixef_sizes,
                              fe_id_list = fe_id_list, table_id_I = table_id_I,
                              slope_flag_Q = slope_flag, slope_vars_list = slope_variables,
                              r_init = 0, nthreads = nthreads, 
                              algo_extraProj = fixef_algo$extraProj, 
                              algo_iter_warmup = fixef_algo$iter_warmup, 
                              algo_iter_projAfterAcc = fixef_algo$iter_projAfterAcc, 
                              algo_iter_grandAcc = fixef_algo$iter_grandAcc, 
                              save_fixef = TRUE)

        fixef_coef = S_demean$fixef_coef

        end = cumsum(fixef_sizes)
        start = c(1, end + 1)
        for(i in 1:Q){
          fixef_values[[new_order[i]]] = fixef_coef[start[i]:end[i]]
        }
      }
    }

    # the information on the references
    nb_ref = fixef_values[[Q + 1]]
    fixef_values[[Q + 1]] = NULL
  }

  # now saving & adding information
  all_clust = list()
  Q_all = ifelse(isSlope, length(fixef_terms), Q)
  for(i in 1:Q_all){
    # We put it in the right order, if requested
    fn = attr(fixef_id[[i]], "fixef_names")

    if(sorted){
      if(all(!grepl("[^[:digit:]]", fn))) fn = as.numeric(fn)
      my_order = order(fn)

      cv = fixef_values[[i]][my_order]
      names(cv) = fn[my_order]
      all_clust[[fixef_names[i]]] = cv
    } else {
      cv = fixef_values[[i]]
      names(cv) = fn
      all_clust[[fixef_names[i]]] = cv
    }

  }

  class(all_clust) = c("fixest.fixef", "list")

  # Dealing with the references
  if(Q_all > 1){
    names(nb_ref) = fixef_names
    attr(all_clust, "references") = nb_ref

    if(!isSlope) slope_flag = rep(FALSE, Q)

    # warning if unbalanced
    if(notes && sum(nb_ref[!slope_flag]) > Q-1){
      if(is_ref_approx){
        message("NOTE: The fixed-effects are not regular, they cannot be straightforwardly interpreted. The number of references is only approximate.")
      } else {
        message("NOTE: The fixed-effects are not regular, they cannot be straightforwardly interpreted.")
      }

    }
  }

  # Family information
  attr(all_clust, "exponential") = FALSE
  if(object$method_type == "feNmlm" && object$family %in% c("poisson", "negbin")){
    attr(all_clust, "exponential") = TRUE
  } else if(object$method_type == "feglm" && object$family$link == "log"){
    attr(all_clust, "exponential") = TRUE
  }

  return(all_clust)
}

#' Functions exported from \pkg{nlme} to implement \pkg{fixest} methods
#'
#' The package \pkg{fixest} uses the `fixef` method from \pkg{nlme}. Unfortunately, 
#' re-exporting this method is required in order not to attach package \pkg{nlme}.
#'
#' * Here is the help from package \pkg{nlme}: [`fixef`][nlme::fixed.effects]. The 
#' help from package \pkg{fixest} is here: [`fixef.fixest`].
#'
#' @note
#' I could find this workaround thanks to the package \pkg{plm}.
#'
#' @name fixef_reexported
#' @keywords internal
NULL

#' @rdname fixef_reexported
#' @name fixef
NULL




#' Displaying the most notable fixed-effects
#'
#' This function plots the 5 fixed-effects with the highest and lowest values, for 
#' each of the fixed-effect dimension. It takes as an argument the fixed-effects obtained 
#' from the function [`fixef.fixest`] after an estimation using [`femlm`], [`feols`] or [`feglm`].
#'
#' @method plot fixest.fixef
#'
#' @param x An object obtained from the function [`fixef.fixest`].
#' @param n The number of fixed-effects to be drawn. Defaults to 5.
#' @param ... Not currently used.
#'
#' Note that the fixed-effect coefficients might NOT be interpretable. This function is 
#' useful only for fully regular panels.
#'
#' If the data are not regular in the fixed-effect coefficients, this means that several 
#' \sQuote{reference points} are set to obtain the fixed-effects, thereby 
#' impeding their interpretation. In this case a warning is raised.
#'
#' @seealso
#' [`fixef.fixest`] to extract clouster coefficients. See also the main 
#' estimation function [`femlm`], [`feols`] or [`feglm`]. Use [`summary.fixest`] to see 
#' the results with the appropriate standard-errors, the function [`etable`] to 
#' visualize the results of multiple estimations.
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' data(trade)
#'
#' # We estimate the effect of distance on trade
#' # => we account for 3 fixed-effects
#' est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination+Product, trade)
#'
#' # obtaining the fixed-effects coefficients
#' fe_trade = fixef(est_pois)
#'
#' # plotting them
#' plot(fe_trade)
#'
#'
plot.fixest.fixef = function(x, n = 5, ...){

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = "n")
  }

  Q = length(x)

  mfrow = as.character(c(11, 12, 22, 22, 32, 32, 33, 33))

  fixef_names = names(x)
  slope_flag = grepl("\\[", fixef_names)

  if(Q > 1 && sum(attr(x, "references")[!slope_flag]) > sum(!slope_flag)-1){
    warning("The fixed-effects are not regular, they cannot be straightforwardly interpreted.", call. = FALSE)
  }

  # modification par:
  opar = par(no.readonly =TRUE)
  on.exit(par(opar))

  par(mfrow = as.numeric(strsplit(mfrow[Q], "")[[1]]), mar = c(3, 3, 2.5, 3))

  addExp = attr(x, "exponential")
  for(i in 1:Q){
    plot_single_cluster(x[[i]], n = n, addExp = addExp, fe_name = fixef_names[i])
  }

}



####
#### fixest own methods ####
####



#' Extracts the coefficients table from an estimation
#'
#' Methods to extracts the coefficients table and its sub-components from an estimation.
#'
#' @param object An estimation (fitted model object), e.g. a `fixest` object.
#' @param ... Other arguments to the methods.
#'
#' @return
#' Returns a matrix (`coeftable`) or vectors.
#'
#' @seealso
#' Please look at the [`coeftable.fixest`] page for more detailed information.
#'
#' @examples
#'
#' est = lm(mpg ~ cyl, mtcars)
#' coeftable(est)
#'
coeftable = function(object, ...){
  UseMethod("coeftable")
}

#' @rdname coeftable
se = function(object, ...){
  UseMethod("se")
}

#' @rdname coeftable
pvalue = function(object, ...){
  UseMethod("pvalue")
}

#' @rdname coeftable
tstat = function(object, ...){
  UseMethod("tstat")
}


#' Extracts the coefficients table from an estimation
#'
#' Default method to extracts the coefficients table and its sub-components from an estimation.
#'
#' @inheritParams etable
#'
#' @method coeftable default
#'
#' @param object The result of an estimation (a fitted model object). Note that this function 
#' is made to work with `fixest` objects so it may not work for the specific model you provide.
#' @param ... Other arguments that will be passed to `summary`.
#'
#' First the method summary is applied if needed, then the coefficients table is extracted from 
#' its output.
#'
#' The default method is very naive and hopes that the resulting coefficients table 
#' contained in the summary of the fitted model is well formed: this assumption is very 
#' often wrong. Anyway, there is no development intended since the coeftable/se/pvalue/tstat 
#' series of methods is only intended to work well with `fixest` objects. To extract 
#' the coefficients table from fitted models in a general way, it's better to 
#' use [tidy from broom](https://broom.tidymodels.org/).
#'
#' @return
#' Returns a matrix (`coeftable`) or vectors.
#'
#' @examples
#'
#' # NOTA: This function is really made to handle fixest objects
#' # The default methods works for simple structures, but you'd be
#' # likely better off with broom::tidy for other models
#'
#' est = lm(mpg ~ cyl, mtcars)
#' coeftable(est)
#'
#' se(est)
#'
#'
#'
#'
#'
coeftable.default = function(object, keep, drop, order, ...){
  # This function is EXTREMELY naive and I don't intend to improve it
  # => there is tidy for that which is much better
  # I just created that method to handle fixest/fixest_multi more easily

  check_arg(keep, drop, order, "NULL character vector no na")

  if(!any(grepl("summary", class(object)))){
    object_sum = try(summary(object, ...))
    if(!inherits(object_sum, "try-error")){
      object = object_sum
    }
  }

  list_mat = object[sapply(object, is.matrix)]

  ok = FALSE
  for(i in seq_along(list_mat)){
    mat = list_mat[[i]]
    if(!is.null(colnames(mat)) && any(grepl("(?i)(coef|estimate|value|Pr\\()", colnames(mat)))){
      ok = TRUE
      res = mat
    }
  }

  if(ok == FALSE){
    stop("Sorry, the coeffficients table could not be extracted.")
  }

  if(!missnull(keep) || !missnull(drop) || !missnull(order)){
    r_names = rownames(res)
    r_names = keep_apply(r_names, keep)
    r_names = drop_apply(r_names, drop)
    r_names = order_apply(r_names, order)

    if(length(r_names) == 0){
      return(NULL)
    }

    res = res[r_names, , drop = FALSE]
  }

  res
}


#' @describeIn coeftable.default Extracts the standard-errors from an estimation
se.default = function(object, keep, drop, order, ...){
  # There is NO GARANTEE that it works

  check_arg(keep, drop, order, "NULL character vector no na")

  mat = coeftable(object, keep = keep, drop = drop, order = order, ...)


  if(is.null(mat)){
    return(NULL)
  }

  res = mat[, 2]
  names(res) = row.names(mat)

  res
}

#' @describeIn coeftable.default Extracts the standard-errors from an estimation
tstat.default = function(object, keep, drop, order, ...){
  # There is NO GARANTEE that it works

  check_arg(keep, drop, order, "NULL character vector no na")

  mat = coeftable(object, keep = keep, drop = drop, order = order, ...)

  if(is.null(mat)){
    return(NULL)
  }

  if(ncol(mat) != 4){
    stop("The p-values could not be obtained. Use broom::tidy?")
  }

  res = mat[, 3]
  names(res) = row.names(mat)

  res
}

#' @describeIn coeftable.default Extracts the p-values from an estimation
pvalue.default = function(object, keep, drop, order, ...){
  # There is NO GARANTEE that it works

  check_arg(keep, drop, order, "NULL character vector no na")

  mat = coeftable(object, keep = keep, drop = drop, order = order, ...)

  if(is.null(mat)){
    return(NULL)
  }

  if(ncol(mat) != 4){
    stop("The p-values could not be obtained. Use broom::tidy?")
  }

  res = mat[, 4]
  names(res) = row.names(mat)

  res
}

#' @describeIn coeftable.default Extracts the standard-errors from a VCOV matrix
se.matrix = function(object, keep, drop, order, ...){
  # There is NO GARANTEE that it works

  check_arg(object, "square numeric matrix")
  check_arg(keep, drop, order, "NULL character vector no na")

  vcov_diag = diag(object)
  vcov_diag[vcov_diag < 0] = NA

  all_names = rownames(object)
  se = sqrt(vcov_diag)
  names(se) = all_names

  if(!missnull(keep) || !missnull(drop) || !missnull(order)){

    if(is.null(all_names)){
      stop("The VCOV matrix has no names attributes: the arguments keep, drop or order cannot be used.")
    }

    all_names = keep_apply(all_names, keep)
    all_names = drop_apply(all_names, drop)
    all_names = order_apply(all_names, order)

    if(length(all_names) == 0){
      return(NULL)
    }

    se = se[all_names]
  }

  se
}



#' Obtain various statistics from an estimation
#'
#' Set of functions to directly extract some commonly used statistics, like the p-value or 
#' the table of coefficients, from estimations. This was first implemented for 
#' `fixest` estimations, but has some support for other models.
#'
#' @inheritParams etable
#'
#' @method coeftable fixest
#'
#' @param object A `fixest` object. For example an estimation obtained from [`feols`].
#' @param cluster Tells how to cluster the standard-errors (if clustering is requested). Can 
#' be either a list of vectors, a character vector of variable names, a formula or an 
#' integer vector. Assume we want to perform 2-way clustering over `var1` and `var2` contained 
#' in the data.frame `base` used for the estimation. All the following `cluster` arguments 
#' are valid and do the same thing: `cluster = base[, c("var1, "var2")]`, 
#' `cluster = c("var1, "var2")`, `cluster = ~var1+var2`. If the two variables were used as 
#' clusters in the estimation, you could further use `cluster = 1:2` or leave it blank 
#' with `se = "twoway"` (assuming `var1` \[resp. `var2`\] was the 1st \[resp. 2nd\] cluster).
#' @param list Logical, default is `FALSE`. If `TRUE`, then a nested list is returned, the 
#' first layer is accessed with the coefficients names; the second layer with the 
#' following values: `coef`, `se`, `tstat`, `pvalue`. Note that the variable `"(Intercept)"` 
#' is renamed into `"constant"`.
#' @param ... Other arguments to be passed to [`summary.fixest`].
#'
#' @details
#' This set of tiny functions is primarily constructed for `fixest` estimations.
#'
#' @return
#' Returns a table of coefficients, with in rows the variables and four columns: the estimate, 
#' the standard-error, the t-statistic and the p-value.
#'
#' If `list = TRUE` then a nested list is returned, the first layer is accessed with 
#' the coefficients names; the second layer with the following values: 
#' `coef`, `se`, `tstat`, `pvalue`. For example, with `res = coeftable(est, list = TRUE)` 
#' you can access the SE of the coefficient `x1` with `res$x1$se`; and its 
#' coefficient with `res$x1$coef`, etc.
#'
#' @examples
#'
#' # Some data and estimation
#' data(trade)
#' est = fepois(Euros ~ log(dist_km) | Origin^Product + Year, trade)
#'
#' #
#' # Coeftable/se/tstat/pvalue
#' #
#'
#' # Default is clustering along Origin^Product
#' coeftable(est)
#' se(est)
#' tstat(est)
#' pvalue(est)
#'
#' # Now with two-way clustered standard-errors
#' #  and using coeftable()
#'
#' coeftable(est, cluster = ~Origin + Product)
#' se(est, cluster = ~Origin + Product)
#' pvalue(est, cluster = ~Origin + Product)
#' tstat(est, cluster = ~Origin + Product)
#'
#' # Or you can cluster only once:
#' est_sum = summary(est, cluster = ~Origin + Product)
#' coeftable(est_sum)
#' se(est_sum)
#' tstat(est_sum)
#' pvalue(est_sum)
#'
#' # You can use the arguments keep, drop, order
#' # to rearrange the results
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' est_iv = feols(y ~ x1 | x2 ~ x3, base)
#'
#' tstat(est_iv, keep = "x1")
#' coeftable(est_iv, keep = "x1|Int")
#'
#' coeftable(est_iv, order = "!Int")
#'
#' #
#' # Using lists
#' #
#'
#' # Returning the coefficients table as a list can be useful for quick
#' # reference in markdown documents.
#' # Note that the "(Intercept)" is renamed into "constant"
#'
#' res = coeftable(est_iv, list = TRUE)
#'
#' # coefficient of the constant:
#' res$constant$coef
#'
#' # pvalue of x1
#' res$x1$pvalue
#'
#'
#'
coeftable.fixest = function(object, vcov = NULL, ssc = NULL, cluster = NULL,
                            keep = NULL, drop = NULL, order = NULL, list = FALSE, ...){
  # We don't explicitly refer to the other arguments

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(list, "logical scalar")


  if(!isTRUE(object$summary) || !(all_missing(vcov, ssc, cluster) && ...length() > 0)){
    object = summary(object, vcov = vcov, ssc = ssc, cluster = cluster, ...)
  }

  # Let's find out the coefficients table
  res = object$coeftable

  if(!missnull(keep) || !missnull(drop) || !missnull(order)){
    r_names = rownames(res)
    r_names = keep_apply(r_names, keep)
    r_names = drop_apply(r_names, drop)
    r_names = order_apply(r_names, order)

    if(length(r_names) == 0){
      return(NULL)
    }

    res = res[r_names, , drop = FALSE]
  }

  if(list){
    res_list = list()
    coef_names = rownames(res)
    coef_names[coef_names == "(Intercept)"] = "constant"
    for(i in 1:nrow(res)){
      row = res[i, ]
      item = list(coef = row[1], se = row[2], tstat = row[3], pvalue = row[4])
      res_list[[coef_names[i]]] = item
    }
    res = res_list
  }

  res
}

#' @describeIn coeftable.fixest Extracts the standard-error of an estimation
se.fixest = function(object, vcov = NULL, ssc = NULL, cluster = NULL,
                     keep = NULL, drop = NULL, order = NULL, ...){

  check_arg(keep, drop, order, "NULL character vector no na")

  mat = coeftable(object, vcov = vcov, ssc = ssc, cluster = cluster,
                  keep = keep, drop = drop, order = order, ...)

  if(is.null(mat)){
    return(NULL)
  }

  res = mat[, 2]
  names(res) = row.names(mat)

  res
}

#' @describeIn coeftable.fixest Extracts the t-statistics of an estimation
tstat.fixest = function(object, vcov = NULL, ssc = NULL, cluster = NULL,
                        keep = NULL, drop = NULL, order = NULL, ...){

  check_arg(keep, drop, order, "NULL character vector no na")

  mat = coeftable(object, vcov = vcov, ssc = ssc, cluster = cluster,
                  keep = keep, drop = drop, order = order, ...)

  if(is.null(mat)){
    return(NULL)
  }

  res = mat[, 3]
  names(res) = row.names(mat)

  res
}

#' @describeIn coeftable.fixest Extracts the p-value of an estimation
pvalue.fixest = function(object, vcov = NULL, ssc = NULL, cluster = NULL,
                         keep = NULL, drop = NULL, order = NULL, ...){

  check_arg(keep, drop, order, "NULL character vector no na")

  mat = coeftable(object, vcov = vcov, ssc = ssc, cluster = cluster,
                  keep = keep, drop = drop, order = order, ...)

  if(is.null(mat)){
    return(NULL)
  }

  res = mat[, 4]
  names(res) = row.names(mat)

  res
}


####
#### stats ####
####



#' Extracts the number of observations form a `fixest` object
#'
#' This function simply extracts the number of observations form a `fixest` object, 
#' obtained using the functions [`femlm`], [`feols`] or [`feglm`].
#'
#' @inheritParams summary.fixest
#'
#' @param ... Not currently used.
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. 
#' Use [`summary.fixest`] to see the results with the appropriate standard-errors, 
#' [`fixef.fixest`] to extract the fixed-effects coefficients, and the function [`etable`] 
#' to visualize the results of multiple estimations.
#'
#' @author
#' Laurent Berge
#'
#' @return
#' It returns an interger.
#'
#' @examples
#'
#' # simple estimation on iris data with "Species" fixed-effects
#' res = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'             Petal.Width | Species, iris)
#'
#' nobs(res)
#' logLik(res)
#'
#'
nobs.fixest = function(object, ...){
  object$nobs
}

#' Aikake's an information criterion
#'
#' This function computes the AIC (Aikake's, an information criterion) from a `fixest` estimation.
#'
#' @inheritParams nobs.fixest
#'
#' @param ... Optionally, more fitted objects.
#' @param k A numeric, the penalty per parameter to be used; the default k = 2 is the 
#' classical AIC (i.e. `AIC=-2*LL+k*nparams`).
#'
#' @details
#' The AIC is computed as:
#' \deqn{AIC = -2\times LogLikelihood + k\times nbParams}
#' with k the penalty parameter.
#'
#' You can have more information on this criterion on [`AIC`][stats::AIC].
#'
#' @return
#' It return a numeric vector, with length the same as the number of objects taken as arguments.
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. 
#' Other statictics methods: [`BIC.fixest`], [`logLik.fixest`], [`nobs.fixest`].
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # two fitted models with different expl. variables:
#' res1 = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'              Petal.Width | Species, iris)
#' res2 = femlm(Sepal.Length ~ Petal.Width | Species, iris)
#'
#' AIC(res1, res2)
#' BIC(res1, res2)
#'
#'
AIC.fixest = function(object, ..., k = 2){

  dots = list(...)
  if(length(dots) > 0){
    # we check consistency with observations
    nobs_all = c(nobs(object), sapply(dots, nobs))

    if(any(diff(nobs_all) != 0)){
      warning("Models are not all fitted to the same number of observations.")
    }

    otherAIC = sapply(dots, AIC)
  } else {
    otherAIC = c()
  }

  all_AIC = c(-2*logLik(object) + k*object$nparams, otherAIC)

  all_AIC
}

#' Bayesian information criterion
#'
#' This function computes the BIC (Bayesian information criterion) from a `fixest` estimation.
#'
#'
#' @inheritParams nobs.fixest
#'
#' @param ... Optionally, more fitted objects.
#'
#' @details
#' The BIC is computed as follows:
#' \deqn{BIC = -2\times LogLikelihood + \log(nobs)\times nbParams}
#' with k the penalty parameter.
#'
#' You can have more information on this criterion on [`AIC`][stats::AIC].
#'
#' @return
#' It return a numeric vector, with length the same as the number of objects taken as arguments.
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. Other statistics functions: [`AIC.fixest`], [`logLik.fixest`].
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # two fitted models with different expl. variables:
#' res1 = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'             Petal.Width | Species, iris)
#' res2 = femlm(Sepal.Length ~ Petal.Width | Species, iris)
#'
#' AIC(res1, res2)
#' BIC(res1, res2)
#'
BIC.fixest = function(object, ...){

  dots = list(...)
  if(length(dots) > 0){
    # we check consistency with observations
    nobs_all = c(nobs(object), sapply(dots, nobs))

    if(any(diff(nobs_all) != 0)){
      warning("Models are not all fitted to the same number of observations.")
    }

    otherBIC = sapply(dots, BIC)
  } else {
    otherBIC = c()
  }

  all_BIC = c(-2*logLik(object) + object$nparams*log(nobs(object)), otherBIC)

  all_BIC
}

#' Extracts the log-likelihood
#'
#' This function extracts the log-likelihood from a `fixest` estimation.
#'
#' @inheritParams nobs.fixest
#'
#' @param ... Not currently used.
#'
#' @details
#' This function extracts the log-likelihood based on the model fit. You can have more 
#' information on the likelihoods in the details of the function [`femlm`].
#'
#' @return
#' It returns a numeric scalar.
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. Other 
#' statistics functions: [`AIC.fixest`], [`BIC.fixest`].
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # simple estimation on iris data with "Species" fixed-effects
#' res = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'             Petal.Width | Species, iris)
#'
#' nobs(res)
#' logLik(res)
#'
#'
logLik.fixest = function(object, ...){

  if(object$method_type == "feols"){
    # if the summary is 'lean', then no way we can compute that
    resid = object$residuals
    if(is.null(resid)) resid = NA

    sigma = sqrt(mean(resid^2))
    n = length(resid)
    ll = -1/2/sigma^2 * sum(resid^2) - n * log(sigma) - n * log(2*pi)/2
  } else {
    ll = object$loglik
  }

  ll
}

#' Extracts the coefficients from a `fixest` estimation
#'
#' This function extracts the coefficients obtained from a model estimated with 
#' [`femlm`], [`feols`] or [`feglm`].
#'
#' @inheritParams nobs.fixest
#' @inheritParams etable
#'
#' @param agg Logical scalar, default is `TRUE`. If the coefficients of the estimation have been aggregated, whether to report the aggregated coefficients. If `FALSE`, the raw coefficients will be returned.
#' @param collin Logical, default is `FALSE`. Whether the coefficients removed because of collinearity should be also returned as `NA`. It cannot be used when coefficients aggregation is also used.
#' @param ... Not currently used.
#'
#' @details
#' The coefficients are the ones that have been found to maximize the log-likelihood of the specified model. More information can be found on the models from the estimations help pages: [`femlm`], [`feols`] or [`feglm`].
#'
#' Note that if the model has been estimated with fixed-effects, to obtain the fixed-effect coefficients, you need to use the function [`fixef.fixest`].
#'
#' @return
#' This function returns a named numeric vector.
#'
#' @author
#' Laurent Berge
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. [`summary.fixest`], [`confint.fixest`], [`vcov.fixest`], [`etable`], [`fixef.fixest`].
#'
#' @examples
#'
#' # simple estimation on iris data, using "Species" fixed-effects
#' res = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'             Petal.Width | Species, iris)
#'
#' # the coefficients of the variables:
#' coef(res)
#'
#' # the fixed-effects coefficients:
#' fixef(res)
#'
#'
coef.fixest = coefficients.fixest = function(object, keep, drop, order,
                       collin = FALSE, agg = TRUE, ...){

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(collin, agg, "logical scalar")

  if(isTRUE(object$is_agg) && agg){
    if(collin){
      warning("The argument 'collin = TRUE' cannot be used when there is coefficient aggregation.")
    }

    res = object$coeftable[, 1]
    names(res) = rownames(object$coeftable)

  } else if(collin && !is.null(object$collin.coef)){
    res = object$collin.coef
  } else {
    res = object$coefficients
  }

  if(!missnull(keep) || !missnull(drop) || !missnull(order)){
    cnames = names(res)
    cnames = keep_apply(cnames, keep)
    cnames = drop_apply(cnames, drop)
    cnames = order_apply(cnames, order)

    if(length(cnames) == 0){
      return(numeric(0))
    }

    res = res[cnames]
  }

  if(identical(object$family, "negbin")){
    res = res[-length(res)]
  }


  # deltaMethod tweak
  if(is_calling_fun("deltaMethod")){
    sysOrigin = sys.parent()
    mc_DM = match.call(definition = sys.function(sysOrigin), call = sys.call(sysOrigin))

    if("parameterNames" %in% names(mc_DM)){
      PN = eval(mc_DM$parameterNames, parent.frame())

      check_value(PN, "character vector no na len(data)", .data = res,
                  .arg_name = "parameterNames", .up = 1)

      names(res) = PN
    }
  }

  res
}

#' @rdname coef.fixest
coefficients.fixest = coef.fixest


#' Extracts fitted values from a `fixest` fit
#'
#' This function extracts the fitted values from a model estimated with [`femlm`], 
#' [`feols`] or [`feglm`]. The fitted values that are returned are the *expected predictor*.
#'
#' @inheritParams nobs.fixest
#'
#' @param type Character either equal to `"response"` (default) or `"link"`. 
#' If `type="response"`, then the output is at the level of the response variable, i.e. 
#' it is the expected predictor \eqn{E(Y|X)}. If `"link"`, then the output is at 
#' the level of the explanatory variables, i.e. the linear predictor \eqn{X\cdot \beta}.
#' @param na.rm Logical, default is `TRUE`. If `FALSE` the number of observation returned 
#' will be the number of observations in the original data set, otherwise it will be the 
#' number of observations used in the estimation.
#' @param ... Not currently used.
#'
#' @details
#' This function returns the *expected predictor* of a `fixest` fit. The likelihood functions 
#' are detailed in [`femlm`] help page.
#'
#' @return
#' It returns a numeric vector of length the number of observations used to estimate the model.
#'
#' If `type = "response"`, the value returned is the expected predictor, i.e. the 
#' expected value of the dependent variable for the fitted model: \eqn{E(Y|X)}.
#' If `type = "link"`, the value returned is the linear predictor of the fitted model, 
#' that is \eqn{X\cdot \beta} (remind that \eqn{E(Y|X) = f(X\cdot \beta)}).
#'
#' @author
#' Laurent Berge
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. 
#' [`resid.fixest`], [`predict.fixest`], [`summary.fixest`], [`vcov.fixest`], [`fixef.fixest`].
#'
#' @examples
#'
#' # simple estimation on iris data, using "Species" fixed-effects
#' res_poisson = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'                     Petal.Width | Species, iris)
#'
#' # we extract the fitted values
#' y_fitted_poisson = fitted(res_poisson)
#'
#' # Same estimation but in OLS (Gaussian family)
#' res_gaussian = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'                     Petal.Width | Species, iris, family = "gaussian")
#'
#' y_fitted_gaussian = fitted(res_gaussian)
#'
#' # comparison of the fit for the two families
#' plot(iris$Sepal.Length, y_fitted_poisson)
#' points(iris$Sepal.Length, y_fitted_gaussian, col = 2, pch = 2)
#'
#'
fitted.fixest = fitted.values.fixest = function(object, type = c("response", "link"), 
                                                na.rm = TRUE, ...){

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = "type")
  }

  type = match.arg(type)

  fit = predict(object)

  if(type == "response" || object$method_type == "feols"){
    res = fit
  } else if(!is.null(object$mu)){
    res = object$mu
  } else if(object$method_type == "feNmlm"){
    family = object$family
    famFuns = switch(family,
                     poisson = ml_poisson(),
                     negbin = ml_negbin(),
                     logit = ml_logit(),
                     gaussian = ml_gaussian())

    res = famFuns$linearFromExpected(fit)
  } else {
    res = object$family$linkfun(fit)
  }

  # Nota: obs can be removed: either because of NA, either because perfect fit
  # Shall I put perfect fit as NA since they're out of the estimation???
  # Still pondering...
  # Actually adding them means a lot of work to ensure consistency (also in predict...)
  if(!na.rm) res = fill_with_na(res, object)

  res
}

#' @rdname fitted.fixest
#' @method fitted.values fixest
fitted.values.fixest = fitted.fixest

#' Extracts residuals from a `fixest` object
#'
#' This function extracts residuals from a fitted model estimated with [`femlm`], 
#' [`feols`] or [`feglm`].
#'
#' @inheritParams nobs.fixest
#'
#' @param type A character scalar, either `"response"` (default), `"deviance"`, 
#' `"pearson"`, or `"working"`. Note that the `"working"` corresponds to the residuals 
#' from the weighted least square and only applies to [`feglm`] models.
#' @param na.rm Logical, default is `TRUE`. Whether to remove the observations with NAs 
#' from the original data set. If `FALSE`, then the vector returned is always of the same 
#' length as the original data set.
#' @param ... Not currently used.
#'
#'
#' @return
#' It returns a numeric vector of the length the number of observations used for the estimation 
#' (if `na.rm = TRUE`) or of the length of the original data set (if `na.rm = FALSE`).
#'
#' @author
#' Laurent Berge
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. [`fitted.fixest`], [`predict.fixest`], [`summary.fixest`], [`vcov.fixest`], [`fixef.fixest`].
#'
#' @examples
#'
#' # simple estimation on iris data, using "Species" fixed-effects
#' res_poisson = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'                     Petal.Width | Species, iris)
#'
#' # we plot the residuals
#' plot(resid(res_poisson))
#'
resid.fixest = residuals.fixest = function(object, type = c("response", "deviance", "pearson", "working"),
                       na.rm = TRUE, ...){

  check_set_arg(type, "match")
  check_set_arg(na.rm, "logical scalar")

  method = object$method
  family = object$family

  r = object$residuals
  w = object[["weights"]]

  if(isTRUE(object$lean)){
    stop("The method 'resid.fixest' cannot be applied to a 'lean' fixest object. Please apply reestimate with 'lean = FALSE'.")
  }

  if(method %in% c("feols", "feols.fit") || (method %in% c("feNmlm", "femlm") && family == "gaussian")){

    if(type == "working"){
      stop("Type 'working' only applies to models fitted via feglm (thus is not valid for feols).")
    } 

    if(type %in% c("deviance", "pearson") && !is.null(w)){
      res = r * sqrt(w)
    } else {
      res = r
    }

  } else if(method %in% c("fepois", "feglm")){

    if(type == "response"){
      res = r

    } else if(type == "working"){
      res = object$working_residuals

    } else {
      mu = object$fitted.values
      if(is.null(w)) w = rep(1, length(r))

      if(type == "deviance"){
        y = r + mu

        res = sqrt(pmax((object$family$dev.resids)(y, mu, w), 0))
        qui = y < mu
        res[qui] = -res[qui]

      } else if(type == "pearson"){
        res = r * sqrt(w)/sqrt(object$family$variance(object$fitted.values))

      }
    }


  } else {

    if(type == "working"){
      stop("Type 'working' only applies to models fitted via feglm (thus is not valid for ", method, ").")
    }

    if(type == "response"){
      res = r

    } else {
      # deviance or pearson
      mu = object$fitted.values
      if(is.null(w)) w = rep(1, length(r))

      theta = ifelse(family == "negbin", object$theta, 1)

      if(type == "deviance"){

        # dev.resids function
        if(family == "poisson"){
          dev.resids = poisson()$dev.resids

        } else if(family == "logit"){
          dev.resids = binomial()$dev.resids

        } else if(family == "negbin"){
          dev.resids = function(y, mu, wt) 2 * wt * (y * log(pmax(1, y)/mu) - (y + theta) * log((y + theta)/(mu + theta)))

        }

        y = object$residuals + mu

        res = sqrt(pmax(dev.resids(y, mu, w), 0))
        qui = y < mu
        res[qui] = -res[qui]

      } else if(type == "pearson"){

        # variance function
        if(family == "poisson"){
          variance = poisson()$variance

        } else if(family == "logit"){
          variance = binomial()$variance

        } else if(family == "negbin"){
          variance = function(mu) mu + mu^2/theta

        }

        res = r * sqrt(w)/sqrt(variance(mu))

      }
    }


  }

  if(!na.rm){
    res = fill_with_na(res, object)
  }

  res
}

#' @rdname resid.fixest
residuals.fixest = resid.fixest

#' Predict method for `fixest` fits
#'
#' This function obtains prediction from a fitted model estimated with [`femlm`], 
#' [`feols`] or [`feglm`].
#'
#' @inheritParams nobs.fixest
#' @inheritParams fitted.fixest
#' @inheritParams summary.fixest
#'
#' @param newdata A data.frame containing the variables used to make the prediction. 
#' If not provided, the fitted expected (or linear if `type = "link"`) predictors are returned.
#' @param sample Either "estimation" (default) or "original". This argument is only used 
#' when arg. 'newdata' is missing, and is ignored otherwise. If equal to "estimation", 
#' the vector returned matches the sample used for the estimation. If equal to "original", 
#' it matches the original data set (the observations not used for the estimation being filled 
#' with NAs).
#' @param se.fit Logical, default is `FALSE`. If `TRUE`, the standard-error of the predicted 
#' value is computed and returned in a column named `se.fit`. This feature is only available 
#' for OLS models not containing fixed-effects.
#' @param interval Either "none" (default), "confidence" or "prediction". What type of 
#' confidence interval to compute. Note that this feature is only available for OLS models 
#' not containing fixed-effects (GLM/ML models are not covered).
#' @param level A numeric scalar in between 0.5 and 1, defaults to 0.95. Only used when 
#' the argument 'interval' is requested, it corresponds to the width of the confidence interval.
#' @param fixef Logical scalar, default is `FALSE`. If `TRUE`, a data.frame is returned, 
#' with each column representing the fixed-effects coefficients for each observation in 
#' `newdata` -- with as many columns as fixed-effects. Note that when there are variables 
#' with varying slopes, the slope coefficients are returned (i.e. they are not multiplied 
#' by the variable).
#' @param vs.coef Logical scalar, default is `FALSE`. Only used when `fixef = TRUE` and 
#' when variables with varying slopes are present. If `TRUE`, the coefficients of the 
#' variables with varying slopes are returned instead of the coefficient multiplied by the 
#' value of the variables (default).
#' @param ... Not currently used.
#'
#'
#' @return
#' It returns a numeric vector of length equal to the number of observations in argument `newdata`.
#' If `newdata` is missing, it returns a vector of the same length as the estimation sample, 
#' except if `sample = "original"`, in which case the length of the vector will match the one 
#' of the original data set (which can, but also cannot, be the estimation sample).
#' If `fixef = TRUE`, a `data.frame` is returned.
#' If `se.fit = TRUE` or `interval != "none"`, the object returned is a data.frame 
#' with the following columns: `fit`, `se.fit`, and, if CIs are requested, `ci_low` and `ci_high`.
#'
#'
#' @author
#' Laurent Berge
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. [`update.fixest`], [`summary.fixest`], [`vcov.fixest`], [`fixef.fixest`].
#'
#' @examples
#'
#' # Estimation on iris data
#' res = fepois(Sepal.Length ~ Petal.Length | Species, iris)
#'
#' # what would be the prediction if the data was all setosa?
#' newdata = data.frame(Petal.Length = iris$Petal.Length, Species = "setosa")
#' pred_setosa = predict(res, newdata = newdata)
#'
#' # Let's look at it graphically
#' plot(c(1, 7), c(3, 11), type = "n", xlab = "Petal.Length",
#'      ylab = "Sepal.Length")
#'
#' newdata = iris[order(iris$Petal.Length), ]
#' newdata$Species = "setosa"
#' lines(newdata$Petal.Length, predict(res, newdata))
#'
#' # versicolor
#' newdata$Species = "versicolor"
#' lines(newdata$Petal.Length, predict(res, newdata), col=2)
#'
#' # virginica
#' newdata$Species = "virginica"
#' lines(newdata$Petal.Length, predict(res, newdata), col=3)
#'
#' # The original data
#' points(iris$Petal.Length, iris$Sepal.Length, col = iris$Species, pch = 18)
#' legend("topleft", lty = 1, col = 1:3, legend = levels(iris$Species))
#'
#'
#' #
#' # Getting the fixed-effect coefficients for each obs.
#' #
#'
#' data(trade)
#' est_trade = fepois(Euros ~ log(dist_km) | Destination^Product +
#'                                            Origin^Product + Year, trade)
#' obs_fe = predict(est_trade, fixef = TRUE)
#' head(obs_fe)
#'
#' # can we check we get the right sum of fixed-effects
#' head(cbind(rowSums(obs_fe), est_trade$sumFE))
#'
#'
#' #
#' # Standard-error of the prediction
#' #
#'
#' base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
#'
#' est = feols(y ~ x1 + species, base)
#'
#' head(predict(est, se.fit = TRUE))
#'
#' # regular confidence interval
#' head(predict(est, interval = "conf"))
#'
#' # adding the residual to the CI
#' head(predict(est, interval = "predi"))
#'
#' # You can change the type of SE on the fly
#' head(predict(est, interval = "conf", vcov = ~species))
#'
#'
#'
predict.fixest = function(object, newdata, type = c("response", "link"), se.fit = FALSE,
                          interval = "none", level = 0.95, fixef = FALSE,
                          vs.coef = FALSE, sample = c("estimation", "original"),
                          vcov = NULL, ssc = NULL, ...){

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = c("newdata", "type"))
  }

  # Controls
  check_set_arg(type, sample, "match")
  check_arg(fixef, vs.coef, "logical scalar")
  check_arg(se.fit, "logical scalar")
  check_arg(level, "numeric scalar GT{.50} LT{1}")
  check_set_arg(interval, "match(none, confidence, prediction)")
  if(!se.fit && interval != "none"){
    se.fit = TRUE
  }

  if(se.fit && object$method_type != "feols"){
    stop("The standard-error of the prediction is currently only available for OLS models, sorry.")
  }

  # renaming to clarify
  fixef.return = fixef
  do_anyway = fixef.return || se.fit

  # if newdata is missing
  is_original_data = FALSE
  if(missing(newdata)){

    if(do_anyway || isTRUE(object$lean)){
      newdata = fetch_data(object, "In 'predict', ")
      is_original_data = TRUE
    } else {
      if(type == "response" || object$method_type == "feols"){
        res = object$fitted.values
      } else if(object$method_type == "feNmlm") {
        if("mu" %in% names(object)){
          res = object$mu
        } else {
          family = object$family
          famFuns = switch(family,
                   poisson = ml_poisson(),
                   negbin = ml_negbin(),
                   logit = ml_logit(),
                   gaussian = ml_gaussian())

          res = famFuns$linearFromExpected(object$fitted.values)
        }
      } else {
        res = object$family$linkfun(object$fitted.values)
      }

      if(sample == "original") res = fill_with_na(res, object)

      return(res)
    }

  }

  if(!is.matrix(newdata) && !"data.frame" %in% class(newdata)){
    stop("Argument 'newdata' must be a data.frame.")
  }

  # we ensure it really is a clean data.frame
  newdata = as.data.frame(newdata)

  mc = match.call()
  if(fixef.return){

    if(is.null(object$fixef_vars)){
      stop("The argument 'fixef=TRUE' cannot work since the estimation did not contain fixed-effects.")
    }

    if("type" %in% names(mc)){
      warning("Argument 'type' is ignored when fixef = TRUE.")
    }
  }

  # We deconstruct it in four steps:
  # 1) fixed-effects
  # 2) linear
  # 3) non-linear
  # 4) offset

  # + step 0: panel setup

  n = nrow(newdata)

  # NOTA 2019-11-26: I'm pondering whether to include NA-related messages
  # (would it be useful???)


  # STEP 0: panel setup

  fml = object$fml
  panel__meta__info = set_panel_meta_info(object, newdata)

  #
  # 1) Fixed-effects
  #

  # init fixed-effect values
  value_fixef = 0

  fixef_vars = object$fixef_vars
  if(!is.null(fixef_vars)){

    n_fe = length(fixef_vars)

    # Extraction of the FEs
    id_fixef = list()
    for(i in 1:n_fe){
      # checking if the variable is in the newdata
      fe_var = fixef_vars[i]
      variable = all.vars(str2lang(fe_var))
      isNotHere = !variable %in% names(newdata)
      if(any(isNotHere)){
        stop("The variable ", variable[isNotHere][1], " is absent from the 'newdata' but is needed for prediction (it is a fixed-effect variable).")
      }

      # The values taken by the FE variable
      fixef_values_possible = attr(object$fixef_id[[i]], "fixef_names")

      # Checking if ^ is present
      if(grepl("\\^", fe_var)){
        # If fastCombine was used => we're screwed, impossible to recover
        if(!all(grepl("_", fixef_values_possible, fixed = TRUE))){
          stop("You cannot use predict() based on the initial regression since the fixed-effect '", fe_var, "' was combined using an algorithm dropping the FE values (but fast). Please re-run the regression using the argument 'combine.quick=FALSE'.")
        }

        fe_var = fml_combine(fe_var, fastCombine = FALSE, vars = TRUE)
      }

      # Obtaining the vector of fixed-effect
      fixef_current = eval(str2lang(fe_var), newdata)

      fixef_current_num = unclass(factor(fixef_current, levels = fixef_values_possible))
      id_fixef[[i]] = fixef_current_num
    }

    names(id_fixef) = fixef_vars

    # Value of the fixef coefficients // we don't show the notes, it's inelegant
    fixef_coef = fixef(object, sorted = FALSE, notes = FALSE)

    # We create the DF to be returned
    if(fixef.return){
      fixef_df = list()
    }

    # Adding the FEs and Slopes
    if(!is.null(object$fixef_terms)){

      terms_full = extract_fe_slope(object$fixef_terms)
      fixef_vars = terms_full$fixef_vars
      slope_fe = terms_full$slope_fe
      slope_vars = terms_full$slope_vars
      slope_terms = terms_full$slope_terms

      # We extract the slope variables
      slope_vars_unik = unique(slope_vars)

      slope_var_list = list()
      for(i in 1:length(slope_vars_unik)){
        variable = all.vars(str2lang(slope_vars_unik[i]))
        isNotHere = !variable %in% names(newdata)
        if(any(isNotHere)){
          stop("The variable ", variable[isNotHere][1], " is absent from the 'newdata' but is needed for prediction (it is a variable with varying slope).")
        }

        slope_var_list[[slope_vars_unik[i]]] = eval(str2lang(slope_vars_unik[i]), newdata)
      }

      # Adding the FE values
      for(var in fixef_vars){
        fixef_current_num = id_fixef[[var]]
        fixef_coef_current = fixef_coef[[var]]

        if(fixef.return){
          fixef_df[[var]] = fixef_coef_current[fixef_current_num]

        } else {
          value_fixef = value_fixef + fixef_coef_current[fixef_current_num]
        }

      }

      # Adding the slopes
      for(i in seq_along(slope_vars)){

        fixef_current_num = id_fixef[[slope_fe[i]]]
        fixef_coef_current = fixef_coef[[slope_terms[i]]]

        if(fixef.return){
          vname = slope_terms[i]

          # We return only the coefs OR the coef * the variable
          if(vs.coef){
            fixef_df[[vname]] = fixef_coef_current[fixef_current_num]

          } else {
            fixef_df[[vname]] = fixef_coef_current[fixef_current_num] * slope_var_list[[slope_vars[i]]]
          }


        } else {
          value_fixef = value_fixef + fixef_coef_current[fixef_current_num] * slope_var_list[[slope_vars[i]]]
        }
      }


    } else {
      # Adding only FEs
      for(i in 1:n_fe){
        fixef_current_num = id_fixef[[i]]
        fixef_coef_current = fixef_coef[[i]]

        if(fixef.return){
          fixef_df[[fixef_vars[i]]] = fixef_coef_current[fixef_current_num]

        } else {
          value_fixef = value_fixef + fixef_coef_current[fixef_current_num]
        }

      }
    }

    if(fixef.return){

      # putting the results into a DF
      res = fixef_df
      attr(res, "row.names") = .set_row_names(length(res[[1L]]))
      oldClass(res) = "data.frame"

      if(is_original_data && sample == "estimation"){
        # here we want the same nber of obs
        # as in the estimation sample
        for(i in seq_along(object$obs_selection)){
          res = res[object$obs_selection[[i]], , drop = FALSE]
        }
      }

      return(res)
    }

    # dropping names
    value_fixef = as.vector(value_fixef)
  }

  #
  # 2) Linear values
  #

  coef = object$coefficients

  value_linear = 0
  var_keep = NULL
  rhs_fml = fml_split(fml, 1)
  linear.varnames = all_vars_with_i_prefix(rhs_fml[[3]])

  if(length(linear.varnames) > 0){
    # Checking all variables are there

    if(isTRUE(object$iv) && object$iv_stage == 2){
      names(coef) = gsub("^fit_", "", names(coef))
      linear.varnames = c(linear.varnames, all_vars_with_i_prefix(object$fml_all$iv[[2]]))
      iv_fml = object$fml_all$iv
      rhs_fml = .xpd(..lhs ~ ..endo + ..rhs, 
                     ..lhs = rhs_fml[[2]], 
                     ..endo = iv_fml[[2]], 
                     ..rhs = rhs_fml[[3]])
    }

    varNotHere = setdiff(linear.varnames, names(newdata))
    if(length(varNotHere) > 0){
      stopi("The variable{$s, enum.q ? varNotHere} used to estimate the model (in fml) {$are} missing in the data.frame given by the argument 'newdata'.")
    }

    # we create the matrix
    matrix_linear = error_sender(fixest_model_matrix_extra(object = object, newdata = newdata,
                                                           original_data = FALSE, fml = rhs_fml,
                                                           i_noref = TRUE),
                                 "Error when creating the linear matrix: ")

    # Checking the levels created with i()
    mm_info_new = attr(matrix_linear, "model_matrix_info")
    if(!is.null(mm_info_new)){
      mm_info = object$model_matrix_info
      # The order of creation is exactly the same (same fun used),
      # so the two mm_info are identical in structure
      for(i in seq_along(mm_info)){
        mm_new_i = mm_info_new[[i]]
        mm_i = mm_info[[i]]
        if("coef_names_full" %in% names(mm_i)){
          pblm = setdiff(mm_new_i$coef_names_full, mm_i$coef_names_full)
          if(length(pblm) > 0){
            stopi("In i(), predictions cannot be done for values that were not present at estimation time. It concerns the value{$s, enum ? pblm}.")
          }
        }
      }
    }
    

    var_keep = intersect(names(coef), colnames(matrix_linear))
    value_linear = value_linear + as.vector(matrix_linear[, var_keep, drop = FALSE] %*% coef[var_keep])
  }

  #
  # 3) Non linear terms
  #

  value_NL = 0
  NL_fml = object$NL.fml
  if(!is.null(NL_fml)){
    # controlling that we can evaluate that
    NL_vars = all.vars(NL_fml)
    varNotHere = setdiff(NL_vars, c(names(coef), names(newdata)))
    if(length(varNotHere) > 0){
      stopi("Some variables used to estimate the model (in the non-linear formula) are missing from argument 'newdata': {enum.q ? varNotHere}.")
    }

    var2send = intersect(NL_vars, names(newdata))
    env = new.env()
    for(var in var2send){
      assign(var, newdata[[var]], env)
    }

    coef2send = setdiff(NL_vars, names(newdata))
    for(iter_coef in coef2send){
      assign(iter_coef, coef[iter_coef], env)
    }

    # Evaluation of the NL part
    value_NL = eval(NL_fml[[2]], env)
  }

  #
  # 4) offset value
  #

  value_offset = 0
  offset = object$call$offset
  if(!is.null(offset)){
    # evaluation of the offset
    
    if(is.numeric(offset)){
      # simple numeric offset
      value_offset = offset

    } else {
      # offset valid only if formula
      offset_char = as.character(offset)

      if(length(offset_char) == 2 && offset_char[1] == "~"){
        offset_fml = eval(offset)
        varNotHere = setdiff(all.vars(offset_fml), names(newdata))
        if(length(varNotHere) > 0){
          stopi("In the offset, the variable{$s, enum.bq, is ? varNotHere} not present in 'newdata'.")
        }

        value_offset = eval(offset_fml[[length(offset_fml)]], newdata)
      } else {
        stop("Predict can't be applied to this estimation because the offset (", deparse_long(offset), ") cannot be evaluated for the new data. Use a formula for the offset in the first estimation to avoid this.")
      }

    }

  }

  value_predicted = value_fixef + value_linear + value_NL + value_offset

  if(type == "link" || object$method_type == "feols"){
    res = value_predicted
  } else if(object$method_type == "feNmlm") {
    # Now the expected predictor
    family = object$family
    famFuns = switch(family,
                     poisson = ml_poisson(),
                     negbin = ml_negbin(),
                     logit = ml_logit(),
                     gaussian = ml_gaussian())

    if(family == "gaussian"){
      exp_value = 0
    } else {
      exp_value = exp(value_predicted)
    }

    res = famFuns$expected.predictor(value_predicted, exp_value)
  } else {
    res = object$family$linkinv(value_predicted)
  }


  #
  # se.fit
  #

  if(se.fit){

    if(!is.null(object$fixef_vars)){
      stop("The standard-errors (SEs) of the prediction cannot be computed in the presence of fixed-effects. To obtain the SEs, you would need to include the FEs as standard factors in the model.")
    }

    if(!is.null(NL_fml)){
      stop("The standard-errors (SEs) of the prediction cannot be computed in models containing non-linear in parameter elements.")
    }

    # The matrix has been created already

    V_raw = vcov(object, attr = TRUE, vcov = vcov, ssc = ssc)
    V = V_raw[var_keep, var_keep, drop = FALSE]
    X = matrix_linear[, var_keep, drop = FALSE]

    var.fit = rowSums((X %*% V) * X)
    se.fit = sqrt(var.fit)

    res = data.frame(fit = res, se.fit = se.fit)

    if(interval != "none"){
      fact = fixest_CI_factor(object, level, V_raw)

      if(interval == "prediction"){
        w = object$weights

        if(!is.null(w)){
          var.u = cpp_ssq(resid(object), w) / w
        } else {
          var.u = cpp_ssq(resid(object))
        }
        var.u = var.u / degrees_freedom(object, "resid")

        se_obs = sqrt(var.fit + var.u)

      } else {
        se_obs = se.fit
      }

      res$ci_low  = res$fit + fact[1] * se_obs
      res$ci_high = res$fit + fact[2] * se_obs
    }

  }

  res
}


#' Confidence interval for parameters estimated with `fixest`
#'
#' This function computes the confidence interval of parameter estimates obtained from a 
#' model estimated with [`femlm`], [`feols`] or [`feglm`].
#'
#' @inheritParams nobs.fixest
#' @inheritParams vcov.fixest
#'
#' @param parm The parameters for which to compute the confidence interval (either an 
#' integer vector OR a character vector with the parameter name). If missing, all 
#' parameters are used.
#' @param level The confidence level. Default is 0.95.
#' @param coef.col Logical, default is `FALSE`. If `TRUE` the column `coefficient` is 
#' inserted in the first position containing the coefficient names.
#'
#' @return
#' Returns a data.frame with two columns giving respectively the lower and upper bound 
#' of the confidence interval. There is as many rows as parameters.
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # Load trade data
#' data(trade)
#'
#' # We estimate the effect of distance on trade (with 3 fixed-effects)
#' est_pois = femlm(Euros ~ log(dist_km) + log(Year) | Origin + Destination +
#'                  Product, trade)
#'
#' # confidence interval with "normal" VCOV
#' confint(est_pois)
#'
#' # confidence interval with "clustered" VCOV (w.r.t. the Origin factor)
#' confint(est_pois, se = "cluster")
#'
#'
confint.fixest = function(object, parm, level = 0.95, vcov, se, cluster,
                          ssc = NULL, coef.col = FALSE, ...){

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = c("parm", "level", "se", "cluster"),
                  valid_args = c("forceCovariance", "keepBounded"))
  }

  # Control
  check_arg(level, "numeric scalar gt{0.5} lt{1}")

  dots = list(...)

  IS_INTERNAL = isTRUE(dots$internal)
  if(IS_INTERNAL){
    if(length(object$coefficients) == 0){
      return(NULL)
    }
  }

  # The proper SE
  sum_object = summary(object, vcov = vcov, se = se, cluster = cluster, ssc = ssc, ...)
  coeftable = sum_object$coeftable

  se_all = coeftable[, 2]
  coef_all = coeftable[, 1]

  # the parameters for which we should compute the confint
  all_params = rownames(coeftable)

  # Ensuring names are all right
  names(se_all) = names(coef_all) = all_params

  if(missing(parm)){
    parm_use = all_params
  } else if(is.numeric(parm)){
    if(any(parm %% 1 != 0)){
      stop("If the argument 'parm' is numeric, it must be integers.")
    }

    parm_use = unique(na.omit(all_params[parm]))
    if(length(parm_use) == 0){

      if(IS_INTERNAL){
        return(NULL)
      }

      stop("There are ", length(all_params), " coefficients, the argument 'parm' does not correspond to any of them.")
    }
  } else if(is.character(parm)){
    parm_pblm = setdiff(parm, all_params)
    if(length(parm_pblm) > 0 && !IS_INTERNAL){
      stopi("some parameters of 'parm' have no estimated coefficient: {enum.bq ? parm_pblm}.")
    }

    parm_use = intersect(parm, all_params)

    if(IS_INTERNAL && length(parm_use) == 0){
      return(NULL)
    }
  }

  # multiplicative factor
  fact = fixest_CI_factor(object, level, sum_object$cov.scaled)

  # The confints
  # Note that for glm models, there is no profiling
  lower_bound = coef_all[parm_use] + fact[1] * se_all[parm_use]
  upper_bound = coef_all[parm_use] + fact[2] * se_all[parm_use]

  val = (1 - level) / 2
  bound_names = paste0(round(100*c(val, 1-val), 1), " %")

  if(coef.col){
    res = data.frame(coefficient = parm_use, lower_bound, upper_bound, row.names = NULL)
    names(res)[-1] = bound_names
  } else {
    res = data.frame(lower_bound, upper_bound, row.names = parm_use)
    names(res) = bound_names
  }

  attr(res, "type") = attr(se_all, "type")

  res
}

#' Updates a `fixest` estimation
#'
#' Updates and re-estimates a `fixest` model (estimated with [`femlm`], [`feols`] or [`feglm`]). 
#' This function updates the formulas and use previous starting values to estimate a new 
#' `fixest` model. The data is obtained from the original `call`.
#'
#' @method update fixest
#' 
#' @param object A `fixest` or `fixest_multi` object. These are obtained from [`feols`], or
#' [`feglm`] estimations, for example.
#' @param fml.update Changes to be made to the original argument `fml`. See more information 
#' on [`update.formula`][stats::update.formula]. You can add/withdraw both variables 
#' and fixed-effects. E.g. `. ~ . + x2 | . + z2` would add the variable `x2` and the 
#' fixed-effect `z2` to the former estimation.
#' @param nframes (Advanced users.) Defaults to 1. Number of frames up the stack where 
#' to perform the evaluation of the updated call. By default, this is the parent frame.
#' @param evaluate Logical, default is `TRUE`. If `FALSE`, only the updated call is returned.
#' @param ... Other arguments to be passed to the functions [`femlm`], [`feols`] or [`feglm`].
#'
#' @return
#' It returns a `fixest` object (see details in [`femlm`], [`feols`] or [`feglm`]).
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. [`predict.fixest`], [`summary.fixest`], [`vcov.fixest`], [`fixef.fixest`].
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # Example using trade data
#' data(trade)
#'
#' # main estimation
#' est_pois = fepois(Euros ~ log(dist_km) | Origin + Destination, trade)
#'
#' # we add the variable log(Year)
#' est_2 = update(est_pois, . ~ . + log(Year))
#'
#' # we add another fixed-effect: "Product"
#' est_3 = update(est_2, . ~ . | . + Product)
#'
#' # we remove the fixed-effect "Origin" and the variable log(dist_km)
#' est_4 = update(est_3, . ~ . - log(dist_km) | . - Origin)
#'
#' # Quick look at the 4 estimations
#' etable(est_pois, est_2, est_3, est_4)
#'
update.fixest = function(object, fml.update, nframes = 1, evaluate = TRUE, ...){
  # Update method
  # fml.update: update the formula
  # If 1) SAME DATA and 2) SAME dep.var, then we make initialisation


  if(missing(fml.update)){
    fml.update = . ~ .
  } else {
    check_arg(fml.update, "formula")
  }

  check_arg(evaluate, "logical scalar")

  if(isTRUE(object$is_fit)){
    stop("update method not available for fixest estimations obtained from fit methods.")
  }

  if(!isScalar(nframes) || nframes < 1 || nframes %% 1 != 0){
    stop("Argument 'nframes' must be a single integer greater than, or equal to, 1.")
  }

  call_new = match.call()
  dots = list(...)
  
  is_multiple = isTRUE(dots$is_multiple)
  if(is_multiple){
    dots$is_multiple = NULL
    call_new = dots$call
    dots$call = NULL
  }

  dot_names = names(dots)
  if("fixef" %in% dot_names){
    stop("Argument 'fixef' is not accepted in the 'update.fixest' method. Please make modifications to fixed-effects directly in the argument 'fml.update'. (E.g. .~.|.+v5 to add variable v5 as a fixed-effect.)")
  }

  if(any(dot_names == "")){
    call_new_names = names(call_new)
    problems = call_new[call_new_names == ""][-1]
    stop("In 'update.fixest' the arguments of '...' are passed to the function ", object$method, ", and must be named. Currently there are un-named arguments (e.g. '", deparse_long(problems[[1]]), "').")
  }

  #
  # I) Linear formula update
  #

  fml_old = object$fml_all$linear
  fml_linear = update(fml_old, fml_split(fml.update, 1))

  # Family information
  if(!is.null(dots$family)){
    if(object$method_type == "feols"){
      stop("'family' is not an argument of function feols().")
    } else if(object$method %in% c("femlm", "feNmlm", "fepois", "fenegbin")){
      family_new = match.arg(dots$family, c("poisson", "negbin", "gaussian", "logit"))
    }
  }

  #
  # II) fixed-effects updates
  #

  fml_fixef = NULL

  update_fml_parts = fml_split(fml.update, raw = TRUE)
  n_parts = length(update_fml_parts)

  if(n_parts > 2 + (object$method_type == "feols")){
    stop("The update formula cannot have more than ", 2 + (object$method_type == "feols"), " parts for the method ", object$method, ".")
  }

  is_fe = n_parts > 1 && !is_fml_inside(update_fml_parts[[2]])

  fixef_vars = object$fixef_vars

  if(is_fe){

    fixef_old = object$fml_all$fixef

    # I use it as text to catch the var1^var2 FEs (update does not work)
    if(is.null(fixef_old)){
      fixef_old = ~1
      fixef_old_text = "~ 1"
    } else {
      fixef_old_text = deparse_long(fixef_old)
    }

    fixef_new_fml = fml_maker(update_fml_parts[[2]])
    fixef_new_text = deparse_long(fixef_new_fml)

    if(fixef_new_text == "~."){
      # nothing happens
      fixef_new = fixef_old

    } else if(fixef_new_text %in% c("~0", "~1")){
      fixef_new = ~1

    } else if(grepl("\\^", fixef_old_text) || grepl("\\^", fixef_new_text)){
      # we update manually.... dammmit
      # Note that what follows does not work ONLY when you have number^var or number^number
      # and both cases don't make much sense -- I need not control for them
      fml_text_old = gsub("\\^", "__666__", fixef_old_text)
      fml_text_new = gsub("\\^", "__666__", fixef_new_text)

      fixef_new_wip = update(as.formula(fml_text_old), as.formula(fml_text_new))

      fixef_new = as.formula(gsub("__666__", "^", fixef_new_wip))
    } else {
      fixef_new = update(fixef_old, fixef_new_fml)
    }

    if(length(all.vars(fixef_new)) > 0){
      # means there is a fixed-effect
      fml_fixef = fixef_new
    }

  } else if(!is.null(fixef_vars)){
    # the formula updated:
    fml_fixef = object$fml_all$fixef

  }

  #
  # III) IV updates
  #

  if(n_parts > 2 || (n_parts == 2 && !is_fe)){

    iv_new_fml = fml_maker(update_fml_parts[[n_parts]])

    if(!is_fml_inside(iv_new_fml)){
      stop("The third part of the update formula in 'feols' must be a formula.")
    }

    iv_old = object$fml_all$iv

    if(is.null(iv_old)){
      fml_iv = iv_new_fml

    } else {
      fml_iv = update(iv_old, iv_new_fml)
    }

  } else {
    fml_iv = object$fml_all$iv
  }


  fml_new = merge_fml(fml_linear, fml_fixef, fml_iv)

  #
  # The call
  #

  call_old = object$call

  # we drop the argument fixef from old call (now it's in the fml_new)
  call_old$fixef = NULL

  # We also drop the arguments leading to multiple estimations:
  if(!is_multiple){
    # I have to think on how to handle this when implementing subset = ~x %in% list(a, b)
    # which can lead to multiple estimations
    # 
    # hmmm. limitation => I need to keep track of the sample... shouldn't I?
    # Otherwise, stg estimated for setosa following a split, may end up estimated 
    # on the whole sample. 
    # I'm not sure many persons use update that way... so it's not top priority, but 
    # it should definitely be there at some point
    # 
    # actually the split leads to exactly the same problem as subset
    
    call_old$split = call_old$fsplit = NULL
  }

  # new call: call_clear
  call_clear = call_old
  for(arg in setdiff(names(call_new)[-1], c("fml.update", "nframes", "evaluate", "object"))){
    if(is.null(call_new[[arg]])){
      # for some raeson, it wouldn't work if call_new[[arg]] is NULL
      call_clear[[arg]] = NULL
    } else {
      call_clear[[arg]] = call_new[[arg]]
    }    
  }

  call_clear$fml = as.call(fml_new)

  if(!evaluate){
    return(call_clear)
  }
  
  # Data: if the data has been saved AND isn't provided in input, the new estimation
  # will be within the saved data set
  
  if(!is.null(object$data) && !"data" %in% dot_names){
    prms = list(DATA_SAVED = object$data)
    call_clear[["data"]] = quote(DATA_SAVED)
    res = eval(call_clear, prms, parent.frame(nframes))
  } else {
    res = eval(call_clear, parent.frame(nframes))
  }

  res
}


#' @rdname update.fixest
update.fixest_multi = function(object, fml.update, nframes = 1, evaluate = TRUE, ...){
  # We use update.fixest
  # We just need to rewrite the formula since the call in the object is the one 
  # of the main estimation.
  
  est_first = object[[1]]
  fml_full = est_first$call$fml
  fml_parts = fml_split(fml_full, raw = TRUE)
  n_parts = length(fml_parts)
  is_fe = n_parts > 1 && !is_fml_inside(fml_parts[[2]])

  fml_all = est_first$fml_all
  # linear 
  fml_all$linear = fml_parts[[1]]

  # fixef
  if(is_fe){
    fml_fixef = ~1
    fml_fixef[[2]] = fml_parts[[2]]
    fml_all$fixef = fml_fixef
  }
  est_first$fml_all = fml_all
  
  call_new = match.call()

  update(est_first, fml.update = fml.update, nframes = nframes + 1, 
         evaluate = evaluate, is_multiple = TRUE, call = call_new, ...)
}


#' Extract the formula of a `fixest` fit
#'
#' This function extracts the formula from a `fixest` estimation (obtained with [`femlm`], 
#' [`feols`] or [`feglm`]). If the estimation was done with fixed-effects, they are added 
#' in the formula after a pipe (\dQuote{|}). If the estimation was done with a non 
#' linear in parameters part, then this will be added in the formula in between `I()`.
#'
#'
#' @param x An object of class `fixest`. Typically the result of a [`femlm`], [`feols`] 
#' or [`feglm`] estimation.
#' @param type A character scalar. Default is `type = "full"` which gives back a formula 
#' containing the linear part of the model along with the fixed-effects (if any) and the 
#' IV part (if any). If `type = "linear"` then only the linear formula is returned. 
#' If `type = "NL"` then only the non linear in parameters part is returned.
#' @param ... Not currently used.
#'
#' @return
#' It returns a formula.
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. 
#' [`model.matrix.fixest`], [`update.fixest`], [`summary.fixest`], [`vcov.fixest`].
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' # simple estimation on iris data, using "Species" fixed-effects
#' res = femlm(Sepal.Length ~ Sepal.Width + Petal.Length +
#'             Petal.Width | Species, iris)
#'
#' # formula with the fixed-effect variable
#' formula(res)
#'
#' # linear part without the fixed-effects
#' formula(res, "linear")
#'
#'
formula.fixest = function(x, type = c("full", "linear", "iv", "NL"), ...){
  # Extract the formula from the object
  # we add the clusters in the formula if needed

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = "type")
  }

  if(isTRUE(x$is_fit)){
    stop("formula method not available for fixest estimations obtained from fit methods.")
  }

  check_set_arg(type, "match")

  if(type == "linear"){
    return(x$fml)

  } else if(type == "NL"){

    if(!x$method == "feNmlm"){
      stop("type = 'NL' is not valid for a ", x$method, " estimation.")
    }

    NL.fml = x$NL.fml
    if(is.null(NL.fml)){
      stop("There was no nonlinear part estimated, option type = 'NL' cannot be used.")
    }

    return(NL.fml)

  } else if(type == "iv"){
    if(is.null(x$fml_all$iv)){
      stop("type = 'iv' is only available for feols estimations with IV.")
    }
  }

  # Shall I add LHS ~ RHS + NL(NL fml) | fe | iv ???
  res = merge_fml(x$fml_all$linear, x$fml_all$fixef, x$fml_all$iv)

  res
}


#' Design matrix of a `fixest` object
#'
#' This function creates the left-hand-side or the right-hand-side(s) of a [`femlm`], 
#' [`feols`] or [`feglm`] estimation.
#'
#' @method model.matrix fixest
#'
#' @inheritParams nobs.fixest
#'
#' @param data If missing (default) then the original data is obtained by evaluating 
#' the `call`. Otherwise, it should be a `data.frame`.
#' @param type Character vector or one sided formula, default is "rhs". Contains the type of 
#' matrix/data.frame to be returned. Possible values are: "lhs", "rhs", "fixef", "iv.rhs1" 
#' (1st stage RHS), "iv.rhs2" (2nd stage RHS), "iv.endo" (endogenous vars.), "iv.exo" 
#' (exogenous vars), "iv.inst" (instruments).
#' @param na.rm Default is `TRUE`. Should observations with NAs be removed from the matrix?
#' @param subset Logical or character vector. Default is `FALSE`. If `TRUE`, then the 
#' matrix created will be restricted only to the variables contained in the argument `data`, 
#' which can then contain a subset of the variables used in the estimation. If a 
#' character vector, then only the variables matching the elements of the vector via 
#' regular expressions will be created.
#' @param as.matrix Logical scalar, default is `FALSE`. Whether to coerce the result to a matrix.
#' @param as.df Logical scalar, default is `FALSE`. Whether to coerce the result to a data.frame.
#' @param collin.rm Logical scalar, default is `TRUE`. Whether to remove variables that were 
#' found to be collinear during the estimation. Beware: it does not perform a 
#' collinearity check.
#' @param ... Not currently used.
#'
#' @return
#' It returns either a vector, a matrix or a data.frame. It returns a vector for the 
#' dependent variable ("lhs"), a data.frame for the fixed-effects ("fixef") and a matrix 
#' for any other type.
#'
#' @seealso
#' See also the main estimation functions [`femlm`], [`feols`] or [`feglm`]. [`formula.fixest`], [`update.fixest`], [`summary.fixest`], [`vcov.fixest`].
#'
#'
#' @author
#' Laurent Berge
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' est = feols(y ~ poly(x1, 2) + x2, base)
#' head(model.matrix(est))
#'
#' # Illustration of subset
#'
#' # subset => character vector
#' head(model.matrix(est, subset = "x1"))
#'
#' # subset => TRUE, only works with data argument!!
#' head(model.matrix(est, data = base[, "x1", drop = FALSE], subset = TRUE))
#'
#'
#'
model.matrix.fixest = function(object, data, type = "rhs", na.rm = TRUE, subset = FALSE,
                               as.matrix = FALSE, as.df = FALSE, collin.rm = TRUE, ...){
  # We evaluate the formula with the past call
  # type: lhs, rhs, fixef, iv.endo, iv.inst, iv.rhs1, iv.rhs2
  # if fixef => return a DF

  # Checking the arguments
  if(is_user_level_call()){
    validate_dots(suggest_args = c("data", "type"))
  }

  # We allow type to be used in the location of data if data is missing
  if(!missing(data) && missing(type)){
    sc = sys.call()
    if(!"data" %in% names(sc)){
      if(!is.null(data) && (is.character(data) || "formula" %in% class(data))){
        # data is in fact the type
        type = data
        data = NULL
      }
    }
  }


  type = check_set_types(type, c("lhs", "rhs", "fixef", "iv.endo", "iv.inst", "iv.exo", 
                                 "iv.rhs1", "iv.rhs2"))

  if(isTRUE(object$is_fit)){
    stop("model.matrix method not available for fixest estimations obtained from fit methods.")
  }

  if(any(grepl("^iv", type)) && !isTRUE(object$iv)){
    stopi("The type{$s, enum.Q, is ! {'^iv'get ? type}} only valid for IV estimations.")
  }

  check_arg(subset, "logical scalar | character vector no na")

  check_set_arg(as.matrix, as.df, collin.rm, "logical scalar")

  # The formulas
  fml_full = formula(object, type = "full")
  fml_linear = formula(object, type = "linear")

  # Evaluation with the data
  original_data = FALSE
  if(missnull(data)){
    original_data = TRUE

    data = fetch_data(object, "To apply 'model.matrix.fixest', ")

  }

  # control of the data
  if(is.matrix(data)){
    if(is.null(colnames(data))){
      stop("If argument 'data' is to be a matrix, its columns must be named.")
    }
    data = as.data.frame(data)
  }

  if(!"data.frame" %in% class(data)){
    stop("The argument 'data' must be a data.frame or a matrix.")
  }

  data = as.data.frame(data)

  # Panel setup
  panel__meta__info = set_panel_meta_info(object, data)

  res = list()

  if("lhs" %in% type){
    lhs = list()

    namesLHS = all.vars(fml_linear[[2]])
    if(length(pblm <- setdiff(namesLHS, names(data)))){
      stop("In 'model.matrix', to create the LHS, the variable{$s, enum.bq, is ? pblm} not in the data set.")
    }

    lhs_text = deparse_long(fml_linear[[2]])
    lhs[[lhs_text]] = eval(fml_linear[[2]], data)

    res[["lhs"]] = as.data.frame(lhs)
  }

  if("rhs" %in% type && !isTRUE(object$onlyFixef)){
    # we kick out the intercept if there is presence of fixed-effects
    fake_intercept = !is.null(object$fixef_vars) && !(!is.null(object$slope_flag) && all(object$slope_flag < 0))

    fml = fml_linear
    if(isTRUE(object$iv)){
      fml_iv = object$fml_all$iv
      fml = .xpd(..lhs ~ ..endo + ..rhs, 
                 ..lhs = fml[[2]], 
                 ..endo = fml_iv[[2]], 
                 ..rhs = fml[[3]])
    }

    linear.mat = error_sender(fixest_model_matrix_extra(
      object = object, newdata = data, original_data = original_data,
      fml = fml, fake_intercept = fake_intercept,
      subset = subset),
      "In 'model.matrix', the RHS could not be evaluated: ")

    if(collin.rm){
      qui = which(colnames(linear.mat) %in% object$collin.var)
      if(length(qui) == ncol(linear.mat)){
        linear.mat = NULL
      } else if(length(qui) > 0){
        linear.mat =  linear.mat[, -qui, drop = FALSE]
      }

      coefs = object$coefficients
      if(length(coefs) == ncol(linear.mat) && any(colnames(linear.mat) != names(coefs))){
        # we reorder the matrix
        # This can happen in multiple estimations, where we respect the
        # order of the user

        if(all(names(coefs) %in% colnames(linear.mat))){
          linear.mat = linear.mat[, names(coefs), drop = FALSE]
        }
      }
    }

    res[["rhs"]] = linear.mat
  }

  if("fixef" %in% type){

    if(is.null(object$fixef_vars)){
      stop("In model.matrix, the type 'fixef' is only valid for models with fixed-effects. This estimation does not contain fixed-effects.")
    }

    fixef_terms_full = fixef_terms(object$fml_all$fixef)
    fixef_terms = fixef_terms_full$fml_terms

    fixef_df = error_sender(prepare_df(fixef_terms_full$fe_vars, data, fastCombine = FALSE),
                "In 'model.matrix', problem evaluating the fixed-effects part of the formula:\n")

    isSlope = any(fixef_terms_full$slope_flag != 0)
    if(isSlope){
      slope_df = error_sender(prepare_df(fixef_terms_full$slope_vars, data),
                  "In 'model.matrix', problem evaluating the variables with varying slopes in the fixed-effects part of the formula:\n")

      fixef_df = cbind(fixef_df, slope_df)
    }

    res[["fixef"]] = fixef_df
  }

  if("iv.endo" %in% type){
    fml = object$iv_endo_fml

    endo.mat = error_sender(fixest_model_matrix_extra(object = object, newdata = data, 
                                                      original_data = original_data, fml = fml,
                                                      fake_intercept = TRUE), 
                            "In 'model.matrix', the endogenous variables could not be evaluated: ")

    if(collin.rm){
      qui = which(colnames(endo.mat) %in% object$collin.var)
      if(length(qui) == ncol(endo.mat)){
        endo.mat = NULL
      } else if(length(qui) > 0){
        endo.mat =  endo.mat[, -qui, drop = FALSE]
      }
    }

    res[["iv.endo"]] = endo.mat
  }

  if("iv.inst" %in% type){
    fml = object$fml_all$iv

    inst.mat = error_sender(fixest_model_matrix_extra(object = object, newdata = data, 
                                                      original_data = original_data, fml = fml, 
                                                      fake_intercept = TRUE), 
                            "In 'model.matrix', the instruments could not be evaluated: ")

    if(collin.rm){
      qui = which(colnames(inst.mat) %in% object$collin.var)
      if(length(qui) == ncol(inst.mat)){
        inst.mat = NULL
      } else if(length(qui) > 0){
        inst.mat =  inst.mat[, -qui, drop = FALSE]
      }
    }

    res[["iv.inst"]] = inst.mat
  }

  if("iv.exo" %in% type){

    fake_intercept = !is.null(object$fixef_vars) && !(!is.null(object$slope_flag) && all(object$slope_flag < 0))
    fml = object$fml_all$linear

    exo.mat = error_sender(fixest_model_matrix_extra(object = object, newdata = data, 
                                                     original_data = original_data, fml = fml, fake_intercept = fake_intercept), 
                           "In 'model.matrix', the instruments could not be evaluated: ")

    if(is.atomic(exo.mat) && length(exo.mat) == 1){
      # This is the intercept only
      # Two cases:
      is_int = attr(terms(fml), "intercept")
      if(is_int && is.null(object$fixef_vars)){
        # Valid intercept
        exo.mat = matrix(1, nrow(data))
      } else {
        # should be NULL
        exo.mat = NULL
      }
    } else if(collin.rm){
      qui = which(colnames(exo.mat) %in% object$collin.var)
      if(length(qui) == ncol(exo.mat)){
        exo.mat = NULL
      } else if(length(qui) > 0){
        exo.mat =  exo.mat[, -qui, drop = FALSE]
      }
    }

    res[["iv.exo"]] = exo.mat
  }

  if("iv.rhs1" %in% type){
    # First stage

    if(!isTRUE(object$iv)){
      stop("In model.matrix, the type 'iv.rhs1' is only valid for IV models. This estimation is no IV.")
    }

    fml = object$fml
    if(object$iv_stage == 2){
      fml_iv = object$fml_all$iv
      fml = .xpd(..lhs ~ ..inst + ..rhs, 
                 ..lhs = fml[[2]], 
                 ..inst = fml_iv[[3]], 
                 ..rhs = fml[[3]])
    }

    fake_intercept = !is.null(object$fixef_vars) && !(!is.null(object$slope_flag) && all(object$slope_flag < 0))
    # iv_rhs1 = error_sender(fixest_model_matrix(fml, data, fake_intercept = fake_intercept),
    #                        "In 'model.matrix', the RHS of the 1st stage could not be evaluated: ")
    iv_rhs1 = error_sender(fixest_model_matrix_extra(object = object, newdata = data, 
                                                     original_data = original_data, fml = fml, 
                                                     fake_intercept = fake_intercept, 
                                                     subset = subset), 
                           "In 'model.matrix', the RHS of the 1st stage could not be evaluated: ")

    if(collin.rm){
      qui = which(colnames(iv_rhs1) %in% object$collin.var)
      if(length(qui) == ncol(iv_rhs1)){
        iv_rhs1 = NULL
      } else if(length(qui) > 0){
        iv_rhs1 =  iv_rhs1[, -qui, drop = FALSE]
      }
    }

    res[["iv.rhs1"]] = iv_rhs1
  }

  if("iv.rhs2" %in% type){
    # Second stage

    if(!isTRUE(object$iv)){
      stop("In model.matrix, the type 'iv.rhs2' is only valid for second stage IV models. This estimation is not even IV.")
    }

    if(!object$iv_stage == 2){
      stop("In model.matrix, the type 'iv.rhs2' is only valid for second stage IV models. This estimation is the first stage.")
    }

    # I) we get the fit
    stage_1 = object$iv_first_stage

    fit_vars = c()
    for(i in seq_along(stage_1)){
      fit_vars[i] = v = paste0("fit_", names(stage_1)[i])
      data[[v]] = predict(stage_1[[i]], newdata = data, sample = "original")
    }

    # II) we create the variables

    fml = object$fml
    fml = .xpd(..lhs ~ ..fit + ..rhs, 
               ..lhs = fml[[2]], 
               ..fit = fit_vars, 
               ..rhs = fml[[3]])

    fake_intercept = !is.null(object$fixef_vars) && !(!is.null(object$slope_flag) && all(object$slope_flag < 0))
    # iv_rhs2 = error_sender(fixest_model_matrix(fml, data, fake_intercept = fake_intercept),
    #                        "In 'model.matrix', the RHS of the 2nd stage could not be evaluated: ")
    iv_rhs2 = error_sender(fixest_model_matrix_extra(object = object, newdata = data, 
                                                     original_data = original_data, fml = fml, 
                                                     fake_intercept = fake_intercept, 
                                                     subset = subset), 
                           "In 'model.matrix', the RHS of the 2nd stage could not be evaluated: ")

    if(collin.rm){
      qui = which(colnames(iv_rhs2) %in% object$collin.var)
      if(length(qui) == ncol(iv_rhs2)){
        iv_rhs2 = NULL
      } else if(length(qui) > 0){
        iv_rhs2 =  iv_rhs2[, -qui, drop = FALSE]
      }
    }

    res[["iv.rhs2"]] = iv_rhs2
  }

  # Formatting res
  if(length(res) == 0){
    return(NULL)
  } else if(length(type) > 1){
    res = res[type]
    res = do.call(cbind, unname(res))
  } else {
    res = res[[1]]
  }

  #
  # Removing obs if needed
  #

  check_0 = FALSE
  if(original_data){

    if(na.rm == FALSE){
      # We do nothing. Or shall I add NA values for obs not
      # included in the estimation?
      if(FALSE && length(object$obs_selection) > 0){

        # we reconstruct the full vector of obs
        # and we fill with NA
        obs_id = 1:nrow(data)
        for(i in seq_along(object$obs_selection)){
          obs_id = select_obs(obs_id, object$obs_selection[[i]])
        }

        res[!1:nrow(res) %in% obs_id, ] = NA

      }

    } else {
      for(i in seq_along(object$obs_selection)){
        check_0 = TRUE
        res = select_obs(res, object$obs_selection[[i]])
      }
    }



    na.rm = FALSE
  }

  if(na.rm){

    if(is.numeric(res) || all(sapply(res, is.numeric))){
      info = cpp_which_na_inf(res, nthreads = 1)
    } else {
      info = list(any_na_inf = anyNA(res))
      if(info$any_na_inf) info$is_na_inf = !complete.cases(res)
    }

    if(info$any_na_inf){
      check_0 = TRUE
      isNA_L = info$is_na_inf

      if(sum(isNA_L) == nrow(res)){
        warning("All observations contain NA values.")
        return(res[-which(isNA_L), , drop = FALSE])
      }

      res = select_obs(res, -which(isNA_L))
    }
  }


  if(as.matrix){
    res = as.matrix(res)
  } else if(as.df){
    res = as.data.frame(res)
  } else if(identical(type, "lhs")){
    res = res[[1]]
  }

  if(check_0 && !"fixef" %in% type){
    only_0 = cpp_check_only_0(base::as.matrix(res), nthreads = 1)
    if(all(only_0 == 1)){
      stop("After removing NAs, not a single explanatory variable is different from 0.")

    } else if(any(only_0 == 1)){
      # At that point it must be either a matrix or a DF
      # (can't be a vector)
      res = res[, only_0 == 0, drop = FALSE]
    }
  }

  res
}


#' Extract the terms
#'
#' This function extracts the terms of a `fixest` estimation, excluding the fixed-effects part.
#'
#' @param x A `fixest` object. Obtained using the functions [`femlm`], [`feols`] or [`feglm`].
#' @param ... Not currently used.
#'
#' @return
#' An object of class `c("terms", "formula")` which contains the terms representation of a 
#' symbolic model.
#'
#'
#' @examples
#'
#' # simple estimation on iris data, using "Species" fixed-effects
#' res = feols(Sepal.Length ~ Sepal.Width*Petal.Length +
#'             Petal.Width | Species, iris)
#'
#' # Terms of the linear part
#' terms(res)
#'
#'
terms.fixest = function(x, ...){
  terms(formula(x, type = "linear"))
}


#' Extracts the weights from a `fixest` object
#'
#' Simply extracts the weights used to estimate a `fixest` model.
#'
#' @param object A `fixest` object.
#' @param ... Not currently used.
#'
#' @return
#' Returns a vector of the same length as the number of observations in the original data set. 
#' Ignored observations due to NA or perfect fit are re-introduced and their weights set to NA.
#'
#' @seealso
#' [`feols`], [`fepois`][fixest::feglm], [`feglm`], [`fenegbin`][fixest::femlm], [`feNmlm`].
#'
#' @examples
#'
#' est = feols(Petal.Length ~ Petal.Width, iris, weights = ~as.integer(Sepal.Length) - 3.99)
#' weights(est)
#'
weights.fixest = function(object, ...){
  w = object[["weights"]]

  # To comply with stats default
  if(is.null(w)) return(NULL)

  w = fill_with_na(w, object)

  w
}



#' Residual standard deviation of `fixest` estimations
#'
#' Extract the estimated standard deviation of the errors from `fixest` estimations.
#'
#' @inheritParams weights.fixest
#'
#' @return
#' Returns a numeric scalar.
#'
#' @seealso
#' [`feols`], [`fepois`][fixest::feglm], [`feglm`], [`fenegbin`][fixest::femlm], [`feNmlm`].
#'
#'
#' @examples
#'
#' est = feols(Petal.Length ~ Petal.Width, iris)
#' sigma(est)
#'
#'
#'
sigma.fixest = function(object, ...){
  sqrt(deviance(object) / (object$nobs - object$nparams))
}


#' Extracts the deviance of a fixest estimation
#'
#' Returns the deviance from a `fixest` estimation.
#'
#' @inheritParams weights.fixest
#'
#' @return
#' Returns a numeric scalar equal to the deviance.
#'
#' @seealso
#' [`feols`], [`fepois`][fixest::feglm], [`feglm`], [`fenegbin`][fixest::femlm], [`feNmlm`].
#'
#' @examples
#'
#' est = feols(Petal.Length ~ Petal.Width, iris)
#' deviance(est)
#'
#' est_pois = fepois(Petal.Length ~ Petal.Width, iris)
#' deviance(est_pois)
#'
deviance.fixest = function(object, ...){

  if(isTRUE(object$lean)){
    # LATER: recompute it
    stop("The method 'deviance.fixest' cannot be applied to 'lean' fixest objects. Please re-estimate with 'lean = FALSE'.")
  }

  method = object$method
  family = object$family
  r = object$residuals
  w = object[["weights"]]
  if(is.null(w)) w = rep(1, length(r))

  if(is.null(r) && !method %in% c("fepois", "feglm")){
    stop("The method 'deviance.fixest' cannot be applied to a 'lean' summary. Please apply it to the estimation object directly.")
  }

  if(method %in% c("feols", "feols.fit") || (method %in% c("femlm", "feNmlm") && family == "gaussian")){
    res = sum(w * r**2)

  } else if(method %in% c("fepois", "feglm")){
    res = object$deviance

  } else {
    mu = object$fitted.values
    theta = ifelse(family == "negbin", object$theta, 1)

    # dev.resids function
    if(family == "poisson"){
      dev.resids = poisson()$dev.resids

    } else if(family == "logit"){
      dev.resids = binomial()$dev.resids

    } else if(family == "negbin"){
      dev.resids = function(y, mu, wt) 2 * wt * (y * log(pmax(1, y)/mu) - (y + theta) * log((y + theta)/(mu + theta)))

    }

    y = r + mu

    res = sum(dev.resids(y, mu, w))
  }

  res
}



#' Hat values for `fixest` objects
#'
#' Computes the hat values for [`feols`] or [`feglm`] estimations. Only works when 
#' there are no fixed-effects.
#'
#' @param model A fixest object. For instance from feols or feglm.
#' @param ... Not currently used.
#'
#' @details
#' Hat values are not available for [`fenegbin`][fixest::femlm], [`femlm`] 
#' and [`feNmlm`] estimations.
#'
#' When there are fixed-effects, the hat values of the reduced form are different from the 
#' hat values of the full model. And we cannot get costlessly the hat values of the full model 
#' from the reduced form. It would require to reestimate the model with the 
#' fixed-effects as regular variables.
#'
#' @return
#' Returns a vector of the same length as the number of observations used in the estimation.
#'
#' @examples
#'
#' est = feols(Petal.Length ~ Petal.Width + Sepal.Width, iris)
#' head(hatvalues(est))
#'
#'
hatvalues.fixest = function(model, ...){
  # Only works for feglm/feols objects + no fixed-effects
  # When there are fixed-effects the hatvalues of the reduced form is different from
  #  the hatvalues of the full model. And we cannot get costlessly the hatvalues of the full
  #  model from the reduced form. => we need to reestimate the model with the FEs as
  #  regular variables.

  if(isTRUE(model$lean)){
    # LATER: recompute it
    stop("The method 'hatvalues.fixest' cannot be applied to 'lean' fixest objects. Please re-estimate with 'lean = FALSE'.")
  }

  if(is_user_level_call()){
    validate_dots()
  }

  method = model$method_type
  family = model$family

  msg = "hatvalues.fixest: 'hatvalues' is not implemented for estimations with fixed-effects."

  # An error is in fact nicer than a message + NA return due to the interplay with sandwich
  if(!is.null(model$fixef_id)){
    stop(msg)
  }

  if(method == "feols"){
    X = model.matrix(model)

    res = cpp_diag_XUtX(X, model$cov.iid / model$sigma2)

  } else if(method == "feglm"){
    XW = model.matrix(model) * sqrt(model$irls_weights)
    res = cpp_diag_XUtX(XW, model$cov.iid)

  } else {
    stop("'hatvalues' is currently not implemented for function ", method, ".")
  }

  res
}



#' Residual degrees-of-freedom for `fixest` objects
#' 
#' Returns the residual degrees of freedom for a fitted `fixest` object
#' 
#' 
#' @param object A `fixest` estimation, e.g. from [`feols`] or [`feglm`].
#' @param ... Not currently used
#' 
#' @return 
#' It returns an integer scalar giving the residuals degrees of freedom of the estimation.
#' 
#' @seealso 
#' The function [`degrees_freedom`] in `fixest`.
#' 
#' @examples 
#' 
#' est = feols(mpg ~ hp, mtcars)
#' df.residual(est)
#' 
df.residual.fixest = function(object, ...){
  degrees_freedom_iid(object, type = "resid")
}

####
#### sandwich ####
####


#' Extracts the scores from a fixest estimation
#'
#' Extracts the scores from a fixest estimation.
#'
#' @param x A `fixest` object, obtained for instance from [`feols`].
#' @param ... Not currently used.
#'
#' @return
#' Returns a matrix of the same number of rows as the number of observations used for 
#' the estimation, and the same number of columns as there were variables.
#'
#' @examples
#'
#' data(iris)
#' est = feols(Petal.Length ~ Petal.Width + Sepal.Width, iris)
#' head(estfun(est))
#'
estfun.fixest = function(x, ...){
  # 'scores' is an object always contained in fixest estimations

  if(isTRUE(x$lean)){
    # LATER: recompute it
    stop("The method 'estfun.fixest' cannot be applied to 'lean' fixest objects. Please re-estimate with 'lean = FALSE'.")
  }

  x$scores
}


#' Functions exported from \pkg{sandwich} to implement \pkg{fixest} methods
#'
#' The package \pkg{fixest} does not use `estfun` or `bread` from \pkg{sandwich}, but these 
#' methods have been implemented to allow users to leverage the variances from \pkg{sandwich}.
#'
#' * Here is the help from package \pkg{sandwich}: [`estfun`][sandwich::estfun] 
#' and [`bread`][sandwich::bread]. The help from package \pkg{fixest} is 
#' here: [`estfun.fixest`] and [`bread.fixest`].
#'
#'
#' @name sandwich_reexported
#' @keywords internal
NULL

#' @rdname sandwich_reexported
#' @name estfun
NULL

#' @rdname sandwich_reexported
#' @name bread
NULL


#' Extracts the bread matrix from fixest objects
#'
#' Extracts the bread matrix from fixest objects to be used to compute sandwich variance-covariance matrices.
#'
#' @param x A `fixest` object, obtained for instance from [`feols`].
#' @param ... Not currently used.
#'
#' @return
#' Returns a matrix of the same dimension as the number of variables used in the estimation.
#'
#' @examples
#'
#' est = feols(Petal.Length ~ Petal.Width + Sepal.Width, iris)
#' bread(est)
#'
bread.fixest = function(x, ...){

  if(is_user_level_call()){
    validate_dots()
  }

  method = x$method_type
  family = x$family

  if(method == "feols"){

    res = x$cov.iid / x$sigma2 * x$nobs

  } else if(method == "feglm"){

    res = x$cov.iid * x$nobs

  } else {
    stop("'bread' is not currently implemented for function ", method, ".")
  }

  res
}

Try the fixest package in your browser

Any scripts or data that you put into this service are public.

fixest documentation built on June 22, 2024, 9:12 a.m.