R/fixest_multi.R

Defines functions confint.fixest_multi resid.fixest_multi pvalue.fixest_multi tstat.fixest_multi se.fixest_multi coeftable.fixest_multi coef.fixest_multi as.list.fixest_multi print.fixest_multi summary.fixest_multi n_models models rep_df set_index_multi reshape_multi index_from_tree setup_multi

Documented in as.list.fixest_multi coef.fixest_multi coeftable.fixest_multi confint.fixest_multi models n_models print.fixest_multi pvalue.fixest_multi resid.fixest_multi se.fixest_multi summary.fixest_multi tstat.fixest_multi

#----------------------------------------------#
# Author: Laurent Berge
# Date creation: Sat Nov 07 09:05:26 2020
# ~: fixest_multi
#----------------------------------------------#

setup_multi = function(data, values, var = NULL, tree = NULL){
  # the incoming data is ALWAYS strongly structured
  # => they all have the same number of elements
  # data:
  # either a list of fixest objects
  # either a list of fixest_multi objects
  #
  # values: must be strongly and properly formatted
  # its length is the nber of objects (length(data)), with the appropriate names
  # var: to keep the information on the variable (sample, subset)

  # We also add the $model_info variable in each model

  # To remove after development
  check_arg(data, "list")
  check_arg(values, "named list")
  check_arg(var, "NULL character vector no na")
  check_arg(tree, "NULL data.frame")

  n_models = length(data)
  IS_VAR = !is.null(var)
  IS_TREE = !is.null(tree)

  if(!IS_TREE){
    stopifnot(identical(class(data), "list"))
  }

  var_label = NULL
  if(IS_VAR){
    stopifnot(length(values) == 1)
    var_label = names(values)[1]
    if(length(var) == 1){
      var = rep(var, n_models)
    }
  }

  IS_NESTED = inherits(data[[1]], "fixest_multi")

  if(IS_TREE){
    # This is an internal call from [.fixest_multi
    # data = the final data
    # values = the new tree

    res = data
    tree$id = NULL # we re-create it later

    if(IS_NESTED){

      # We allow non balanced data lists
      res = vector("list", sum(lengths(data)))
      tree_left = list()
      tree_right = list()
      index = 1
      for(i in 1:n_models){
        data_i = data[[i]]

        # updating the tree
        tree_nested = attr(data_i, "tree")[, -1, drop = FALSE]
        n_nested = nrow(tree_nested)
        tree_left[[i]] = rep_df(tree[i, , drop = FALSE], each = n_nested)
        tree_right[[i]] = tree_nested

        for(j in 1:n_nested){
          res[[index]] = data_i[[j]]
          index = index + 1
        }
      }

      tree_left = do.call(rbind, tree_left)
      tree_right = do.call(rbind, tree_right)

      tree = cbind(tree_left, tree_right)
    }

  } else {

    v_names = names(values)
    tree = as.data.frame(values)

    if(IS_NESTED){
      # bookkeeping needed: note that we ensure beforehand that each element is STRONGLY consistent

      tree_left = list()
      tree_right = list()
      for(i in 1:n_models){
        # updating the tree
        tree_nested = attr(data[[i]], "tree")[, -1, drop = FALSE]
        n_nested = nrow(tree_nested)
        tree_left[[i]] = rep_df(tree[i, , drop = FALSE], each = n_nested)
        tree_right[[i]] = tree_nested
      }

      tree_left = do.call(rbind, tree_left)
      tree_right = do.call(rbind, tree_right)

      tree = cbind(tree_left, tree_right)
    } else if(!inherits(data[[1]], "fixest")){
      stop("Internal error: the current data type is not supportded by setup_multi.")
    }

    # res: a plain list containing all the models
    res = vector("list", nrow(tree))
    index = 1
    for(i in 1:n_models){
      data_i = data[[i]]

      # new model information
      new_info = list()
      for(v in v_names){
        if(IS_VAR){
          new_info[[v]] = list(var = var[i], value = values[[v]][i])
        } else {
          new_info[[v]] = values[[v]][i]
        }
      }

      n_j = if(IS_NESTED) length(data_i) else 1
      for(j in 1:n_j){

        if(IS_NESTED){
          mod = data_i[[j]]
        } else {
          mod = data_i
        }

        # updating the model information
        model_info = mod$model_info
        for(v in names(new_info)){
          model_info[[v]] = new_info[[v]]
        }

        mod$model_info = model_info
        res[[index]] = mod

        index = index + 1
      }
    }
  }

  if(IS_VAR){
    tree = cbind(var, tree)
    names(tree)[1] = paste0(var_label, ".var")
  }

  tree_names = mapply(function(x, y) paste0(x, ": ", y), names(tree), tree)
  if(is.vector(tree_names)){
    model_names = paste(tree_names, collapse = "; ")
  } else {
    tree_names = as.data.frame(tree_names)
    model_names = apply(tree_names, 1, paste0, collapse = "; ")
  }

  # indexes
  info = index_from_tree(tree)
  index_names = info$index_names
  tree_index = info$tree_index

  tree = cbind(id = 1:nrow(tree), tree)

  # Shouldn't I remove tree_index and index_names since they can be built from the tree?
  # It seems it can be useful if they're directly computed... We'll see.
  names(res) = model_names
  class(res) = "fixest_multi"
  attr(res, "tree") = tree
  attr(res, "tree_index") = tree_index
  attr(res, "index_names") = index_names

  res
}

index_from_tree = function(tree){
  index_names = list()
  tree_index = list()
  names_keep = names(tree)[!grepl("\\.var$|^id$", names(tree))]
  for(v in names_keep){
    z = tree[[v]]
    fact = factor(z, levels = unique(z))
    index_names[[v]] = levels(fact)
    tree_index[[v]] = as.integer(unclass(fact))
  }
  tree_index = as.data.frame(tree_index)

  list(index_names = index_names, tree_index = tree_index)
}

reshape_multi = function(x, obs, colorder = NULL){
  # x: fixest_multi object
  # obs: indexes to keep

   tree = attr(x, "tree")
   new_tree = tree[obs, , drop = FALSE]

   if(!is.null(colorder)){
     new_tree_list = list()
     for(i in seq_along(colorder)){
       # I use grep to catch ".var" variables
       qui = grepl(colorder[i], names(tree))
       new_tree_list[[i]] = new_tree[, qui, drop = FALSE]
     }
     new_tree_list[[i + 1]] = new_tree["id"]

     new_tree = do.call(cbind, new_tree_list)

   }

   n_models = nrow(new_tree)
   new_data = vector("list", n_models)
   for(i in 1:n_models){
     new_data[[i]] = x[[new_tree$id[i]]]
   }

  setup_multi(new_data, tree = new_tree)
}


set_index_multi = function(x, index_names){
  # Function specific to [.fixest_multi => global assignments!!!
  arg = deparse(substitute(x))
  
  if(!arg %in% names(index_names)){
    
    if(!( identical(x, 1) || identical(x, 0) || identical(x, TRUE) )){
      stopi("The index {bq?arg} is not valid for this list of results (the valid one{$s, are, enum.bq ? names(index_names)}). ",
            "\nYou can, however, set it to 1 if necessary, as in `{arg} = 1`.")
    }
    
    return(NULL)
  }

  NAMES = index_names[[arg]]
  vmax = length(NAMES)

  if(is.logical(x)){
    if(isFALSE(x)){
      last = get("last", parent.frame())
      last[length(last) + 1] = arg
      assign("last", last, parent.frame())
    }
    res = 1:vmax
  } else if(is.character(x)){
    dict = 1:vmax
    names(dict) = NAMES
    vars = keep_apply(NAMES, x)
    vars = order_apply(vars, x)

    res = as.integer(dict[vars])

    if(length(res) == 0){
      stop_up("The set of regular expressions (equal to: {Q?x}) in {bq?arg} ", 
              "didn't match any choice.")
    }

  } else if(any(abs(x) > vmax)){
    stop_up("The index '", arg, "' cannot be greater than ", vmax, 
            ". Currently ", x[which.max(abs(x))], " is not valid.")
  } else {
    res = x
  }

  res
}




rep_df = function(x, times = 1, each = 1, ...){
  if(identical(times, 1) && identical(each, 1)){
    return(x)
  }

  as.data.frame(lapply(x, rep, times = times, each = each))
}

####
#### USER LEVEL ####
####



#' Extracts the models tree from a `fixest_multi` object
#'
#' Extracts the meta information on all the models contained in a `fixest_multi` estimation.
#'
#' @inheritParams print.fixest_multi
#' @param simplify Logical, default is `FALSE`. The default behavior is to display all the meta 
#' information, even if they are identical across models. By using `simplify = TRUE`, only the 
#' information with some variation is kept.
#'
#' @return
#' It returns a `data.frame` whose first column (named `id`) is the index of the models and 
#' the other columns contain the information specific to each model (e.g. which sample, 
#' which RHS,  which dependent variable, etc).
#' 
#' @seealso 
#' multiple estimations in [`feols`], [`n_models`]
#'
#' @examples
#'
#' # a multiple estimation
#' base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
#' est = feols(y ~ csw(x.[, 1:3]), base, fsplit = ~species)
#'
#' # All the meta information
#' models(est)
#'
#' # Illustration: Why use simplify
#' est_sub = est[sample = 2]
#' models(est_sub)
#' models(est_sub, simplify = TRUE)
#'
#'
#'
models = function(x, simplify = FALSE){
  check_arg(x, "class(fixest_multi)")

  res = attr(x, "tree")
  if(simplify){
    who_keep = sapply(res, function(x) length(unique(x)) != 1)

    if(!all(who_keep)){
      # we need to handle the behavior with the .var thing
      names_keep = names(res)[who_keep]
      pattern = sma("^({'|'c ? names_keep})")

      res = res[, grepl(pattern, names(res)), drop = FALSE]
    }

  }

  res
}



#' Gets the dimension of `fixest_multi` objects
#' 
#' Otabin the number of unique models of a `fixest_multi` object, depending on the 
#' type requested.
#' 
#' 
#' @param x A `fixest_mutli` object, obtained e.g. from [`feols`].
#' @param lhs Logical scalar, default is `FALSE`. If `TRUE`, the number of different 
#' left hand sides is returned.
#' @param rhs Logical scalar, default is `FALSE`. If `TRUE`, the number of different 
#' right hand sides is returned.
#' @param sample Logical scalar, default is `FALSE`. If `TRUE`, the number of different 
#' samples is returned.
#' @param fixef Logical scalar, default is `FALSE`. If `TRUE`, the number of different 
#' types of fixed-effects is returned.
#' @param iv Logical scalar, default is `FALSE`. If `TRUE`, the number of different 
#' IV stages is returned.
#' 
#' @return 
#' It returns an integer scalar. If no argument is provided, the total number of 
#' models is returned.
#' 
#' @seealso 
#' Multiple estimations in [`feols`], [`models`]
#' 
#' @examples 
#' 
#' base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
#' est = feols(y ~ csw(x1, x2, x3), base, fsplit = ~species)
#' 
#' # there are 3 different RHSs and 4 different samples
#' models(est)
#' 
#' # We can obtain these numbers with n_models
#' n_models(est, rhs = TRUE)
#' n_models(est, sample = TRUE)
#' 
#' 
n_models = function(x, lhs = FALSE, rhs = FALSE, sample = FALSE, 
                    fixef = FALSE, iv = FALSE){
  
  check_arg(x, "class(fixest_multi) mbt")
  check_arg("logical scalar", lhs, rhs, sample, fixef, iv)
  
  request = c(lhs = lhs, rhs = rhs, sample = sample, fixef = fixef, iv = iv)
  if(sum(request) == 0){
    return(length(x))
  }
  
  dimension = names(request)[request]
  
  if(length(dimension) > 1){
    stopi("You can request the number of different models on **only one** dimension. Currently ", 
          "{$enum.bq, are ? dimension} `TRUE`.\n Please only set one to `TRUE`.")
  }  
  
  tree = attr(x, "tree")
  if(!dimension %in% names(tree)){
    return(1)
  }
  
  length(unique(tree[[dimension]]))
}


####
#### METHODS ####
####



#' Summary for fixest_multi objects
#'
#' Summary information for fixest_multi objects. In particular, this is used to specify the 
#' type of standard-errors to be computed.
#'
#' @method summary fixest_multi
#'
#' @inheritParams summary.fixest
#'
#' @inherit print.fixest_multi seealso
#'
#' @param object A `fixest_multi` object, obtained from a `fixest` estimation leading to 
#' multiple results.
#' @param type A character either equal to `"short"`, `"long"`, `"compact"`, `"se_compact"` 
#' or `"se_long"`. If `short`, only the table of coefficients is displayed for each estimation. 
#' If `long`, then the full results are displayed for each estimation. If `compact`, 
#' a `data.frame` is returned with one line per model and the formatted 
#' coefficients + standard-errors in the columns. If `se_compact`, a `data.frame` is 
#' returned with one line per model, one numeric column for each coefficient and one numeric 
#' column for each standard-error. If `"se_long"`, same as `"se_compact"` but the data is in a 
#' long format instead of wide.
#' @param ... Not currently used.
#'
#' @return
#' It returns either an object of class `fixest_multi` (if `type` equals `short` or `long`), 
#' either a `data.frame` (if type equals `compact` or `se_compact`).
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' # Multiple estimation
#' res = feols(y ~ csw(x1, x2, x3), base, split = ~species)
#'
#' # By default, the type is "short"
#' # You can still use the arguments from summary.fixest
#' summary(res, se = "hetero")
#'
#' summary(res, type = "long")
#'
#' summary(res, type = "compact")
#'
#' summary(res, type = "se_compact")
#'
#' summary(res, type = "se_long")
#'
#'
summary.fixest_multi = function(object, type = "short", vcov = NULL, se = NULL, 
                                cluster = NULL, ssc = NULL,
                                .vcov = NULL, stage = 2, lean = FALSE, n = 1000, ...){
  dots = list(...)

  check_set_arg(type, "match(short, long, compact, se_compact, se_long)")

  if(!missing(type) || is.null(attr(object, "print_request"))){
    attr(object, "print_request") = type
  }

  if(is_user_level_call()){
    validate_dots(suggest_args = c("type", "vcov"),
            valid_args = c("agg", "forceCovariance", "keepBounded", "nthreads"))
  }

  est_1 = object[[1]]
  if(is.null(est_1$cov.scaled) || !isTRUE(dots$fromPrint)){

    for(i in 1:length(object)){
      object[[i]] = summary(object[[i]], vcov = vcov, se = se, cluster = cluster, ssc = ssc,
                            .vcov = .vcov, stage = stage, lean = lean, n = n, ...)
    }

    # In IV: multiple estimations can be returned

    if("fixest_multi" %in% class(object[[1]])){
      tree = attr(object, "tree")
      object = setup_multi(object, tree = tree)
    }

  }

  if(type %in% c("compact", "se_compact", "se_long")){
    tree = attr(object, "tree")
    tree_index = attr(object, "tree_index")

    res = data.frame(i = tree$id)

    if(!"lhs" %in% names(tree_index)){
      res$lhs = sapply(object, function(x) as.character(x$fml[[2]]))
    }

    for(my_dim in names(tree_index)){
      res[[my_dim]] = sfill(tree[[my_dim]], right = TRUE)
    }
    res$i = NULL

    if(type == "se_long"){
      res$type = "coef"
    }

    n_start = ncol(res)

    signifCode = c("***"=0.001, "**"=0.01, "*"=0.05, "."=0.1)

    ct_all = list()
    for(i in seq_along(object)){
      ct = object[[i]]$coeftable
      vname = row.names(ct)

      if(type == "compact"){
        stars = cut(ct[, 4], breaks = c(-1, signifCode, 100), labels = c(names(signifCode), ""))
        stars[is.na(stars)] = ""

        value = paste0(format_number(ct[, 1], 3), stars, " (", format_number(ct[, 2], 3), ")")
        names(value) = vname

      } else if(type %in% c("se_compact", "se_long")){
        n = length(vname)
        vname_tmp = character(2 * n)
        qui_coef = seq(1, by = 2, length.out = n)
        qui_se   = seq(2, by = 2, length.out = n)
        vname_tmp[qui_coef] = vname
        vname_tmp[qui_se]   = paste0(vname, "__se")
        vname = vname_tmp

        value = numeric(2 * n)
        value[qui_coef] = ct[, 1]
        value[qui_se]   = ct[, 2]
        names(value) = vname
      }

      ct_all[[i]] = value
    }

    vname_all = unique(unlist(lapply(ct_all, names)))

    tmp = lapply(ct_all, function(x) x[vname_all])
    my_ct = do.call("rbind", tmp)
    colnames(my_ct) = vname_all
    if(type == "compact"){
      my_ct[is.na(my_ct)] = ""
    }

    for(i in seq_along(vname_all)){
      if(type == "compact"){
        res[[vname_all[i]]] = sfill(my_ct[, i], anchor = "(", right = TRUE)
      } else {
        res[[vname_all[i]]] = my_ct[, i]
      }
    }

    if(type == "se_long"){
      # clumsy... but works
      who_se = which(grepl("__se", names(res)))
      se_all = res[, c(1:n_start, who_se)]
      se_all$type = "se"
      names(se_all) = gsub("__se$", "", names(se_all))
      coef_all = res[, -who_se]

      quoi = rbind(coef_all, se_all)
      n = nrow(coef_all)
      res = quoi[rep(1:n, each = 2) + rep(c(0, n), n), ]
      row.names(res) = NULL
    }

    return(res)
  }

  return(object)
}


#' Print method for fixest_multi objects
#'
#' Displays summary information on fixest_multi objects in the R console.
#'
#' @method print fixest_multi
#'
#' @param x A `fixest_multi` object, obtained from a `fixest` estimation leading to 
#' multiple results.
#' @param ... Other arguments to be passed to [`summary.fixest_multi`].
#'
#' @seealso
#' The main fixest estimation functions: [`feols`], [`fepois`][fixest::feglm], 
#' [`fenegbin`][fixest::femlm], [`feglm`], [`feNmlm`]. Tools for mutliple fixest 
#' estimations: [`summary.fixest_multi`], [`print.fixest_multi`], [`as.list.fixest_multi`], 
#' \code{\link[fixest]{sub-sub-.fixest_multi}}, \code{\link[fixest]{sub-.fixest_multi}}.
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' # Multiple estimation
#' res = feols(y ~ csw(x1, x2, x3), base, split = ~species)
#'
#' # Let's print all that
#' res
#'
print.fixest_multi = function(x, ...){

  if(is_user_level_call()){
    validate_dots(valid_args = stvec("/type, vcov, se, cluster, ssc, stage, lean, agg, forceCovariance, keepBounded, n, nthreads"))
  }

  x = summary(x, fromPrint = TRUE, ...)

  # Type = compact
  if(is.data.frame(x)){
    return(x)
  }

  is_short = identical(attr(x, "print_request"), "short")

  tree = attr(x, "tree")
  tree_index = attr(x, "tree_index")

  # Finding out the type of SEs
  if(is_short){

    all_se = unique(unlist(sapply(x, function(x) attr(x$cov.scaled, "type"))))

    if(length(all_se) > 1){
      cat("Standard-errors: mixed (use summary() with arg. 'vcov' to harmonize them) \n")
    } else if(length(all_se) == 1){
      cat("Standard-errors:", all_se, "\n")
    }
  }

  dict_title = c("sample" = "Sample", "lhs" = "Dep. var.", "rhs" = "Expl. vars.",
                 "iv" = "IV", "fixef" = "Fixed-effects", sample.var = "Sample var.")

  qui_drop = apply(tree_index, 2, max) == 1

  if(any(qui_drop) && !all(qui_drop)){
    var2drop = names(tree_index)[qui_drop]
    for(d in var2drop){
      cat(dict_title[d], ": ", tree[[d]][1], "\n", sep = "")
    }

    tree = tree[, !names(tree) %in% var2drop, drop = FALSE]
    tree_index = tree_index[, !qui_drop, drop = FALSE]
  }

  depth = ncol(tree_index)

  headers = list()
  headers[[1]] = function(d, i) cat(dict_title[d], ": ", tree[[d]][i], "\n", sep = "")
  headers[[2]] = function(d, i) cat("\n### ", dict_title[d], ": ", tree[[d]][i], "\n\n", sep = "")
  headers[[3]] = function(d, i) cat("\n\n# ", toupper(dict_title[d]), ": ", tree[[d]][i], "\n\n", sep = "")
  headers[[4]] = function(d, i) cat("\n\n#\n# ", toupper(dict_title[d]), ": ", tree[[d]][i], "\n#\n\n", sep = "")
  headers[[5]] = function(d, i) cat("\n\n#===\n# ", toupper(dict_title[d]), ": ", tree[[d]][i], "\n#===\n\n", sep = "")

  for(i in 1:nrow(tree)){
    for(j in 1:depth){
      d = names(tree_index)[j]
      if(i == 1 || tree_index[i - 1, j] != tree_index[i, j]){
        headers[[depth - j + 1]](d, i)
      }
    }

    if(is_short){
      if(isTRUE(x[[i]]$onlyFixef)){
        cat("No variable (only the fixed-effects).\n")
      } else {
        print_coeftable(coeftable = coeftable(x[[i]]), show_signif = FALSE)
      }
      if(tree_index[i, depth] != max(tree_index[, depth])) cat("---\n")
    } else {
      print(x[[i]])
      if(tree_index[i, depth] != max(tree_index[, depth])) cat("\n")
    }

  }

}


####
#### sub-fixest_multi ####
####



#' Extracts one element from a `fixest_multi` object
#'
#' Extracts single elements from multiple `fixest` estimations.
#'
#' @inherit print.fixest_multi seealso
#' @inheritParams print.fixest_multi
#'
#' @param i An integer scalar. The identifier of the estimation to extract.
#'
#' @return
#' A `fixest` object is returned.
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' # Multiple estimation
#' res = feols(y ~ csw(x1, x2, x3), base, split = ~species)
#'
#' # The first estimation
#' res[[1]]
#'
#' # The second one, etc
#' res[[2]]
#'
"[[.fixest_multi" = function(x, i){

  n = length(x)
  check_set_arg(i, "evalset integer scalar mbt", .data = list(.N = n))
  if(i < 0 || i > length(x)){
    stop("Index 'i' must lie within [1; ", n, "]. Problem: it is equal to ", i, ".")
  }

  `[[.data.frame`(x, i)
}

#' Subsets a fixest_multi object
#'
#' Subsets a fixest_multi object using different keys.
#'
#'
#' @inherit print.fixest_multi seealso
#' @inheritParams print.fixest_multi
#'
#' @param sample An integer vector, a logical scalar, or a character vector. It represents 
#' the `sample` identifiers for which the results should be extracted. Only valid when the 
#' `fixest` estimation was a split sample. You can use `.N` to refer to the last element. 
#' If logical, all elements are selected in both cases, but `FALSE` leads `sample` to become 
#' the rightmost key (just try it out).
#' @param lhs An integer vector, a logical scalar, or a character vector. It represents 
#' the left-hand-sides identifiers for which the results should be extracted. Only valid when 
#' the `fixest` estimation contained multiple left-hand-sides. You can use `.N` to refer to 
#' the last element. If logical, all elements are selected in both cases, but `FALSE` 
#' leads `lhs` to become the rightmost key (just try it out).
#' @param rhs An integer vector or a logical scalar. It represents the right-hand-sides 
#' identifiers for which the results should be extracted. Only valid when the `fixest` 
#' estimation contained multiple right-hand-sides. You can use `.N` to refer to the last 
#' element. If logical, all elements are selected in both cases, but `FALSE` leads `rhs` to 
#' become the rightmost key (just try it out).
#' @param fixef An integer vector or a logical scalar. It represents the fixed-effects 
#' identifiers for which the results should be extracted. Only valid when the `fixest` 
#' estimation contained fixed-effects in a stepwise fashion. You can use `.N` to refer to the 
#' last element. If logical, all elements are selected in both cases, but `FALSE` leads `fixef` 
#' to become the rightmost key (just try it out).
#' @param iv An integer vector or a logical scalar. It represent the stages of the IV. Note 
#' that the length can be greater than 2 when there are multiple endogenous regressors (the 
#' first stage corresponding to multiple estimations). Note that the order of the stages depends 
#' on the `stage` argument from [`summary.fixest`]. If logical, all elements are selected in 
#' both cases, but `FALSE` leads `iv` to become the rightmost key (just try it out).
#' @param i An integer vector. Represents the estimations to extract.
#' @param I An integer vector. Represents the root element to extract.
#' @param reorder Logical, default is `TRUE`. Indicates whether reordering of the results 
#' should be performed depending on the user input.
#' @param drop Logical, default is `FALSE`. If the result contains only one estimation, 
#' then if `drop = TRUE` it will be transformed into a `fixest` object (instead of `fixest_multi`).
#' Its default value can be modified with the function [`setFixest_multi`].
#'
#' @details
#' The order with we we use the keys matter. Every time a key `sample`, `lhs`, `rhs`, 
#' `fixef` or `iv` is used, a reordering is performed to consider the leftmost-side key 
#' to be the new root.
#'
#' Use logical keys to easily reorder. For example, say the object `res` contains a 
#' multiple estimation with multiple left-hand-sides, right-hand-sides and fixed-effects. 
#' By default the results are ordered as follows: `lhs`, `fixef`, `rhs`. 
#' If you use `res[lhs = FALSE]`, then the new order is: `fixef`, `rhs`, `lhs`. 
#' With `res[rhs = TRUE, lhs = FALSE]` it becomes: `rhs`, `fixef`, `lhs`. In both cases 
#' you keep all estimations.
#'
#' @return
#' It returns a `fixest_multi` object. If there is only one estimation left in the object, then 
#' the result is simplified into a `fixest` object only with `drop = TRUE`.
#'
#' @examples
#'
#' # Estimation with multiple samples/LHS/RHS
#' aq = airquality[airquality$Month %in% 5:6, ]
#' est_split = feols(c(Ozone, Solar.R) ~ sw(poly(Wind, 2), poly(Temp, 2)),
#'                   aq, split = ~ Month)
#'
#' # By default: sample is the root
#' etable(est_split)
#'
#' # Let's reorder, by considering lhs the root
#' etable(est_split[lhs = 1:.N])
#'
#' # Selecting only one LHS and RHS
#' etable(est_split[lhs = "Ozone", rhs = 1])
#'
#' # Taking the first root (here sample = 5)
#' etable(est_split[I = 1])
#'
#' # The first and last estimations
#' etable(est_split[i = c(1, .N)])
#'
"[.fixest_multi" = function(x, i, sample, lhs, rhs, fixef, iv, I, reorder = TRUE, drop = FALSE){

  core_args = c("sample", "lhs", "rhs", "fixef", "iv")
  check_arg(reorder, drop, "logical scalar")
  extra_args = c("reorder", "drop")
  
  if(missing(drop)){
    drop = getFixest_multi()$drop
  }

  mc = match.call()
  if(!any(c(core_args, "i", "I") %in% names(mc))){
    return(x)
  }

  use_i = "i" %in% names(mc)
  if(use_i && any(c(core_args, "I") %in% names(mc))){
    stopi("The index 'i' cannot be used with any other index ({enum.q.or ? c(core_args, 'I')}).")
  }

  use_I = "I" %in% names(mc)
  if(use_I && any(core_args %in% names(mc))){
    stopi("The index 'I' cannot be used with any other index ({enum.q.or ? c('i', core_args)}).")
  }

  # We get the meta information
  tree = attr(x, "tree")
  tree_index = attr(x, "tree_index")
  index_names = attr(x, "index_names")
  index_n = lapply(index_names, length)

  # tree_index does not contain extra info like id or .var
  args = c(names(tree_index), extra_args)

  nc = ncol(tree)
  n = nrow(tree)

  if(use_i){
    check_set_arg(i, "evalset integer vector l0 no na", .data = list(.N = n))

    if(length(i) == 0) return(list())

    if(any(abs(i) > n)){
      stop("The index 'i' cannot have values greater than ", n, ". Currently ", i[which.max(abs(i))], " is not valid.")
    }

    obs = (1:n)[i]

    res = reshape_multi(x, obs)

    return(res)
  }

  if(use_I){
    I_max = index_n[[1]]
    check_set_arg(I, "evalset integer vector no na", .data = list(.N = I_max))

    if(any(abs(I) > I_max)){
      stop("The index 'I' refers to root elements (here ", names(index_n)[1], "), and thus cannot be greater than ", I_max, ". Currently ", I[which.max(abs(I))], " is not valid.")
    }

    obs = (1:I_max)[I]

    tree_index$obs = 1:nrow(tree_index)
    new_tree = list()
    for(i in seq_along(obs)){
      new_tree[[i]] = tree_index[tree_index[[1]] == obs[i], ]
    }
    tree_index = do.call(base::rbind, new_tree)

    res = reshape_multi(x, tree_index$obs)

    return(res)
  }

  # Here:
  # We take care of reordering properly

  is_sample = !missing(sample)
  is_lhs    = !missing(lhs)
  is_rhs    = !missing(rhs)
  is_fixef  = !missing(fixef)
  is_iv     = !missing(iv)

  selection = list()

  last = c()

  s_max = index_n[["sample"]]
  if(is_sample){
    check_set_arg(sample, "evalset logical scalar | vector(character, integer) no na", 
                  .data = list(.N = s_max))
    sample = set_index_multi(sample, index_names)

    if(!is.null(sample)){
      # sample can be NULL if it wasn't in the tree
      selection$sample = (1:s_max)[sample]
    }

  } else if("sample" %in% names(index_n)){
    selection$sample = 1:s_max
  }

  lhs_max = index_n[["lhs"]]
  if(is_lhs){
    check_set_arg(lhs, "evalset logical scalar | vector(character, integer) no na", 
                  .data = list(.N = lhs_max))
    lhs = set_index_multi(lhs, index_names)
    
    if(!is.null(lhs)){
      selection$lhs = (1:lhs_max)[lhs]
    }
    
  } else if("lhs" %in% names(index_n)){
    selection$lhs = 1:lhs_max
  }

  rhs_max = index_n[["rhs"]]
  if(is_rhs){
    check_set_arg(rhs, "evalset logical scalar | vector(character, integer) no na", 
                  .data = list(.N = rhs_max))
    rhs = set_index_multi(rhs, index_names)

    if(!is.null(rhs)){
      selection$rhs = (1:rhs_max)[rhs]
    }
    
  } else if("rhs" %in% names(index_n)){
    selection$rhs = 1:rhs_max
  }

  fixef_max = index_n[["fixef"]]
  if(is_fixef){
    check_set_arg(fixef, "evalset logical scalar | vector(character, integer) no na", 
                  .data = list(.N = fixef_max))
    fixef = set_index_multi(fixef, index_names)
    
    if(!is.null(fixef)){
      selection$fixef = (1:fixef_max)[fixef]
    }

  } else if("fixef" %in% names(index_n)){
    selection$fixef = 1:fixef_max
  }

  iv_max = index_n[["iv"]]
  if(is_iv){
    check_set_arg(iv, "evalset logical scalar | vector(character, integer) no na", 
                  .data = list(.N = iv_max))
    iv = set_index_multi(iv, index_names)
    
    if(!is.null(iv)){
      selection$iv = (1:iv_max)[iv]
    }
    
  } else if("iv" %in% names(index_n)){
    selection$iv = 1:iv_max
  }

  # We keep the order of the user!!!!!
  sc = sys.call()
  user_order = setdiff(names(sc)[-(1:2)], extra_args)
  if(reorder == FALSE){
    user_order = names(index_n)
  } else {
    user_order = c(user_order, setdiff(names(index_n), user_order))
    if(length(last) > 0){
      user_order = c(setdiff(user_order, last), last)
    }
  }
  
  # avoid issues when a non-tree element is the only one used in argument
  user_order = intersect(user_order, names(index_n))  

  tree_index$obs = 1:nrow(tree_index)
  for(my_dim in rev(user_order)){
    # 1) we prune the tree
    obs_keep = tree_index[[my_dim]] %in% selection[[my_dim]]

    if(!any(obs_keep)){
      stop("No models ended up selected: revise selection?")
    }

    tree_index = tree_index[obs_keep, , drop = FALSE]

    # 2) we reorder it according to the order of the user
    new_tree = list()
    dim_order = selection[[my_dim]]
    for(i in seq_along(dim_order)){
      new_tree[[i]] = tree_index[tree_index[[my_dim]] == dim_order[i], , drop = FALSE]
    }
    tree_index = do.call(base::rbind, new_tree)
  }

  n_models = nrow(tree_index)
  if(n_models == 1 && drop){
    return(x[[tree_index$obs]])
  }

  # Reshaping a fixest_multi object properly
  res = reshape_multi(x, tree_index$obs, user_order)

  return(res)
}


#' Transforms a fixest_multi object into a list
#'
#' Extracts the results from a `fixest_multi` object and place them into a list.
#'
#' @inheritParams print.fixest_multi
#' @inherit print.fixest_multi seealso
#'
#' @method as.list fixest_multi
#'
#' @param ... Not currently used.
#'
#' @return
#' Returns a list containing all the results of the multiple estimations.
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' # Multiple estimation
#' res = feols(y ~ csw(x1, x2, x3), base, split = ~species)
#'
#' # All the results at once
#' as.list(res)
#'
#'
as.list.fixest_multi = function(x, ...){
  nm = names(x)
  attributes(x) = NULL
  names(x) = nm
  x
}

#' Extracts the coefficients of fixest_multi objects
#'
#' Utility to extract the coefficients of multiple estimations and rearrange them into a matrix.
#'
#' @inheritParams etable
#' @inheritParams coef.fixest
#'
#' @param object A `fixest_multi` object. Obtained from a multiple estimation.
#' @param long Logical, default is `FALSE`. Whether the results should be displayed 
#' in a long format.
#' @param na.rm Logical, default is `TRUE`. Only applies when `long = TRUE`: whether to remove 
#' the coefficients with `NA` values.
#' @param ... Not currently used.
#'
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' # A multiple estimation
#' est = feols(y ~ x1 + csw0(x2, x3), base)
#'
#' # Getting all the coefficients at once,
#' # each row is a model
#' coef(est)
#'
#' # Example of keep/drop/order
#' coef(est, keep = "Int|x1", order = "x1")
#'
#'
#' # To change the order of the model, use fixest_multi
#' # extraction tools:
#' coef(est[rhs = .N:1])
#'
#' # collin + long + na.rm
#' base$x1_bis = base$x1 # => collinear
#' est = feols(y ~ x1_bis + csw0(x1, x2, x3), base, split = ~species)
#'
#' # does not display x1 since it is always collinear
#' coef(est)
#' # now it does
#' coef(est, collin = TRUE)
#'
#' # long
#' coef(est, long = TRUE)
#'
#' # long but balanced (with NAs then)
#' coef(est, long = TRUE, na.rm = FALSE)
#'
#'
coef.fixest_multi = function(object, keep, drop, order, collin = FALSE,
                             long = FALSE, na.rm = TRUE, ...){
  # row: model
  # col: coefficient

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(collin, "logical scalar")

  res_list = list()
  for(i in seq_along(object)){
    res_list[[i]] = coef(object[[i]], collin = collin)
  }

  all_names = unique(unlist(lapply(res_list, names)))

  if(!missnull(keep)) all_names = keep_apply(all_names, keep)
  if(!missnull(drop)) all_names = drop_apply(all_names, drop)
  if(!missnull(order)) all_names = order_apply(all_names, order)

  if(length(all_names) == 0) return(NULL)

  nr = length(res_list)
  nc = length(all_names)

  res_list = lapply(res_list, function(x) x[all_names])
  res = do.call(rbind, res_list)
  colnames(res) = all_names

  # model information
  mod = models(object)

  if(long){
    res_long = c(t(res), recursive = TRUE)
    tmp = data.frame(coefficient =  res_long)
    mod_long = rep_df(mod, each = ncol(res))
    res = cbind(mod_long, coefficient = rep(all_names, NROW(res)),
          estimate = res_long)

    if(na.rm && anyNA(res$estimate)){
      res = res[!is.na(res$estimate), , drop = FALSE]
    }

  } else {
    res = cbind(mod, res)
  }

  res
}

#' @rdname coef.fixest_multi
coefficients.fixest_multi <- coef.fixest_multi

#' Extracts the coefficients tables from `fixest_multi` estimations
#'
#' Series of methods to extract the coefficients table or its sub-components from a 
#' `fixest_multi` objects (i.e. the outcome of multiple estimations).
#'
#' @inheritParams etable
#'
#' @param object A `fixest_multi` object, coming from a `fixest` multiple estimation.
#' @param wide A logical scalar, default is `FALSE`. If `TRUE`, then a list is returned: 
#' the elements of the list are coef/se/tstat/pvalue. Each element of the list is a wide 
#' table with a column per coefficient.
#' @param long Logical scalar, default is `FALSE`. If `TRUE`, then all the information 
#' is stacked, with two columns containing the information: `"param"` and `"value"`. 
#' The column `param` contains the values `coef`/`se`/`tstat`/`pvalue`.
#' @param ... Other arguments to be passed to [`summary.fixest`].
#'
#' @return
#' It returns a `data.frame` containing the coefficients tables (or just the se/pvalue/tstat) 
#' along with the information on which model was estimated.
#'
#' If `wide = TRUE`, then a list is returned. The elements of the list are 
#' coef/se/tstat/pvalue. Each element of the list is a wide table with a column per coefficient.
#'
#' If `long = TRUE`, then all the information is stacked. This removes the 4 columns 
#' containing the coefficient estimates to the p-values, and replace them with two 
#' new columns: `"param"` and `"value"`. The column `param` contains the 
#' values `coef`/`se`/`tstat`/`pvalue`, and the column `values` the 
#' associated numerical information.
#'
#' @examples
#'
#' base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
#' est_multi = feols(y ~ csw(x.[,1:3]), base, split = ~species)
#'
#' # we get all the coefficient tables at once
#' coeftable(est_multi)
#'
#' # Now just the standard-errors
#' se(est_multi)
#'
#' # wide = TRUE => leads toa  list of wide tables
#' coeftable(est_multi, wide = TRUE)
#'
#' # long = TRUE, all the information is stacked
#' coeftable(est_multi, long = TRUE)
#'
#'
#'
coeftable.fixest_multi = function(object, vcov = NULL, keep = NULL, drop = NULL, 
                                  order = NULL, long = FALSE, wide = FALSE, ...){

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(wide, "logical scalar | charin(se, pvalue, tstat)")
  check_arg(long, "logical scalar")

  if(long && !isFALSE(wide)){
    stop("You cannot have 'wide = TRUE' with 'long = TRUE', please choose.")
  }

  mod = models(object)

  res_list = list()
  for(i in seq_along(object)){
    ct = coeftable(object[[i]], vcov = vcov, keep = keep, drop = drop, order = order, ...)
    if(is.null(ct)){
      next
    }

    ct = cbind(coefficient = row.names(ct), as.data.frame(ct))

    mod_current = rep_df(mod[i, , drop = FALSE], each = nrow(ct))
    res_list[[length(res_list) + 1]] = cbind(mod_current, ct)
  }

  if(length(res_list) == 0){
    stop("Not any variable was selected: revise you 'keep'/'drop' arguments?")
  }

  res = do.call(rbind, res_list)
  row.names(res) = NULL

  if(!isFALSE(wide)){
    # we return a list of wide tables
    res_list = list()

    roots = c("coef", "se", "tstat", "pvalue")
    if(isTRUE(wide)) wide = roots

    i_coef = which(names(res) == "coefficient")

    for(i_select in which(roots %in% wide)){

      all_coef = unique(res$coefficient)
      all_id = unique(res$id)

      key = paste0(res$id, "; ", res$coefficient)
      value = res[, i_coef + i_select]
      names(value) = key

      df_xpd = expand.grid(coef = all_coef, id = all_id)
      new_key = paste0(df_xpd$id, "; ", df_xpd$coef)


      res_wide = matrix(value[new_key], nrow = length(all_id), ncol = length(all_coef),
                dimnames = list(NULL, all_coef), byrow = TRUE)

      item = cbind(mod[all_id, , drop = FALSE], as.data.frame(res_wide))
      if(length(wide) == 1){
        # internal call
        return(item)
      }

      res_list[[roots[i_select]]] = item
    }

    res = res_list
  }

  if(long){
    i_coef = which(names(res) == "coefficient")
    values = res[, i_coef + 1:4]

    values_all = c(t(values), recursive = TRUE)

    params = data.frame(param = rep(c("coef", "se", "tstat", "pvalue"), nrow(res)))

    info = rep_df(res[, 1:i_coef, drop = FALSE], each = 4)

    res = cbind(info, params, value = values_all)

  }


  res
}


#' @describeIn coeftable.fixest_multi Extracts the standard-errors from `fixest_multi` estimations
se.fixest_multi = function(object, vcov = NULL, keep = NULL, drop = NULL, 
                           order = NULL, long = FALSE, ...){
  # Default is wide format => same as with coef

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(long, "logical scalar")

  mc = match.call()
  if("wide" %in% names(mc)){
    stop("The argument 'wide' is not a valid argument.")
  }

  wide = if(long) FALSE else "se"

  res = coeftable(object, vcov = vcov, keep = keep, drop = drop, 
                  order = order, wide = wide, ...)

  if(long){
    i_coef = which(names(res) == "coefficient")
    res = res[, c(1:i_coef, i_coef + 2)]
  }

  res
}

#' @describeIn coeftable.fixest_multi Extracts the t-stats from `fixest_multi` estimations
tstat.fixest_multi = function(object, vcov = NULL, keep = NULL, drop = NULL, 
                              order = NULL, long = FALSE, ...){
  # Default is wide format => same as with coef

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(long, "logical scalar")

  mc = match.call()
  if("wide" %in% names(mc)){
    stop("The argument 'wide' is not a valid argument.")
  }

  wide = if(long) FALSE else "tstat"

  res = coeftable(object, vcov = vcov, keep = keep, drop = drop, 
                  order = order, wide = wide, ...)

  if(long){
    i_coef = which(names(res) == "coefficient")
    res = res[, c(1:i_coef, i_coef + 3)]
  }

  res
}

#' @describeIn coeftable.fixest_multi Extracts the p-values from `fixest_multi` estimations
pvalue.fixest_multi = function(object, vcov = NULL, keep = NULL, drop = NULL, 
                               order = NULL, long = FALSE, ...){
  # Default is wide format => same as with coef

  check_arg(keep, drop, order, "NULL character vector no na")
  check_arg(long, "logical scalar")

  mc = match.call()
  if("wide" %in% names(mc)){
    stop("The argument 'wide' is not a valid argument.")
  }

  wide = if(long) FALSE else "pvalue"

  res = coeftable(object, vcov = vcov, keep = keep, drop = drop, 
                  order = order, wide = wide, ...)

  if(long){
    i_coef = which(names(res) == "coefficient")
    res = res[, c(1:i_coef, i_coef + 4)]
  }

  res
}


#' Extracts the residuals from a `fixest_multi` object
#'
#' Utility to extract the residuals from multiple `fixest` estimations. If possible, 
#' all the residuals are coerced into a matrix.
#'
#' @inheritParams resid.fixest
#'
#' @param object A `fixes_multi` object.
#' @param na.rm Logical, default is `FALSE`. Should the NAs be kept? If `TRUE`, they are removed.
#'
#' @return
#' If all the models return residuals of the same length, a matrix is returned. Otherwise, 
#' a `list` is returned.
#'
#' @examples
#'
#' base = iris
#' names(base) = c("y", "x1", "x2", "x3", "species")
#'
#' # A multiple estimation
#' est = feols(y ~ x1 + csw0(x2, x3), base)
#'
#' # We can get all the residuals at once,
#' # each column is a model
#' head(resid(est))
#'
#' # We can select/order the model using fixest_multi extraction
#' head(resid(est[rhs = .N:1]))
#'
resid.fixest_multi = function(object, type = c("response", "deviance", "pearson", "working"), 
                              na.rm = FALSE, ...){

  # Je fais un prototype pour le moment, je l'ameliorerai apres (07-04-2021)

  check_set_arg(type, "match")
  check_set_arg(na.rm, "logical scalar")

  res_list = list()
  for(i in seq_along(object)){
    res_list[[i]] = resid(object[[i]], type = type, na.rm = na.rm)
  }

  n_all = sapply(res_list, length)

  if(all(n_all == n_all[1])){
    res = do.call(cbind, res_list)
  } else {
    res = res_list
  }

  res
}


#' @rdname resid.fixest_multi
residuals.fixest_multi = resid.fixest_multi




#' Confidence intervals for `fixest_multi` objects
#'
#' Computes the confidence intervals of parameter estimates for `fixest`'s multiple 
#' estimation objects (aka `fixest_multi`).
#'
#' @inheritParams confint.fixest
#'
#' @param object A `fixest_multi` object obtained from a multiple estimation in `fixest`.
#'
#' @return
#' It returns a data frame whose first columns indicate which model has been estimated. 
#' The last three columns indicate the coefficient name, and the lower and upper 
#' confidence intervals.
#'
#' @examples
#'
#' base = setNames(iris, c("y", "x1", "x2", "x3", "species"))
#' est = feols(y ~ csw(x.[,1:3]) | sw0(species), base, vcov = "iid")
#'
#' confint(est)
#'
#' # focusing only on the coefficient 'x3'
#' confint(est, "x3")
#'
#' # the 'id' provides the index of the estimation
#' est[c(3, 6)]
#'
confint.fixest_multi = function(object, parm, level = 0.95, vcov = NULL, se = NULL,
                                cluster = NULL, ssc = NULL, ...){

  n = length(object)
  confint_all = vector("list", n)
  for(i in 1:n){
    confint_all[[i]] = confint(object[[i]], parm = parm, level = level,
                   vcov = vcov, se = se, cluster = cluster,
                   ssc = ssc, coef.col = TRUE, internal = TRUE, ...)
  }

  n_all = sapply(confint_all, NROW)

  if(max(n_all) == 0){
    stop("No coefficient could be selected. Revise the argument `parm`?")
  }

  mod = models(object)

  # Formatting
  mod_all = rep_df(mod, times = n_all)
  res = do.call(base::rbind, confint_all)
  res = cbind(mod_all, res)

  attr(res, "type") = attr(confint_all[n_all > 0][[1]], "type")

  res
}

Try the fixest package in your browser

Any scripts or data that you put into this service are public.

fixest documentation built on June 22, 2024, 9:12 a.m.