R/estimate.R

Defines functions estimate

Documented in estimate

#' Compute the path coefficients for analyses of moderated mediations.
#'
#' Validates the arguments, then runs \code{draws} independent estimations on
#' a cluster of \code{cores} workers. For every draw, the sample is prepared
#' with \code{modestim::prepare_sample()} and the model is fitted with
#' \code{modestim::fit_regression()}. The per-draw results are finally
#' unnested into a single table.
#'
#' @param sample  Tibble. Table containing the data.
#' @param md_imputation Character. Kind of imputation ("multiple","single","deletion")
#' @param md_method Character. Method for the imputation of missing data ("pmm", "mean", "sample", "deletion").
#'   "deletion" must be used if, and only if, \code{md_imputation} is "deletion".
#' @param size Integer. Size of the sample for each draw. If NA, use the sample size.
#' @param formula Formula or list of formulas for a system of equations.
#'   A list is required when \code{method} is "sysfit"; a single formula is
#'   required otherwise.
#' @param method  Character. Method for the regression ("glm", "felm", or "sysfit").
#' @param family  Character. Family for glm (e.g. "gaussian", "binomial") .
#' @param draws Integer. Number of draws for the bootstrap. Must be a single number greater than or equal to 1.
#' @param cores Integer. Number of cores used for parallel threads.
#' @return A tibble gathering the regression coefficients for each draw
#'   (the draw index is stored in the column \code{draw}).
#' @references Arne Henningsen and Jeff D. Hamann (2007). systemfit: A Package for Estimating Systems of Simultaneous Equations in R. Journal of Statistical Software 23(4), 1-40. http://www.jstatsoft.org/v23/i04/.
#' @importFrom systemfit systemfit
#' @importFrom lfe felm
#' @importFrom stats na.omit
#' @importFrom stats formula
#' @importFrom tibble tibble
#' @importFrom tibble is_tibble
#' @importFrom tibble as_tibble
#' @importFrom tidyr unnest
#' @importFrom tidyr spread
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr sample_n
#' @importFrom dplyr %>%
#' @importFrom broom tidy
#' @importFrom parallel parLapply
#' @importFrom parallel makeCluster
#' @importFrom parallel stopCluster
#' @importFrom purrr map
#' @importFrom purrr is_formula
#' @importFrom mice complete
#' @importFrom mice mice
#' @importFrom stats glm
#' @export
estimate <- function(sample = NA,
                     md_imputation = "multiple",
                     md_method = "pmm",
                     size = NA,
                     formula = NA,
                     method = "felm",
                     family = NA,
                     draws = 1000,
                     cores = 2) {

  # Check that the input is correct. stopifnot() evaluates the conditions in
  # order and stops at the first failure, so later checks may rely on earlier
  # ones (e.g. `draws >= 1` is only reached for a single non-missing number).
  stopifnot(
    tibble::is_tibble(sample),
    purrr::is_formula(formula) | is.list(formula),
    method %in% c("glm", "felm", "sysfit"),
    # A system of equations needs a list of formulas; other methods need one.
    (method == "sysfit" & is.list(formula)) |
      (method != "sysfit" & purrr::is_formula(formula)),
    md_imputation %in% c("multiple", "single", "deletion"),
    md_method %in% c("mean", "pmm", "sample", "deletion"),
    # "deletion" has to be selected for both arguments or for neither.
    (md_imputation == "deletion" & md_method == "deletion") |
      (md_imputation != "deletion" & md_method != "deletion"),
    is.numeric(draws),
    length(draws) == 1L,
    !is.na(draws),
    draws >= 1
  )

  # Fractional values are truncated, as `1:draws` and `rep()` used to do.
  n_draws <- as.integer(draws)

  # Worker executed once per draw on the cluster nodes. Every call is fully
  # namespace-qualified because the workers are fresh R sessions.
  run_model <- function(sample = NA,
                        md_imputation = "multiple",
                        md_method = "pmm",
                        size = NA,
                        formula = NA,
                        method = "felm",
                        family = NA) {
    sample <- modestim::prepare_sample(
      sample,
      md_imputation = md_imputation,
      md_method = md_method,
      size = size
    )
    model <- modestim::fit_regression(
      sample,
      formula = formula,
      method = method,
      family = family
    )
    # Release the prepared sample before handing the result back.
    rm(sample)
    gc()
    model
  }

  # One row per draw; each row initially carries a copy of the input data,
  # which is then replaced by the outcome of the estimation for that draw.
  base <- tibble::tibble(
    draw = seq_len(n_draws),
    model = rep(list(sample), n_draws)
  )

  clust <- parallel::makeCluster(cores)
  # Guarantee that the workers are shut down even if a draw fails.
  on.exit(parallel::stopCluster(clust), add = TRUE)

  base$model <- parallel::parLapply(
    cl = clust,
    X = base$model,
    fun = run_model,
    md_imputation = md_imputation,
    md_method = md_method,
    size = size,
    formula = formula,
    method = method,
    family = family
  )

  # `model` is the only list-column; naming it explicitly avoids the
  # "cols is now required" warning of recent tidyr versions.
  tidyr::unnest(base, model)
}
NicolasJBM/modestim documentation built on Aug. 26, 2019, 5:29 a.m.