R/diffs.R
In panelr: Regression Models and Utilities for Repeated Measures and Panel Data

Documented in fdm glance.fdm make_diff_data tidy.fdm

#' @title Estimate first differences models using GLS
#' @description The function fits first difference models using GLS estimation.
#' @inheritParams asym
#' @references 
#' Allison, P. D. (2019). Asymmetric fixed-effects models for panel data.
#' *Socius*, *5*, 1-12. https://doi.org/10.1177/2378023119826441
#' @examples 
#' 
#' if (requireNamespace("clubSandwich")) {
#'   data("teen_poverty")
#'   # Convert to long format
#'   teen <- long_panel(teen_poverty, begin = 1, end = 5)
#'   model <- fdm(hours ~ lag(pov) + spouse, data = teen)
#'   summary(model)
#' }
#' 
#' @rdname fdm
#' @export 
fdm <- function(formula, data, id = NULL, wave = NULL, use.wave = FALSE,
                min.waves = 1, 
                variance = c("toeplitz-1", "constrained", "unconstrained"), 
                error.type = c("CR2", "CR1S"), ...) {
  
  if (!requireNamespace("nlme")) need_package("nlme")
  if (!requireNamespace("clubSandwich")) need_package("clubSandwich")
  
  the_call <- match.call()
  the_env <- parent.frame()
  
  formula <- Formula::Formula(formula)
  
  # Send to helper function for data prep
  prepped <- diff_data(formula = formula, data = data, id = id, 
                       wave = wave,  min.waves = min.waves, 
                       weights = NULL, use.wave = use.wave, escape = TRUE)
  
  e <- prepped$e
  pf <- prepped$pf
  data <- e$data
  wave <- prepped$wave
  id <- prepped$id
  dv <- prepped$dv

  # Use helper function to generate formula to pass to lme4
  fin_formula <- as.formula(e$fin_formula)
  if (!is.null(pf$constants)) {
    constants <- paste(pf$constants, collapse = " - ")
    up_form <- as.formula(paste(". ~ . -", constants))
    fin_formula <- update(fin_formula, up_form)
  }
  
  if (variance[1] == "unconstrained") {
    cor_form <- as.formula(paste("~ 1 |", id))
    var_form <- as.formula(paste("~ 1 |", wave))
    corr <- nlme::corSymm(form = cor_form)       # unstructured covariance
    weights <- nlme::varIdent(form = var_form)
  } else if (variance[1] == "constrained") {
    cor_form <- as.formula(paste(" ~ 1 |", id))
    corr <- nlme::corARMA(value = -.9999, form = cor_form, p = 0, q = 1,
                          fixed = TRUE)
    weights <- NULL
  } else if (variance[1] == "toeplitz-1") {
    cor_form <- as.formula(paste(" ~ 1 |", id))
    corr <- nlme::corARMA(form = cor_form, p = 0, q = 1)
    weights <- NULL
  }
  
  gls_mod <- nlme::gls(fin_formula, data = as.data.frame(data),
                       na.action = na.omit, 
                       correlation = corr, weights = weights)
  
  gls_mod$call$model <- substitute(fin_formula)
  # gls_mod$call <- gls_mod$call[names(gls_mod$call) %not% "data"]
  the_vcov <- vcov_CR(gls_mod, cluster = data[[id]], type = error.type[1],
                      data = data)
  coef_table <- clubSandwich::coef_test(gls_mod, vcov = the_vcov,
                                        test = "naive-t", cluster = data[[id]])
  coef_table <- as.data.frame(coef_table[c("beta","SE","tstat","p_t")])
  if ("tstat" %nin% names(coef_table)) { # old version of clubSandwich
    names(coef_table) <- c("estimate", "std.error", "p.value")
    coef_table["statistic"] <- coef_table$estimate / coef_table$std.error
  } else {
    names(coef_table) <- c("estimate", "std.error", "statistic", "p.value")
    coef_table["term"] <- rownames(coef_table)
  }
  
  mod_info <- list(dv = dv, min.wave = prepped$minwave, 
                   max.wave = prepped$maxwave, 
                   num_distinct = prepped$num_distinct,
                   AIC = AIC(gls_mod), BIC = BIC(gls_mod), 
                   variance = variance[1], errors = error.type[1])
  gls_mod$mod_info <- mod_info
  gls_mod$coef_table <- coef_table
  colnames(the_vcov) <- rownames(coef_table)
  rownames(the_vcov) <- rownames(coef_table)
  gls_mod$vcov <- the_vcov
  # Make it a fdm object
  class(gls_mod) <- c("fdm", "gls")
  gls_mod
}

#' @export
summary.fdm <- function(object, ...) {
  
  dots <- list(...)
  if ("digits" %in% names(dots)) {
    digits <- dots$digits
  } else {
    digits <- getOption("jtools-digits", 2)
  }
  
  x <- object
  
  mod_fit <- paste0(bold("MODEL FIT:\n"),
                    italic("AIC = "), round(x$mod_info$AIC, digits),
                    italic(", BIC = "), round(x$mod_info$BIC, digits),  "\n")
  
  if (x$mod_info$variance == "toeplitz-1") {
    corstruct <- object$modelStruct$corStruct
    class(corstruct) <- "corARMA"
    theta <- coef(corstruct, unconstrained = FALSE)
    variance <- paste0(italic("Variance structure: "), x$mod_info$variance,
                       " (theta = ", round(theta, digits), ")")
  } else {
    variance <- paste0(italic("Variance structure: "), x$mod_info$variance)
  }

  mod_info <- paste0(bold("MODEL INFO:\n"),
                     italic("Entities: "), x$mod_info$num_distinct, "\n",
                     italic("Time periods: "), paste0(x$mod_info$min.wave, "-",
                                                      x$mod_info$max.wave), "\n",
                     italic("Dependent variable: "), x$mod_info$dv, "\n",
                     variance
  )
  
  mod_info_list <- list(N = x$mod_info$num_distinct, 
                        min_wave = x$mod_info$min.wave, 
                        max_wave = x$mod_info$max.wave, 
                        variance = x$mod_info$variance, AIC = x$mod_info$AIC,
                        BIC = x$mod_info$BIC)
  
  coef_table <- x$coef_table
  names(coef_table) <- sapply(names(coef_table), function(x) {switch(x,
    "estimate" = "Est.",
    "std.error" = "S.E.",
    "p.value" = "p",
    "statistic" = "t val.",
    x
  )})
  rownames(coef_table) <- coef_table$term
  coef_table <- coef_table[c("Est.", "S.E.", "t val.", "p")]
      
  out <- list(mod_info = mod_info, coef_table = coef_table, digits = digits,
              mod_fit = mod_fit, errors = x$mod_info$errors,
              mod_info_list = mod_info_list)
  class(out) <- "summary.fdm"
  out
}

#' @export 
print.summary.fdm <- function(x, ...) {
  cat(x$mod_info, "\n\n")
  cat(x$mod_fit, "\n")
  cat(italic("Standard errors:"), x$errors, "\n")
  print(md_table(x$coef_table, digits = x$digits, sig.digits = FALSE,
                 format = getOption("panelr.table.format", "multiline")))
}

#' Generate differenced and asymmetric effects data
#' 
#' This is an interface to the internal functions that process data for
#' [fdm()], [asym()], and [asym_gee()].
#' 
#' @inheritParams asym
#' @inheritParams wbgee
#' @param asym Return asymmetric effects transformed data? Default is FALSE.
#' @param cumulative Return cumulative positive/negative differences, most
#'  useful for fixed effects estimation and/or generalized linear models? 
#'  Default is FALSE.
#' @param escape.names Return only syntactically valid variable names? 
#'  Default is FALSE.
#'  
#' @examples 
#' 
#' data("WageData")
#' wages <- panel_data(WageData, id = id, wave = t)
#' make_diff_data(wks ~ lwage + union, data = wages)
#'  
#' @export
make_diff_data <- function(formula, data, id = NULL, wave = NULL,
                           use.wave = FALSE, min.waves = 1, weights = NULL,
                           offset = NULL, asym = FALSE, cumulative = FALSE, 
                           escape.names = FALSE, ...) {
  the_call <- match.call()
  the_env <- parent.frame()
  
  formula <- Formula::Formula(formula)
  
  # Send to helper function for data prep
  prepped <- diff_data(formula = formula, data = data, id = id, 
                       wave = wave,  min.waves = min.waves, 
                       weights = !! enquo(weights), offset = !! enquo(offset), 
                       use.wave = use.wave, ignore.lhs = cumulative, 
                       asym = asym, escape = escape.names)
  
  prepped$e$data
}

#' @importFrom stats contr.treatment "contrasts<-"
diff_data <- function(formula, data, id = NULL, wave = NULL, min.waves = 2, 
                      weights = NULL, offset = NULL, use.wave = FALSE, 
                      asym = FALSE, ignore.lhs = FALSE, escape = TRUE, ...) {
    
  # Get data prepped
  if ("panel_data" %in% class(data)) {
    id <- attr(data, "id")
    wave <- attr(data, "wave")
  }
  
  if ("data.frame" %nin% class(data)) {
    stop("data argument must be a data frame.")
  }
  
  # Coerce to panel_data
  data <- panel_data(data, id = !! sym(id), wave = !! sym(wave))
  # Save this input data separately
  orig_data <- data
  
  # Get the weights argument like lm() does (can be name or object)
  weights <- eval_tidy(enquo(weights), data)
  # Append to data with special name
  if (!is.null(weights)) {data[".weights"] <- weights}
  # Get the offset argument like lm() does (can be name or object)
  offset <- eval_tidy(enquo(offset), data)
  # Append to data with special name
  if (!is.null(offset)) {data[".offset"] <- offset}
  
  # Get the left-hand side
  dv <- as.character((attr(formula, "lhs")))
  # Pass to helper function
  pf <- wb_formula_parser(formula, dv, data)
  
  # if (!is.null(pf$constants)) {
  #   warn_wrap("Constants are ignored with first difference models")
  # }
  
  # check for wave in formula
  wave_terms <- if (!is.numeric(data[[wave]])) expand_labels(data, wave) else {
    wave
  }
  if (any(wave_terms %in% pf$varying)) {
    pf$varying <- pf$varying %not% wave_terms
    use.wave <- TRUE
  }
  
  # Create formula to pass to model_frame
  mf_form <- paste(" ~ ",
                   paste(pf$allvars, collapse = " + "),
                   collapse = ""
  )
  # Escape non-syntactic variables that are in the data
  mf_form <- formula_ticks(mf_form, names(pf$data))
  
  # Add weights to keep it in the DF
  if (!is.null(weights)) {
    mf_form <- paste(mf_form, "+", ".weights")
  }
  
  # Pass to special model_frame function that respects tibble groupings
  data <- model_frame(update(as.formula(mf_form),
                             as.formula(paste("~ . -", id, "-", wave))), 
                      data = pf$data)
  
  pf$varying <- un_bt(pf$varying)
  
  # Send to helper that will demean, etc.
  e <- diff_model(pf, dv, data, use.wave, asym, ignore.lhs, escape)
  
  # Drop missing cases, collect some metadata 
  e$data <- complete_cases(e$data, min.waves = min.waves)
  num_distinct <- length(unique(e$data[[id]]))
  maxwave <- max(e$data[[wave]])
  minwave <- min(e$data[[wave]])
  if (use.wave == TRUE & !is.numeric(e$data[[wave]])) {
    e$data[[wave]] <- droplevels(e$data[[wave]])
    contrasts(e$data[[wave]]) <- contr.treatment(levels(e$data[[wave]]))
  }
  
  # Modify weights object to reflect missing data drops
  if (!is.null(weights)) {
    weights <- e$data$.weights
  }
  
  # Modify weights object to reflect missing data drops
  if (!is.null(offset)) {
    offset <- e$data$.offset
  }
  
  list(e = e, num_distinct = num_distinct, maxwave = maxwave,
       minwave = minwave, weights = weights,
       pf = pf, id = id, wave = wave, dv = dv, orig_data = orig_data,
       mf_form = mf_form)
    
}

diff_model <- function(pf, dv, data, use.wave = FALSE, asym = FALSE,
                       ignore.lhs = FALSE, escape = TRUE) {
  
  # Extract wave and id
  wave <- get_wave(data)
  id <- get_id(data)
  
  vars <- pf$varying
  if (ignore.lhs == FALSE) vars <- c(dv, vars)
  for (var in vars %not% wave) {
    lag_var <- paste0(bt(var), " - lag(`", var, "`)")
    data <- mutate(data, 
                   !! var := !! parse_expr(lag_var)
            )
  }
  
  asym_list <- list()
  if (asym == TRUE) {
    for (var in c(pf$varying) %not% wave) {
      plus_var <- if (escape) paste0("plus__", var) else paste0("+", var)
      minus_var <- if (escape) paste0("minus__", var) else paste0("-", var)
      plus_expr <- paste0(bt(var), "* (", bt(var), "> 0)")
      neg_expr <- paste0("-", bt(var), "* (", bt(var), "< 0)")
      data <- mutate(data,
                     !! plus_var := !! parse_expr(plus_expr),
                     !! minus_var := !! parse_expr(neg_expr)
              )
      asym_list[[var]] <- c(plus_var, minus_var)
    } 
  }
  
  # Put the pieces together
  if (asym == FALSE) {
    the_terms <- make_names(un_bt(c(pf$varying, pf$wint_labs, pf$cint_labs)), 
                            int = TRUE)
    the_terms <- str_replace_all(the_terms, "_by_", "\\*")
    names(data) <- make_names(names(data))
    # Deal with interactions
    if (!is.null(c(pf$wint_labs, pf$cint_labs))) {
      ints <- asym_ints(data = data, pf = pf, asym_list = list())
      data <- ints$data
      names(data) <- make_names(un_bt(names(data)), int = TRUE)
      the_terms <- c(the_terms, ints$new_terms)
    }
  } else {
    the_terms <- unlist(asym_list)
    # Drop non-varying ones for monotonically increasing/decreasing vars
    varyings <- are_varying(data)
    # Want to let user know what's going on with those
    which_not_varying <- the_terms %just% un_bt(names(varyings %just% FALSE))
    if (length(which_not_varying) > 0) {
      for (var in which_not_varying) {
        plus <- if (escape) "plus__" else "^\\+"
        minus <- if (escape) "minus__" else "^\\-"
        if (stringr::str_detect(var, paste0("^", plus))) {
          bare_var <- stringr::str_extract(var, paste0("(?<=(", plus, ")).*"))
          msg_wrap(bare_var, " does not increase over time so +", bare_var, 
                   " is not included in the model.")
        } else if (stringr::str_detect(var, paste0("^", minus))) {
          bare_var <- stringr::str_extract(var, paste0("(?<=(", minus, ")).*"))
          msg_wrap(bare_var, " does not decrease over time so -", bare_var, 
                   " is not included in the model.")
        }
      }
    }
    # Drop those terms now
    the_terms <- the_terms %not% un_bt(names(varyings %just% FALSE))
    # Edit the list of asymmetric effects
    asym_list <- lapply(asym_list, function(x) {
      x %not% un_bt(names(varyings %just% FALSE))
    })
    lengths <- sapply(asym_list, length) == 2
    asym_list <- asym_list[lengths]
    if (escape) {
      asym_list <- lapply(asym_list, make_names)
      names(asym_list) <- make_names(names(asym_list))
      the_terms <- make_names(the_terms)
      names(data) <- make_names(names(data))
    } else {
      the_terms <- bt(the_terms)
    }
    # Deal with interactions
    if (!is.null(c(pf$wint_labs, pf$cint_labs))) {
      ints <- asym_ints(data = data, pf = pf, asym_list = asym_list,
                        escape = escape)
      data <- ints$data
      if (escape) {
        names(data) <- make_names(names(data))
      }
      the_terms <- c(the_terms, ints$new_terms)
    }
  }
  
  # Now do cumulative sums if appropriate
  if (asym == TRUE & ignore.lhs == TRUE) {
    for (var in c(pf$varying) %not% wave) {
      plus_var <- if (escape) paste0("plus__", var) else paste0("+", var)
      minus_var <- if (escape) paste0("minus__", var) else paste0("-", var)
      # First need to set the wave 1 values to 0 instead of NA
      set_zero_plus <- paste0("case_when(", wave, " == ", get_periods(data)[1],
                              " ~ 0, TRUE ~", bt(plus_var), ")")
      set_zero_neg <- paste0("case_when(", wave, " == ", get_periods(data)[1],
                              " ~ 0, TRUE ~", bt(minus_var), ")")
      plus_expr <- paste0("cumsum(", bt(plus_var), ")")
      neg_expr <- paste0("cumsum(", bt(minus_var), ")")
      data <- mutate(data,
                     !! plus_var := !! parse_expr(set_zero_plus),
                     !! minus_var := !! parse_expr(set_zero_neg),
                     !! plus_var := !! parse_expr(plus_expr),
                     !! minus_var := !! parse_expr(neg_expr)
      )
    }
  }
  
  fin_formula <- paste(bt(dv), "~", paste(the_terms, collapse = " + "))
  if (use.wave == TRUE) fin_formula <- paste(fin_formula, "+", wave)
  
  out <- list(data = data, fin_formula = fin_formula, asym_list = asym_list)
  return(out)
  
}

asym_ints <- function(data, pf, asym_list, escape = TRUE) {
  all_ints <- c(pf$wint_labs, pf$cint_labs)
  all_terms <- NULL
  for (int in all_ints) {
    name_make <- if (escape) make_names else function(x) return(x)
    splits <- as.list(name_make(un_bt(str_split(int, "\\*")[[1]])))
    names(splits) <- splits
    for (spl in names(splits)) {
      if (un_bt(spl) %in% names(asym_list)) {
        splits[[spl]] <- asym_list[[un_bt(spl)]]
      }
    }
    term_grid <- expand.grid(splits)
    term_grid <- as.data.frame(lapply(term_grid, bt))
    new_terms <- rep(NA, nrow(term_grid))
    for (r in 1:nrow(term_grid)) {
      new_terms[r] <- paste0(unlist(term_grid[r, , drop = TRUE]), collapse = "*")
      # data <- mutate(data,
      #   !! un_bt(new_terms[r]) := !! parse_expr(new_terms[r])
      # )
    }
    all_terms <- c(all_terms, new_terms)
  }
  list(data = data, new_terms = all_terms)
}

#' @export
coef.fdm <- function(object, ...) {
  out <- object$coef_table$estimate
  names(out) <- rownames(object$coef_table)
  out
}

#' @export
vcov.fdm <- function(object, ...) {
  object$vcov
}

#' @export
confint.fdm <- function(object, parm, level = .95, ...) {
  confint.wbgee(object, parm = parm, level = level, ...)
}

#' @title Tidy methods for `fdm` and `asym` models
#' @description `panelr` provides methods to access `fdm` and `asym` data in a
#'  tidy format
#' @rdname fdm_tidiers
#' @param x An `fdm` or `asym` object.
#' @param conf.int Logical indicating whether or not to include a confidence
#'  interval in the tidied output. Defaults to `FALSE`.
#' @param conf.level The confidence level to use for the confidence interval if
#'  `conf.int = TRUE`. Must be strictly greater than 0 and less than 1. Defaults
#'  to 0.95, which corresponds to a 95 percent confidence interval.
#' @param ... Ignored
#' @examples 
#' 
#' if (requireNamespace("clubSandwich")) {
#'   data("WageData")
#'   wages <- panel_data(WageData, id = id, wave = t)
#'   model <- fdm(lwage ~ wks + union, data = wages)
#'   if (requireNamespace("generics")) {
#'     generics::tidy(model)
#'   }
#' }
#' 
#' @rawNamespace 
#' if (getRversion() >= "3.6.0") {
#'   S3method(generics::tidy, fdm)
#' } else {
#'   export(tidy.fdm)
#' }

tidy.fdm <- function(x, conf.int = FALSE, conf.level = .95, ...) {
  
  if (!requireNamespace("generics")) {
    stop_wrap("You must have the generics package to use tidy methods.")
  }
  
  params <- x$coef_table
  # params$term <- rownames(params)
  
  # Getting confidence intervals if requested
  if (conf.int == TRUE) {
    ints <- as.data.frame(confint(x, level = conf.level))
    # Renaming the columns to fit the tidy model
    names(ints) <- c("conf.low", "conf.high")
    # Renaming the terms to remove the backticks to match the params d.f.
    ints$term <- stringr::str_remove_all(rownames(ints), "`")
    # Put things together
    params <- dplyr::left_join(params, ints, by = "term")
  }
  return(tibble::as_tibble( # Return a tibble
    # Only return the relevant columns
    params %just% c("term", "estimate", "statistic", "std.error", 
                    "conf.low", "conf.high", "p.value", "group")
  ))
}

#' @rdname fdm_tidiers
#' @rawNamespace 
#' if (getRversion() >= "3.6.0") {
#'   S3method(generics::glance, fdm)
#' } else {
#'   export(glance.fdm)
#' }
glance.fdm <- function(x, ...) {
  sum <- summary(x)
  mod_info_list <- sum$mod_info_list
  mod_info_list[sapply(mod_info_list, is.null)] <- NA
  return(tibble::as_tibble(mod_info_list))
}
Any scripts or data that you put into this service are public.
panelr documentation built on Aug. 22, 2023, 5:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
panelr
Regression Models and Utilities for Repeated Measures and Panel Data

R/diffs.R
In panelr: Regression Models and Utilities for Repeated Measures and Panel Data

Defines functions glance.fdm tidy.fdm confint.fdm vcov.fdm coef.fdm asym_ints diff_model diff_data make_diff_data print.summary.fdm summary.fdm fdm

Documented in fdm glance.fdm make_diff_data tidy.fdm

Try the panelr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

panelr Regression Models and Utilities for Repeated Measures and Panel Data

R/diffs.R In panelr: Regression Models and Utilities for Repeated Measures and Panel Data

Defines functions glance.fdm tidy.fdm confint.fdm vcov.fdm coef.fdm asym_ints diff_model diff_data make_diff_data print.summary.fdm summary.fdm fdm

Documented in fdm glance.fdm make_diff_data tidy.fdm

Try the panelr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

panelr
Regression Models and Utilities for Repeated Measures and Panel Data

R/diffs.R
In panelr: Regression Models and Utilities for Repeated Measures and Panel Data