Nothing
#' Regression-based Hausman test
#'
#' \lifecycle{experimental}
#'
#' Calculates the regression-based Hausman test to be used to compare
#' OLS to 2SLS estimates or 2SLS to 3SLS estimates. See e.g., \insertCite{Wooldridge2010;textual}{cSEM}
#' (pages 131 f.) for details.
#'
#' The function is somewhat experimental. Only use if you know what you are doing.
#'
#' @usage testHausman(
#' .object = NULL,
#' .eval_plan = c("sequential", "multicore", "multisession"),
#' .handle_inadmissibles = c("drop", "ignore", "replace"),
#' .R = 499,
#' .resample_method = c("bootstrap", "jackknife"),
#' .seed = NULL
#' )
#'
#' @inheritParams csem_arguments
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso [csem()], [cSEMResults]
#'
#' @example inst/examples/example_testHausman.R
#'
#' @export
testHausman <- function(
.object = NULL,
.eval_plan = c("sequential", "multicore", "multisession"),
.handle_inadmissibles = c("drop", "ignore", "replace"),
.R = 499,
.resample_method = c("bootstrap", "jackknife"),
.seed = NULL
) {
.eval_plan <- match.arg(.eval_plan)
.handle_inadmissibles <- match.arg(.handle_inadmissibles)
.resample_method <- match.arg(.resample_method)
if(!inherits(.object, "cSEMResults_default")) {
stop2("Hausman test currently only available for linear models.")
}
# Define function to bootstrap
fun_residuals <- function(.object) {
## Extract relevant quantities
m <- .object$Information$Model
P <- .object$Estimates$Construct_VCV
# Dependent (LHS) variables of the structural equations
dep_vars <- rownames(m$structural)[rowSums(m$structural) != 0]
res <- lapply(dep_vars, function(y) {
# Which of the variables in dep_vars have instruments specified, i.e.
# have endogenous variables on the RHS. By default: FALSE.
endo_in_RHS <- FALSE
if(!is.null(m$instruments)) {
endo_in_RHS <- y %in% names(m$instruments)
} else {
stop2("The following error occured in the testHausman() function:\n",
"Instruments required.")
}
# All independent variables (X) of the structural equation of construct y
# (including the endogenous RHS variables)
names_X <- colnames(m$structural[y, m$structural[y, ] != 0, drop = FALSE])
## Only for equations with endogenous variables on the RHS do we need to compute
## the Hausman test.
if(endo_in_RHS & (.object$Information$Arguments$.approach_paths == "2SLS")) {
## First stage
# Note: Technically, we only need to regress the P endogenous variables
# (y1) on the L instruments and the K exogenous independent variables
# (which must be part of Z).
# Therefore: y1 (N x P) and Z (N x (L + K)) and
# beta_1st = (Z'Z)^-1*(Z'y1)
#
# beta_1st would be ((L + K) x P), i.e. each columns represents the
# first stage estimates for a regression of the instruments on
# on the p'th endogenous variable.
names_endo <- rownames(m$instruments[[y]]) # names of the endogenous variables (y1's)
names_Z <- colnames(m$instruments[[y]]) # names of the instruments
# Assuming that P (the construct correlation matrix) also contains
# the instruments (ensured if only internal instruments are allowed)
beta_1st <- solve(P[names_Z, names_Z, drop = FALSE],
P[names_Z, names_endo, drop = FALSE])
## Second stage
# Note: we have to use (construct) correlations here since using
# the plain score would yield inconsistent estimates for concepts
# modeled as common factors. Hence we need to "translate" the
# regression based second stage estimation
# y = X * beta_1 + v_hat*beta_2 + u
# and the estimator
# beta_2nd = (W'W)^-1W'y
# into correlations.
#
# y1_hat = Z*beta_1st
# v_hat = y1 - y1_hat = y1 - Z*beta_1st
#
# y = X * beta_1 + v_hat*beta_2 + u
#
# Define W = [X; v_hat]
#
# E(W'W ) = E[X'X ; X'v_hat
# v_hat'X ; v_hat'v_hat]
# E(W'y) = E[X'y ; v_hat'y]'
ww11 <- P[names_X, names_X, drop = FALSE]
ww12 <- P[names_X, names_endo, drop = FALSE] - P[names_X, names_Z, drop = FALSE] %*% beta_1st
ww21 <- t(ww12)
ww22 <- P[names_endo, names_endo, drop = FALSE] -
P[names_endo, names_Z, drop = FALSE] %*% beta_1st -
t(beta_1st) %*% P[names_Z, names_endo, drop = FALSE] +
t(beta_1st) %*% P[names_Z, names_Z, drop = FALSE] %*% beta_1st
WW <- rbind(cbind(ww11, ww12), cbind(ww21, ww22))
wy1 <- P[names_X, y, drop = FALSE]
wy2 <- P[names_endo, y, drop = FALSE] - t(beta_1st) %*% P[names_Z, y, drop = FALSE]
Wy <- rbind(wy1, wy2)
## Estimate the second stage estimation
beta_2nd <- solve(WW, Wy)
rownames(beta_2nd) <- c(names_X, paste0("Resid_", names_endo))
colnames(beta_2nd) <- y
return(beta_2nd)
} else {
NA
}
})
names(res) <- dep_vars
res <- Filter(Negate(anyNA), res)
# Vectorize "res" to be able to use resamplecSEMResults (which requires
# output to be a vector or a matrix)
# 1. Assign a unique name to each element
res <- lapply(res, function(x) {
rownames(x) <- paste0(colnames(x), "_", rownames(x))
x
})
# 2. Combine, vectorize and assign names
res2 <- do.call(rbind, res)
res <- c(res2)
names(res) <- rownames(res2)
res
}
## Resample to get standard errors
out <- resamplecSEMResults(
.object = .object,
.resample_method = .resample_method,
.R = .R,
.handle_inadmissibles = .handle_inadmissibles,
.user_funs = fun_residuals,
.eval_plan = .eval_plan,
.seed = .seed
)
## Get relevant quantities
out_infer <- infer(out)
beta <- out$Estimates$Estimates_resample$Estimates1$User_fun$Original
se <- out_infer$User_fun$sd
t <- beta/se
p_normal <- 2*pnorm(abs(t), mean = 0, sd = 1, lower.tail = FALSE)
# ci_percentile <- out_infer$User_fun$CI_percentile
out_data_frame <- data.frame(
"Dep_construct" = rep(out$Information$Model$cons_endo,
sapply(out$Information$Model$instruments, sum)),
"Name" = names(beta),
"Estimate" = beta,
"Std_error" = se,
"t_stat" = t,
"p_value" = p_normal,
# "Ci_perc_L" = ci_percentile[1, ],
# "Ci_perc_H" = ci_percentile[2, ],
stringsAsFactors = FALSE
)
rownames(out_data_frame) <- NULL
## Split data frames
out_temp <- split(out_data_frame, out_data_frame$Dep_construct)
for(i in names(out_temp)) {
out_temp[[i]] <- out_temp[[i]][, -1] # remove Dep_construct column
out_temp[[i]][, "Name"] <- gsub(paste0(i, "_"), "", out_temp[[i]][, "Name"])
}
out <- list(
"Regressions" = out_temp,
"Information" = list(
"Seed" = .seed,
"Number_of_runs" = .R
)
)
class(out) <- "cSEMTestHausman"
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.