##### Functions for regimes ####
#' @title Estimation of regime models with endogenous variables
#' @description The function \code{ivregimes} deals with
#' the estimation of regime models.
#' Most of the times the variable identifying the regimes
#' reveals some spatial aspects of the data (e.g., administrative boundaries).
#' The model includes exogenous as well as endogenous
#' variables among the regressors.
#' @name ivregimes
#' @param formula a symbolic description of the model of the form \code{y ~ x_f | x_v | h_f | h_v} where \code{y} is the dependent variable, \code{x_f} are the regressors that do not vary by regimes, \code{x_v} are the regressors that vary by regimes, \code{h_f} are the fixed instruments and \code{h_v} are the instruments that vary by regimes.
#' @param data the data of class \code{data.frame}.
#' @param rgv an object of class \code{formula} to identify the regime variables
#' @param vc one of \code{c("homoskedastic", "robust", "OGMM")}. If \code{"OGMM"} an optimal weighted GMM is used to estimate the VC matrix.
#'
#'
#' @details
#'
#' The basic (non spatial) model with endogenous variables
#' can be written in a general way as:
#' \deqn{
#' y
#' =
#' \begin{bmatrix}
#' X_1& 0 \\
#' 0 & X_2 \\
#' \end{bmatrix}
#' \begin{bmatrix}
#' \beta_1 \\
#' \beta_2 \\
#' \end{bmatrix}
#' + X\beta +
#' \begin{bmatrix}
#' Y_1& 0 \\
#' 0 & Y_2 \\
#' \end{bmatrix}
#' \begin{bmatrix}
#' \pi_1 \\
#' \pi_2 \\
#' \end{bmatrix}
#' + Y\pi +
#' \varepsilon
#' }
#' where \eqn{y = [y_1^\prime,y_2^\prime]^\prime},
#' and the \eqn{n_1 \times 1} vector \eqn{y_1} contains the observations
#' on the dependent variable for the first regime,
#' and the \eqn{n_2 \times 1} vector \eqn{y_2} (with \eqn{n_1 + n_2 = n})
#' contains the observations on the dependent variable for the second regime.
#' The \eqn{n_1 \times k} matrix \eqn{X_1} and the \eqn{n_2 \times k}
#' matrix \eqn{X_2} are blocks of a block diagonal matrix,
#' the vectors of parameters \eqn{\beta_1} and \eqn{\beta_2} have
#' dimension \eqn{k_1 \times 1} and \eqn{k_2 \times 1}, respectively,
#' \eqn{X} is the \eqn{n \times p} matrix of regressors that do not vary by regime,
#' \eqn{\beta} is a \eqn{p\times 1} vector of parameters.
#' The three matrices \eqn{Y_1} (\eqn{n_1 \times q}),
#' \eqn{Y_2} (\eqn{n_2 \times q}) and \eqn{Y} (\eqn{n \times r})
#' with corresponding vectors of parameters \eqn{\pi_1}, \eqn{\pi_2} and \eqn{\pi},
#' contain the endogenous variables.
#' Finally, \eqn{\varepsilon = [\varepsilon_1^\prime,\varepsilon_2^\prime]^\prime}
#' is the \eqn{n\times 1} vector of innovations.
#' The model is estimated by two stage least square.
#' In particular:
#' \itemize{
#' \item If \code{vc = "homoskedastic"},
#' the variance-covariance matrix is estimated by \eqn{\sigma^2(\hat Z^\prime \hat Z)^{-1}},
#' where \eqn{\hat Z= PZ}, \eqn{P= H(H^\prime H)^{-1}H^\prime}, \eqn{H} is the matrix of instruments,
#' and \eqn{Z} is the matrix of all exogenous and endogenous variables in the model.
#'
#' \item If \code{vc = "robust"}, the variance-covariance matrix is estimated by
#' \eqn{(\hat Z^\prime \hat Z)^{-1}(\hat Z^\prime \hat\Sigma \hat Z) (\hat Z^\prime \hat Z)^{-1}},
#' where \eqn{\hat\Sigma} is a diagonal matrix with diagonal elements \eqn{\hat\sigma_i},
#' for \eqn{i=1,...,n}.
#' \item Finally, if \code{vc = "OGMM"}, the model is estimated in two steps.
#' In the first step, the model is estimated by 2SLS yielding
#' the residuals \eqn{\hat \varepsilon}.
#' With the residuals, the diagonal matrix \eqn{\hat \Sigma} is estimated and is
#' used to construct the matrix \eqn{\hat S = H^\prime \hat \Sigma H}.
#' Then \eqn{\eta_{OWGMM}=(Z^\prime H\hat S^{-1}H^\prime Z)^{-1}Z^\prime H\hat S^{-1}H^\prime y}, where \eqn{\eta_{OWGMM}}
#' is the vector of all the parameters in the model,
#' The variance-covariance matrix is: \eqn{n(Z^\prime H\hat S^{-1}H^\prime Z)^{-1}}.}
#' @examples
#' data("natreg")
#' form <- HR90 ~ 0 | MA90 + PS90 + RD90 + UE90 | 0 | MA90 + PS90 + RD90 + FH90 + FP89 + GI89
#' split <- ~ REGIONS
#' mod <- ivregimes(formula = form, data = natreg, rgv = split, vc = "robust")
#' summary(mod)
#' mod1 <- ivregimes(formula = form, data = natreg, rgv = split, vc = "OGMM")
#' summary(mod1)
#' form1 <- HR90 ~ MA90 + PS90 | RD90 + UE90 -1 | MA90 + PS90 | RD90 + FH90 + FP89 + GI89 -1
#' mod2 <- ivregimes(formula = form1, data = natreg, rgv = split, vc = "homoskedastic")
#' summary(mod2)
#'
#' @author Gianfranco Piras and Mauricio Sarrias
#' @return An object of class \code{ivregimes}. A \code{list} of five elements. The first element of the list contains the estimation results. The other elements are needed for printing the results.
#' @import Formula sphet stats
#' @export
ivregimes <- function(formula, data, rgv = NULL,
vc = c("homoskedastic", "robust", "OGMM")){
cl <- match.call()
#Obtain arguments
vc <- match.arg(vc)
#process the data
intro <- iv.data.prep.regimes(formula = formula, data = data,
rgv = rgv)
#split the object list intro:
# data
# formula
# k tot number of variables in model
# totr tot of variables varying by regime
y <- intro[[1]]
Hmat <- intro[[2]]
Zmat <- intro[[3]]
colnames.end.f <- intro[[4]]
colnames.end.v <- intro[[5]]
colnames.instr <- intro[[6]]
ct <- intro[[7]][[6]]
res <- tsls_regimes(y, Hmat, Zmat, vc)
res <- Matchgroups(res, ct)
colnames.end.v <- Matchnames(colnames.end.v, ct)
colnames.instr <- Matchnames(colnames.instr, ct)
res <- list(res, cl, colnames.end.f, colnames.end.v, colnames.instr)
class(res) <- c("spregimes","ivregimes")
return(res)
}
tsls_regimes <- function(y, Hmat, Zmat, vc){
df <- nrow(Zmat) - ncol(Zmat)
n <- nrow(Zmat)
HH <- crossprod(Hmat)
Hye <- crossprod(Hmat, Zmat)
bz <- solve(HH, Hye)
Zp <- Hmat %*% bz
ZpZpi <- solve(crossprod(Zp))
betaiv <- ZpZpi %*% crossprod(Zp, as.matrix(y))
yp <- Zmat %*% betaiv
yp <- array(yp, dim = c(length(yp),1),
dimnames = list(seq(1, length(yp)), ""))
e <- as.matrix(y) - yp
e <- array(e, dim = c(length(e),1),
dimnames = list(seq(1, length(e)), ""))
##simple
if(vc == "homoskedastic"){
cpe <- crossprod(e)
vcmatrix <- (as.numeric(cpe) /df) * ZpZpi
results <- list(coefficients = betaiv,
var = vcmatrix,
residuals = e, X = Zmat,
y = y, yp = yp)
}
if(vc == "robust"){
e2 <- e^2
ZoZ <- crossprod(Zp, (as.matrix(Zp) * as.numeric(e2)))
vcmatrix <- ZpZpi %*% ZoZ %*% ZpZpi * (n/df)
results <- list(coefficients = betaiv,
var = vcmatrix,
residuals = e, X = Zmat,
y = y, yp = yp)
}
if(vc == "OGMM"){
e2 <- e^2
uH <- matrix(0, nrow = dim(Hmat)[1], ncol = dim(Hmat)[2])
for (i in 1 : dim(Hmat)[2]) uH[,i] <- e2 * Hmat[,i]
Smat <- crossprod(Hmat, uH)
Smati <- solve(Smat)
ZpH <- crossprod(Zmat, Hmat)
HpZ <- crossprod(Hmat, Zmat)
fp <- solve(ZpH %*% Smati %*% HpZ)
Hpy <- crossprod(Hmat, as.matrix(y))
sp <- ZpH %*% Smati %*% Hpy
b_OWGMM <- fp %*% sp
vcmatrix <- solve(ZpH %*% Smati %*% HpZ)
results <- list(coefficients = b_OWGMM,
var = vcmatrix,
residuals = e, X = Zmat,
y = y, yp = yp)
}
return(results)
}
iv.data.prep.regimes <- function(formula, data, rgv){
n <- dim(data)[1]
splitvar <- as.matrix(lm(rgv, data, method="model.frame"))
sv <- length(unique(splitvar))
svm <- sort(as.numeric(unique(splitvar)), decreasing = F)
rgm <- matrix(,nrow = nrow(data), ncol = 0)
for(i in svm) rgm <- cbind(rgm, ifelse(splitvar == i, 1, 0))
n <- dim(data)[1]
splitvar <- as.matrix(lm(rgv, data, method="model.frame"))
sv <- length(unique(splitvar))
svm <- as.numeric(unique(splitvar))
rgm <- matrix(,nrow = nrow(data), ncol = 0)
for(i in svm) rgm <- cbind(rgm, ifelse(splitvar == i, 1, 0))
mt <- cbind(1:sv, as.numeric(unique(splitvar)))
ct <- as.numeric(mt[order(mt[,2], decreasing = F),1])
ct <- cbind(ct, svm)
l.split <- list(n, splitvar, sv, svm, rgm, ct)
F1 <- Formula(formula)
parts <- length(F1)
if(parts[2] != 4) stop("Formula should have four parts")
mf <- model.frame(F1, data = data)
y <- model.part(F1, data = mf, lhs = 1, drop = FALSE)
## extract X fixed
Xf <- model.matrix(F1, data = mf, rhs = 1, drop = FALSE)
namesxf <- colnames(Xf)
## extract X variable
Xv <- model.matrix(F1, data = mf, rhs = 2, drop = FALSE)
namesxv <- colnames(Xv)
if(any(namesxf == "(Intercept)") && any(namesxv == "(Intercept)"))
stop("(Intercept) cannot be specified as fixed and variable regressor at the same time!")
if(!("(Intercept)" %in% c(namesxf, namesxv)))
warning("The model has been specified without an intercept")
k2 <- dim(Xv)[2]
totc <- sv*k2
XV <- matrix(0, ncol = totc, nrow = n)
seq_1 <- seq(1, totc, k2)
seq_2 <- seq(k2, totc, k2)
for(i in 1:sv) XV[,seq_1[i]:seq_2[i]] <- Xv * rgm[,i]
namesxv <- paste(namesxv, rep(1:sv, each = k2), sep = "_")
colnames(XV) <- namesxv
namesZ <- c(namesxf, namesxv)
## extract instrument
Zf <- model.matrix(F1, data = mf, rhs = 3, drop = FALSE)
nameszf <- colnames(Zf)
Zv <- model.matrix(F1, data = mf, rhs = 4, drop = FALSE)
nameszv <- colnames(Zv)
if(any(nameszf == "(Intercept)") && any(nameszv == "(Intercept)"))
stop("(Intercept) cannot be specified as fixed and variable instruments at the same time!")
if(any(namesxf == "(Intercept)") && any(nameszv == "(Intercept)"))
stop("(Intercept) in instruments should not vary")
if(any(namesxv == "(Intercept)") && any(nameszf == "(Intercept)"))
stop("(Intercept) in instruments should vary")
if(!("(Intercept)" %in% c(nameszf, nameszv)))
warning("The model has been specified without an intercept but the instruments include the intercept")
k3 <- dim(Zv)[2]
totz <- sv*k3
ZV <- matrix(0, ncol = totz, nrow = n)
seq_1 <- seq(1, totz, k3)
seq_2 <- seq(k3, totz, k3)
for(i in 1:sv) ZV[,seq_1[i]:seq_2[i]] <- Zv * rgm[,i]
nameszV <- paste(nameszv,rep(1:sv,each = k3), sep = "_")
namesH <- c(nameszf, nameszV)
## if instruments check identification
end.f <- Xf[, !(colnames(Xf) %in% colnames(Zf)), drop = FALSE]
end.v <- Xv[, !(colnames(Xv) %in% colnames(Zv)), drop = FALSE]
colnames.end.f <- colnames(end.f)
colnames.end.v <- colnames(end.v)
colnames.end.v <- paste(rep(colnames.end.v, each = sv), "_", 1:sv, sep = "")
# cat("the endogenous fixed variables are ", colnames(end.f), "\n")
# cat("and the instruments are ", colnames(Zf), "\n")
#
# cat("the endogenous split variables are ", colnames(end.v), "\n")
# cat("and the instruments are ", colnames(Zv), "\n")
Hmat <- cbind(as.matrix(Zf), as.matrix(ZV))
Zmat <- cbind(as.matrix(Xf), as.matrix(XV))
colinst <- namesH[-which((namesH %in% namesZ))]
if(length(colinst) < length(c(colnames.end.f, colnames.end.v)))
stop("Not enough instruments specified: the model is not identified")
ret <- list(y = y, Hmat = Hmat, Zmat = Zmat, colnames.end.f, colnames.end.v, colinst, l.split)
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.