##### Functions for regimes ####
#' @title Estimation of regimes models
#' @description The function \code{regimes} deals with
#' the estimation of regime models.
#' Most of the times the variable identifying the regimes
#' reveals some spatial aspects of the data (e.g., administrative boundaries).
#'
#'
#' @name regimes
#' @param formula a symbolic description of the model of the form \code{y ~ x_f | x_v} where \code{y} is the dependent variable, \code{x_f} are the regressors that do not vary by regimes and \code{x_v} are the regressors that vary by regimes
#' @param data the data of class \code{data.frame}.
#' @param rgv an object of class \code{formula} to identify the regime variables
#' @param vc one of \code{c("homoskedastic", "groupwise")}. If \code{groupwise}, the model VC matrix is estimated by weighted least square.
#' @details
#' For convenience and without loss of generality,
#' we assume the presence of only two regimes.
#' In this case,
#' the basic (non-spatial) is:
#'\deqn{
#' y
#' =
#' \begin{bmatrix}
#' X_1& 0 \\
#' 0 & X_2 \\
#' \end{bmatrix}
#' \begin{bmatrix}
#' \beta_1 \\
#' \beta_2 \\
#' \end{bmatrix}
#' + X\beta +
#' \varepsilon
#' }
#' where \eqn{y = [y_1^\prime,y_2^\prime]^\prime},
#' and the \eqn{n_1 \times 1} vector \eqn{y_1} contains the observations
#' on the dependent variable for the first regime,
#' and the \eqn{n_2 \times 1} vector \eqn{y_2} (with \eqn{n_1 + n_2 = n})
#' contains the observations on the dependent variable for the second regime.
#' The \eqn{n_1 \times k} matrix \eqn{X_1} and the \eqn{n_2 \times k}
#' matrix \eqn{X_2} are blocks of a block diagonal matrix,
#' the vectors of parameters \eqn{\beta_1} and \eqn{\beta_2} have
#' dimension \eqn{k_1 \times 1} and \eqn{k_2 \times 1}, respectively,
#' \eqn{X} is the \eqn{n \times p} matrix of regressors that do not vary by regime,
#' \eqn{\beta} is a \eqn{p\times 1} vector of parameters
#' and \eqn{\varepsilon = [\varepsilon_1^\prime,\varepsilon_2^\prime]^\prime}
#' is the \eqn{n\times 1} vector of innovations.
#' \itemize{
#' \item If \code{vc = "homoskedastic"}, the model is estimated by OLS.
#' \item If \code{vc = "groupwise"}, the model is estimated in two steps.
#' In the first step, the model is estimated by OLS. In the second step, the
#' inverse of the (groupwise) residuals from the first step are employed
#' as weights in a weighted least square procedure.}
#'
#' @examples
#' data("baltim")
#' form <- PRICE ~ NROOM + NBATH + PATIO + FIREPL + AC + GAR + AGE + LOTSZ + SQFT
#' split <- ~ CITCOU
#' mod <- regimes(formula = form, data = baltim, rgv = split, vc = "groupwise")
#' summary(mod)
#' form <- PRICE ~ AC + AGE + NROOM + PATIO + FIREPL + SQFT | NBATH + GAR + LOTSZ - 1
#' mod <- regimes(form, baltim, split, vc = "homoskedastic")
#' summary(mod)
#'
#'
#' @author Gianfranco Piras and Mauricio Sarrias
#' @return An object of class \code{lm} and \code{spregimes}.
#' @import Formula sphet stats spdep
#' @export
regimes <- function(formula, data, rgv = NULL,
vc = c("homoskedastic", "groupwise")){
if(is.null(rgv)) stop("regimes variable not specified")
if(!inherits(rgv, "formula")) stop("regimes variable has to be a formula")
#Obtain arguments
vc <- match.arg(vc)
cl <- match.call()
#process the data
intro <- data.prep.regimes(formula = formula, data = data, rgv = rgv)
#split the object list intro:
# data
# formula
# k tot number of variables in model
# totr tot of variables varying by regime
dataset <- intro[[1]]
form <- intro[[2]]
k <- intro[[3]]
k1 <- intro[[4]]
k2 <- intro[[5]]
rgm <- intro[[6]]
sv <- intro[[7]]
if (vc == "groupwise") res <- groupwise.regimes(form, dataset, k, k1, k2, rgm, sv)
if (vc == "homoskedastic") res <- lm(form, dataset)
res <- list(res, cl)
class(res) <- c("spregimes", "lm")
return(res)
}
groupwise.regimes <- function(formula, data, k, k1, k2, rgm, sv){
if (is.null(k2)) df <- k
else df <- k1 + 2*k2
fs <- lm(formula, data)
nobsg <- colSums(rgm)
omega <- vector("numeric", length = nrow(data))
for (i in 1: sv) omega[which(as.numeric(rgm[,i])==1)] <- 1/(crossprod(residuals(fs)[which(as.numeric(rgm[,i])==1)])/(nobsg[i]-df))
data$omega <- omega
res <- lm(formula, data, weights = omega)
# uhat <- vector("numeric", length = nrow(data))
# for (i in 1: sv) uhat[which(as.numeric(rgm[,i])==1)] <- residuals(lm(formula, data[which(as.numeric(rgm[,i])==1),] ))
# fs$residuals <- uhat
return(res)
}
###homoskedastic.regimes <- function(formula, dataset){
# res <- lm(formula, dataset)
# return(res)
#}
##
data.prep.regimes <- function(formula, data, rgv){
splitvar <- as.matrix(lm(rgv, data, method="model.frame"))
sv <- length(unique(splitvar))
svm <- sort(as.numeric(unique(splitvar)), decreasing = F)
rgm <- matrix(,nrow = nrow(data), ncol = 0)
for(i in svm) rgm <- cbind(rgm, ifelse(splitvar == i, 1, 0))
f1 <- Formula(formula)
mt <- terms(f1, data = data)
mf1 <- model.frame(f1, data = data)
y <- as.matrix(model.response(mf1))
namey <- colnames(y) <- all.vars(f1)[[1]]
k1 <- NULL
k2 <- NULL
if(length(f1)[2L] == 2L){
x1 <- model.matrix(f1, data = mf1, rhs = 1)
namesx1 <- colnames(x1)
x2 <- model.matrix(f1, data = mf1, rhs = 2)
namesx <- colnames(x2)
if(any(namesx1 == "(Intercept)") && any(colnames(x2) == "(Intercept)"))
stop("(Intercept) cannot be specified in both formula")
n <- dim(x1)[1]
k1 <- dim(x1)[2]
k2 <- dim(x2)[2]
if(any(namesx1 == "(Intercept)")) namesx1[which(namesx1 == "(Intercept)")] = "Intercept"
if(any(namesx == "(Intercept)")) namesx[which(namesx == "(Intercept)")] = "Intercept"
namesxr <- paste(namesx, rep(svm, each = k2), sep = "_")
totc <- sv*k2
k <- k1 + totc
xr <- matrix(0, ncol = totc, nrow = n)
seq_1 <- seq(1, totc, k2)
seq_2 <- seq(k2, totc, k2)
for(i in 1:sv) xr[,seq_1[i]:seq_2[i]] <- x2 * rgm[,i]
data <- data.frame(y, x1,xr)
colnames(data) <- c(namey, namesx1, namesxr)
form <- as.formula(paste(namey," ~", paste(names(data)[2:(k+1)], collapse = " + "), "-1"))
}
else{
x <- model.matrix(f1, data = mf1, rhs = 1)
n <- dim(x)[1]
k <- dim(x)[2]
namesx <- colnames(x)
Int <- FALSE
if(any(namesx == "(Intercept)")) {
Int <- TRUE
namesx[ which(namesx == "(Intercept)")] = "Intercept"
Int <- TRUE
}
namesxr <- paste(namesx, rep(svm, each = k), sep = "_")
totc <- sv*k
xr <- matrix(0, ncol = totc, nrow = n)
seq_1 <- seq(1, totc, k)
seq_2 <- seq(k, totc, k)
for(i in 1:sv) xr[,seq_1[i]:seq_2[i]] <- x * rgm[,i]
data <- data.frame(y,xr)
colnames(data) <- c(namey ,namesxr)
if(isTRUE(Int)) form <- as.formula(paste(namey,"~", paste(names(data)[2:(totc+1)], collapse = " + "), "-1"))
else form <- as.formula(paste(namey,"~", paste(names(data)[2:(totc+1)], collapse = " + ")))
}
ret <- list(data, form, k, k1, k2, rgm, sv)
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.