Nothing
#' Testing for Breaks and Estimating Break Dates and Sizes with Confidence Intervals
#'
#' @description
#' This is the main function of this package for testing breaks in quantile regression models
#' and estimating break dates and break sizes with corresponding confidence intervals.
#'
#' @usage rq.break(y, x, vec.tau, N, trim.e, vec.time, m.max, v.a, v.b, verbose)
#'
#' @param y a numeric vector, the outcome variable (NT x 1), the first N units are from the first period,
#' the next N from the second period, and so forth.
#' @param x A matrix of regressors (NT x p), structured in the same way as y, a column of ones will be automatically added to x.
#' @param vec.tau a numeric vector, quantiles used for break estimation, for example \code{vec.tau = seq(0.20, 0.80, by = 0.10)}
#' @param N a numeric vector, the number of cross-sectional units. Set to 1 for a time series quantile regression.
#' @param trim.e a scalar between 0 and 1, the trimming proportion.
#' For example, if \code{trim.e=0.1}, the minimum regime length is 0.1 times the data span.
#' @param vec.time a vector of dates, needed for reporting the estimated break dates, in the format of (starting date...ending date);
#' If set to NULL, the break dates will be reported as indices (e.g., 55 for the 55th observation in the sample).
#' @param m.max the maximum number of breaks allowed.
#' @param v.a the significance level used for determining the number of breaks; 1, 2 or 3 for 10%, 5% or 1%, respectively
#' @param v.b the coverage level for constructing the confidence intervals of break dates; 1 or 2 for 90% and 95%, respectively.
#' @param verbose Logical; set to TRUE to print estimates to the console. Default is FALSE.
#'
#' @return A list containing:
#' - `$s.out`: A list with break testing results, estimated break dates, confidence intervals, and coefficient estimates
#' based on individual quantiles.
#' - `$m.out`: A list with break testing results, estimated break dates, confidence intervals, and coefficient estimates
#' obtained by testing and estimating breaks using multiple quantiles simultaneously.
#'
#' Each list (`s.out` or `m.out`) contains:
#' - `test_tau`: A matrix of test statistics and critical values for break detection at quantile `tau`.
#' - `nbreak_tau`: The number of detected breaks at quantile `tau`.
#' - `br_est_tau`: A matrix of estimated break dates and their confidence intervals at quantile `tau`.
#' - `br_est_time_tau`: The same as `br_est_tau`, but with break dates reported in calendar format (if `vec.time` is provided and is not NULL).
#' - `coef_tau`: Estimated regression coefficients for each regime at quantile `tau`.
#' - `bsize_tau`: Break size estimates for each transition between regimes at quantile `tau`.
#'
#'
#' @import quantreg
#' @importFrom stats runif
#' @importFrom utils read.table write.table
#'
#' @references
#' Koenker, R. and G. Bassett Jr. (1978).
#' Regression quantiles. \emph{Econometrica}, 46(1), 33-50.
#'
#' Oka, T. and Z. Qu (2011). Estimating Structural Changes in Regression Quantiles.
#' \emph{Journal of Econometrics}, 162(2), 248-267.
#'
#' Qu, Z. (2008). Testing for Structural Change in Regression Quantiles.
#' \emph{Journal of Econometrics}, 146(1), 170-184.
#'
#' @examples
#' \donttest{
#' ## Example 1
#' ## Time series example, using GDP data
#' ## data
#' data(gdp)
#' y = gdp$gdp
#' x = gdp[,c("lag1", "lag2")]
#' vec.time = gdp$yq
#'
#' ## the maximum number of breaks allowed
#' m.max = 3
#'
#' ## the signifance level for sequenatial testing
#' ## 1, 2 or 3 for 10%, 5% or 1%, respectively
#' v.a = 2
#'
#' ## the significance level for the confidence intervals of estimated break dates.
#' ## 1 or 2 for 90% and 95%, respectively.
#' v.b = 2
#'
#' ## the size of the cross-section
#' N = 1
#'
#' ## the trimming proportion for estimating the break dates
#' ## (represents the minimum length of a regime; used to exclude
#' ## the boundaries of the sample)
#' trim.e = 0.15
#'
#' ## quantiles
#' vec.tau = seq(0.20, 0.80, by = 0.150)
#'
#' verbose = FALSE #do not print
#'
#' ## main estimation
#' res = rq.break(y, x, vec.tau, N, trim.e, vec.time, m.max, v.a, v.b, verbose)
#'
#' }
#'
#' @export
rq.break = function(y, x, vec.tau, N=1, trim.e, vec.time, m.max, v.a, v.b, verbose=FALSE)
{
x <- as.matrix(x)
if (nrow(x) != length(y)) {
stop("Error: The number of rows in x must match the length of y.")
}
if (length(vec.tau) == 0) {
stop("Error: vec.tau must contain at least one quantile for estimation.")
}
if (N < 1) {
stop("Error: N must be at least 1.")
}
if (trim.e < 0 || trim.e > 0.5) {
stop("Error: trim.e must be between 0 and 0.5 (inclusive).")
}
if (m.max*trim.e > 1) {
stop("Error: m.max*trim.e exceeds 1. This occurs because too many regimes are allowed or the minimum length of a regime is too large. Consider decreasing m.max, trim.e, or both.")
}
if (!(v.a %in% c(1, 2, 3))) {
stop("Error: v.a must be 1, 2, or 3 to select a significance level of 10%, 5%, or 1%.")
}
if (!(v.b %in% c(1, 2))) {
stop("Error: v.b must be 1 or 2 to select a significance level of 10% or 5%.")
}
n.size = N # cross section size, 1 for time series data
N.tau = length(vec.tau) #number of quantiles used in the estimation
T.size = length(y) / n.size # number of time periods
trim.size = round(T.size * trim.e) #minimum length of a regime
p.size = ncol(x) + 1 # user specified regressors plus an intercept
if (!is.null(vec.time) && length(vec.time) != T.size) {
stop("Error: The length of vec.time must match the length of y.")
}
if (trim.e * nrow(x) < p.size) {
stop("Error: trim.e * nrow(x) must be at least the number of regressors; otherwise, the estimation results are not unique. Consider increasing trim.e.")
}
if (p.size > 100) {
stop("The number of regressors cannot exceed 100.")
}
if (m.max > 10) {
stop("The maximum number of breaks can not exceed 10.")
}
q.L = min(vec.tau) #lower bound of quantile range
q.R = max(vec.tau) #upper bound of quantile range
##===========================================================
## Analysis begins
##===========================================================
time = proc.time() ## starting time
s.out <- list() #output
m.out <- list() #output
## obtain value of objective function
out.long = gen.long(y, x, vec.tau, n.size, trim.size)
mat.long.s = out.long$mat.long ## for individual quantile
vec.long.m = out.long$vec.long ## for multiple quantiles
## Single Quantile
if (verbose){
cat('================================================================== \n')
cat('===== Analysis based on a single conditional quantile ===== \n')
}
vec.nb = matrix(0, (N.tau*3), 1)
mat.loc.opt = matrix(0, (N.tau*3), m.max)
for(i in 1:N.tau){
v.tau = vec.tau[i]
if (verbose){
cat('================================================================== \n')
cat('For quantile level: ', v.tau, '\n')
}
## break dates given the maximum number of breaks
vec.long.s = mat.long.s[,i]
mat.date = brdate(y, x, n.size, m.max, trim.size, vec.long.s)
## determine the number of breaks
out.s = sq(y, x, v.tau, n.size, m.max, trim.size, mat.date)
## estimation and inference
vec.level = c(10, 5, 1)
if (verbose){
cat('--- Break testing results at the', vec.level[v.a],'% significance level---\n' )
cat('(Note: The k-th column and beyond display zero if the (k-1)-th break is insignificant at 10% level)\n')
}
# Determine the number of columns dynamically
num_cols <- length(out.s$test) # Assuming test and cv have the same number of columns
# Generate column names dynamically
col_names <- paste0(seq_len(num_cols), " Breaks")
table_matrix <- matrix(
c(out.s$test, out.s$cv[v.a, ]), # Combine SQ test and Critical values
nrow = 2, byrow = TRUE,
dimnames = list(c("SQ test", "Critical values"), col_names) # Set row and column names
)
name_test <- paste0("test_", v.tau)
s.out[[name_test]]<-table_matrix
if (verbose){
print(format(table_matrix, digits = 4), quote = FALSE)
cat('The number of breaks detected (SQ above the critical value): ', out.s$nbreak[v.a], '\n')
}
name_break <- paste0("nbreak_", v.tau)
s.out[[name_break]]<-out.s$nbreak[v.a]
## estimation
n.break = out.s$nbreak[v.a]
ind01 = i + N.tau * (v.a - 1)
vec.nb[ind01]= n.break
if (n.break >= 1){
vec.date = out.s$date[v.a,1:n.break]
fit = rq.est.full(y, x, v.tau, vec.date, n.size)
result = summary.rq(fit, se = "nid", covariance = TRUE)
mat.ci = ci.date.m(y, x, v.tau, vec.date, n.size, v.b)
if (verbose){
cat('\n')
cat('--- Estimated Break Dates and their', 100-vec.level[v.b], '% Confidence Intervals ---\n')
}
# number of rows = number of breaks
p_num_breaks <- nrow(mat.ci) # Get the number of rows dynamically
# Generate row names dynamically
rownames(mat.ci) <- paste0("Break ", seq_len(p_num_breaks))
# Assign fixed column names
colnames(mat.ci) <- c("Estimate", "CI_Lower_Bound", "CI_Upper_Bound")
if (verbose){
cat('\n')
cat('(a) Breaks as indices (e.g., 50 for the 50th observation in the sample):')
cat('\n')
}
name_ci <- paste0("br_est_", v.tau)
s.out[[name_ci]]<-mat.ci
if (verbose){
print( mat.ci)
}
if (min(mat.ci[,2]) < 1 | T.size < max(mat.ci[,3])){
warning(' confidence interval is out of the range\n')
} else if (is.null(vec.time)) {
# Do nothing (explicitly left empty)
}
else
{
mat.ci.time <- matrix( vec.time[mat.ci], ncol = 3)
# Generate row names dynamically
rownames(mat.ci.time) <- paste0("Break ", seq_len(p_num_breaks))
# Assign fixed column names
colnames(mat.ci.time) <- c("Estimate", "CI_Lower_Bound", "CI_Upper_Bound")
name_ci <- paste0("br_est_time_", v.tau)
s.out[[name_ci]]<-mat.ci.time
if (verbose){
cat('\n')
cat ('(b) Breaks in date format:')
cat ('\n')
print( mat.ci.time)
}
}
if (verbose){
cat ('\n')
cat('- - - - - - Coeffcient Estimation Results - - - - - - \n')
## Estimates for each regime
cat ('\n')
cat(' (a) Coefficients estimates for each regime \n')
cat ('[rows: intercept, first regressor, second regressor ...] \n')
cat ('\n')
}
coef.est<-rq.est.regime(y, x, v.tau, vec.date, n.size)
if (verbose){
# Properly formatted print
for (name in names(coef.est)) {
cat("\n", name, "\n")
print(format(coef.est[[name]], digits = 4), quote = FALSE)
}
}
name_coef <- paste0("coef_", v.tau)
s.out[[name_coef]]<-coef.est
## Estimates for the break sizes
if (verbose){
cat(' (b) Break sizes \n')
cat ('[rows: break in intercept, break in first coef, break in second coef ...] \n ')
cat ('\n')
}
for (j in 1:n.break){
beg01 = 1 + p.size * j
end01 = p.size * (j+1)
coef_subset <- result$coef[beg01:end01,]
# Rename the rows
rownames(coef_subset) <- c("Intercept", paste0("x", seq_len(nrow(coef_subset) - 1)))
# Print the updated coefficient matrix
if (verbose){
cat(' < Regime', (j + 1), ' minus Regime', j, '>\n')
print(format(coef_subset, digits = 4), quote = FALSE)
cat(' \n')
}
name_coef <- paste0("bsize_", v.tau,'_Regime_', (j + 1), '_minus_Regime_', j)
s.out[[name_coef]]<-result$coef[beg01:end01,]
rownames(s.out[[name_coef]]) <- c("Intercept", paste0("x", seq_len(nrow(result$coef[beg01:end01,]) - 1)))
}
mat.loc.opt[ind01,1:n.break] = vec.date
}
}
## figure for single quantile
## fig.s(y, x, vec.tau, vec.nb, mat.date, vec.time, n.size)
if (length(vec.tau) > 1) {
## For the DQ test; d.Sym=TRUE if vec.tau specifies a symmetric quantile range
## i.e, q.L = (1 - q.R); Otherwise, d.Sym=FALSE; for the latter case, the critical values of the DQ test is generated via simulations
d.Sym <- abs(q.L - (1 - q.R)) <= 1e-5
d.Sim <- !d.Sym
## simulation to generate critical values, not needed if the analysis is for a single quantile only
if ((d.Sym == FALSE & d.Sim == TRUE) | p.size>20 | m.max>5){
if (verbose){
cat("The critical values of the DQ test is obtained via simulations\n")
}
table.cv<-critical.DQtest_specific(x, m.max, vec.tau) #simulate critical value for the DQ test;
#if not true, compute critical values from a response surface
} else {
table.cv<-NULL
}
## Multiple quantiles
if (verbose){
cat('================================================================== \n')
cat('===== Analysis based on multiple conditional quantiles ===== \n')
cat('================================================================== \n')
}
mat.date = brdate(y, x, n.size, m.max, trim.size, vec.long.m)
## (i) determine the number of breaks
out.m = dq(y, x, vec.tau, q.L, q.R, n.size, m.max, trim.size, mat.date,d.Sym,table.cv)
vec.level = c(10, 5, 1) # significance level
## the number of breaks
n.break = out.m$nbreak[v.a]
if (verbose){
cat('----- Break testing results at the', vec.level[v.a], '% significance level: ----\n' )
cat('(Note: The k-th column and beyond display zero if the (k-1)-th break is insignificant at 10% level)\n')
}
# Determine the number of columns dynamically
num_cols <- length(out.m$test) # Assuming test and cv have the same number of columns
# Generate column names dynamically
col_names <- paste0(seq_len(num_cols), " Breaks")
table_matrix <- matrix(
c(out.m$test, out.m$cv[v.a, ]), # Combine test and Critical values
nrow = 2, byrow = TRUE,
dimnames = list(c("DQ test", "Critical values"), col_names) # Set row and column names
)
if (verbose){
print(format(table_matrix, digits = 4), quote = FALSE)
}
name_test <- paste0("test_joint")
m.out[[name_test]] <- table_matrix
if (verbose){
cat('The number of breaks detected (DQ above the critical value): ', out.m$nbreak[v.a], '\n')
}
name_break <- paste0("nbreak_joint")
m.out[[name_break]] <- out.m$nbreak[v.a]
if (n.break >= 1){
vec.date = out.m$date[v.a,1:n.break]
## confidence interval
mat.ci = ci.date.m(y, x, vec.tau, vec.date, n.size, v.b)
if (verbose){
cat('\n')
cat('--- Estimated Break Dates and their', 100-vec.level[v.b], '% Confidence Intervals:---\n')
}
p_num_breaks <- nrow(mat.ci) # Get the number of rows dynamically
# Generate row names dynamically
rownames(mat.ci) <- paste0("Break ", seq_len(p_num_breaks))
# Assign fixed column names
colnames(mat.ci) <- c("Estimate", "CI_Lower_Bound", "CI_Upper_Bound")
name_ci <- paste0("br_est_joint")
m.out[[name_ci]] <- mat.ci
if (verbose){
cat ('\n')
cat('(a) Breaks as indices (e.g., 50 for the 50th observation in the sample):')
cat ('\n')
print( mat.ci)
}
if (min(mat.ci[,2]) < 1 | T.size < max(mat.ci[,3])){
warning(' confidence interval is out of the range\n')
} else if (is.null(vec.time)) {
# Do nothing (explicitly left empty)
}
else {
mat.ci.time <- matrix( vec.time[mat.ci], ncol = 3)
# Generate row names dynamically
rownames(mat.ci.time) <- paste0("Break ", seq_len(p_num_breaks))
# Assign fixed column names
colnames(mat.ci.time) <- c("Estimate", "CI_Lower_Bound", "CI_Upper_Bound")
name_ci <- paste0("br_est_joint_time")
m.out[[name_ci]] <- mat.ci.time
if (verbose){
cat ('\n')
cat('(b) Breaks in date format: \n')
print( mat.ci.time)
}
}
if (verbose){
cat ('\n')
cat('------------ Coefficient Estimation Results ------------\n')
}
for(k in 1:N.tau){
v.tau = vec.tau[k]
if (verbose){
cat('========================================================== \n')
cat('- For quantile level: ', v.tau ,'-\n')
cat ('\n')
## Estimates for each regime by splitting sample
cat(' (a) Coefficients estimates for each regime \n')
cat ('[rows: intercept, first regressor, second regressor...] \n')
}
coef.est<-rq.est.regime(y, x, v.tau, vec.date, n.size)
#name_coef <- paste0("coef") #update version 1.0.2
name_coef <- paste0("coef_", v.tau)
m.out[[name_coef]] <- coef.est
if (verbose){
# Properly formatted print
for (name in names(coef.est)) {
cat("\n", name, "\n")
print(format(coef.est[[name]], digits = 4), quote = FALSE)
}
}
if (verbose){
## Estimates of the break sizes
cat(' (b) Break sizes \n')
cat ('[break in intercept, break in first coef, break in second coef...] \n')
}
fit = rq.est.full(y, x, v.tau, vec.date, n.size)
result = summary.rq(fit, se = 'nid', covariance = TRUE)
for (j in 1:n.break){
beg01 = 1 + p.size * j
end01 = p.size * (j+1)
coef_subset <- result$coef[beg01:end01,]
# Rename the rows
rownames(coef_subset) <- c("Intercept", paste0("x", seq_len(nrow(coef_subset) - 1)))
# Print the updated coefficient matrix
if (verbose){
print(format(coef_subset, digits = 4), quote = FALSE)
cat(' \n')
}
name_coef <- paste0("bsize_", v.tau,'_Regime_', (j + 1), '_minus_Regime_', j)
m.out[[name_coef]]<-result$coef[beg01:end01,]
rownames(m.out[[name_coef]]) <- c("Intercept", paste0("x", seq_len(nrow(result$coef[beg01:end01,]) - 1)))
}
}
}
}
return(list(s.out = s.out, m.out= m.out))
if (verbose){
cat('================================================================== \n')
cat('Estimation Time (min) \n')
print( (proc.time() - time) / 60)
cat('================================================================== \n')
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.