Nothing
#' High and Low-Frequency Data Generating Processes
#'
#' This function generates a high-frequency response vector \eqn{y}, following the relationship \eqn{y = X\beta + \epsilon}, where \eqn{X} is a matrix of indicator series and \eqn{\beta} is a potentially sparse coefficient vector. The low-frequency vector \eqn{Y} is generated by aggregating \eqn{y} according to a specified aggregation method.
#'
#' The aggregation ratio (`aggRatio`) determines the ratio between the low and high-frequency series (e.g., `aggRatio = 4` for annual-to-quarterly). If the number of observations \eqn{n} exceeds \eqn{aggRatio \times n_l}, the aggregation matrix will include zero columns for the extrapolated values.
#'
#' The function supports several data generating processes (DGP) for the residuals, including 'Denton', 'Denton-Cholette', 'Chow-Lin', 'Fernandez', and 'Litterman'. These methods differ in how they generate the high-frequency data and residuals, with optional autocorrelation specified by `rho`.
#'
#' @param n_l Integer. Size of the low-frequency series.
#' @param n Integer. Size of the high-frequency series.
#' @param aggRatio Integer. Aggregation ratio between low and high frequency (default is 4).
#' @param p Integer. Number of high-frequency indicator series to include.
#' @param beta Numeric. Value for the positive and negative elements of the coefficient vector.
#' @param sparsity Numeric. Sparsity percentage of the coefficient vector (value between 0 and 1).
#' @param method Character. The DGP of residuals to use ('Denton', 'Denton-Cholette', 'Chow-Lin', 'Fernandez', 'Litterman').
#' @param aggMat Character. Aggregation matrix type ('first', 'sum', 'average', 'last').
#' @param rho Numeric. Residual autocorrelation coefficient (default is 0).
#' @param mean_X Numeric. Mean of the design matrix (default is 0).
#' @param sd_X Numeric. Standard deviation of the design matrix (default is 1).
#' @param sd_e Numeric. Standard deviation of the errors (default is 1).
#' @param simul Logical. If `TRUE`, the design matrix and the coefficient vector are fixed (default is `FALSE`).
#' @param sparse_option Character or Integer. Option to specify sparsity in the coefficient vector ('random' or integer value). Default is "random".
#' @param setSeed Integer. Seed value for reproducibility when `simul` is set to `TRUE`.
#' @return A list containing the following components:
#' \itemize{
#' \item \code{y_Gen}: Generated high-frequency response series (an \eqn{n \times 1} matrix).
#' \item \code{Y_Gen}: Generated low-frequency response series (an \eqn{n_l \times 1} matrix).
#' \item \code{X_Gen}: Generated high-frequency indicator series (an \eqn{n \times p} matrix).
#' \item \code{Beta_Gen}: Generated coefficient vector (a \eqn{p \times 1} matrix).
#' \item \code{e_Gen}: Generated high-frequency residual series (an \eqn{n \times 1} matrix).
#' }
#' @keywords DGP sparse high-frequency low-frequency
#' @import zoo withr
#' @export
#' @examples
#' data <- TempDisaggDGP(n_l=25,n=100,p=10,rho=0.5)
#' X <- data$X_Gen
#' Y <- data$Y_Gen
#' @importFrom Rdpack reprompt
#' @importFrom stats lm rbinom rnorm
TempDisaggDGP <- function(n_l, n, aggRatio = 4, p = 1, beta = 1, sparsity = 1, method = 'Chow-Lin', aggMat = 'sum', rho = 0, mean_X = 0, sd_X = 1, sd_e = 1, simul = FALSE, sparse_option = "random", setSeed = 42){
# Check if rho is valid
if(rho >= 1 || rho <= -1) {
stop("For the Chow-Lin method 'rho' must be between -1 and 1.")
}
# Generate random vector of coefficients for the DGP
if(simul == TRUE){
w <- with_seed(setSeed, matrix(data = rbinom(n = p, size = 1, prob = 0.5), ncol = 1))
} else {
w <- matrix(data = rbinom(n = p, size = 1, prob = 0.5), ncol = 1)
}
beta <- w * beta - (1 - w) * beta
# Handle sparsity
if(sparse_option == "random") {
if(sparsity != 1) {
if(sparsity > 1) {
stop("The 'sparsity' input can only take values in (0,1].")
} else {
s <- round(sparsity * p)
if(simul == TRUE){
toReplace <- with_seed(setSeed, sample(p, size = s))
} else {
toReplace <- sample(p, size = s)
}
beta <- replace(beta, list = toReplace, values = 0)
}
}
} else if(is.numeric(sparse_option) && sparse_option == as.integer(sparse_option)) {
if(sparse_option > p) {
stop("The 'sparse_option' integer input should be less than or equal to 'p'.")
} else {
toReplace <- (length(beta) - sparse_option + 1):length(beta)
beta <- replace(beta, list = toReplace, values = 0)
}
} else {
stop("If 'sparse_option' is not 'random', it must be an integer.")
}
# Generate X matrix based on the method
if(method == 'Denton-Cholette'){
if(p > 1){
stop("For the Denton-Cholette method, p must be 1.")
}
if(simul == TRUE){
X <- with_seed(setSeed, matrix(data = rnorm(n, mean = mean_X, sd = sd_X), ncol = 1))
} else {
X <- matrix(data = rnorm(n, mean = mean_X, sd = sd_X), ncol = 1)
}
} else {
if(simul == TRUE){
X <- with_seed(setSeed, matrix(data = rnorm(n * p, mean = mean_X, sd = sd_X), ncol = p, nrow = n))
} else {
X <- matrix(data = rnorm(n * p, mean = mean_X, sd = sd_X), ncol = p, nrow = n)
}
}
# Generate residuals and high-frequency series based on the method
if(method == 'Denton-Cholette'){
e <- matrix(data = rnorm(n, mean = 0, sd = sd_e), ncol = 1)
y <- matrix(data = (X + e), ncol = 1)
beta <- 1
} else if(method == 'Chow-Lin'){
e <- matrix(data = 0, nrow = n)
e[1] <- rnorm(1, mean = 0, sd = 1)/sqrt(1 - rho^2)
for(i in 2:nrow(e)){
u <- rnorm(1, mean = 0, sd = 1)
e[i] <- rho * e[i-1] + u
}
y <- matrix(data = (X %*% beta + e), ncol = 1)
} else if(method == 'Fernandez' || method == 'Litterman'){
if(method == 'Fernandez'){
rho <- 0
}
e <- matrix(data = 0, nrow = n)
u <- matrix(data = 0, nrow = n)
e[1] <- rnorm(1, mean = 0, sd = 1)
u[1] <- rnorm(1, mean = 0, sd = 1)/sqrt(1 - rho^2)
for(i in 2:nrow(e)){
nu <- rnorm(1, mean = 0, sd = 1)
u[i] <- rho * u[i-1] + nu
e[i] <- e[i-1] + u[i]
}
y <- matrix(data = (X %*% beta + e), ncol = 1)
}
# Check the number of full observations and generate the aggregation matrix
nfull <- aggRatio * n_l
extr <- n - nfull
if(nfull > n) {
stop("X does not have enough observations.")
}
if(aggMat == 'sum'){
C <- kronecker(diag(n_l), matrix(data = 1, nrow = 1, ncol = aggRatio))
C <- cbind(C, matrix(0L, n_l, extr))
} else if(aggMat == 'average'){
C <- kronecker(diag(n_l), matrix(data = 1/aggRatio, nrow = 1, ncol = aggRatio))
C <- cbind(C, matrix(0L, n_l, extr))
} else if(aggMat == 'first'){
C <- kronecker(diag(n_l), matrix(data = c(1, rep(0, times = aggRatio - 1)), nrow = 1, ncol = aggRatio))
C <- cbind(C, matrix(0L, n_l, extr))
} else if(aggMat == 'last'){
C <- kronecker(diag(n_l), matrix(data = c(rep(0, times = aggRatio - 1), 1), nrow = 1, ncol = aggRatio))
C <- cbind(C, matrix(0L, n_l, extr))
}
Y <- C %*% y
# Return the generated data
data_list <- list(y, Y, X, beta, e)
names(data_list) <- c("y_Gen", "Y_Gen", "X_Gen", "Beta_Gen", "e_Gen")
return(data_list)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.