#' High and low-frequency data generating processes
#'
#' This function generates the high-frequency \eqn{mn_l \times 1} response vector \eqn{y}, according to \eqn{y=X\beta+\epsilon}, where \eqn{X} is an \eqn{mn_l\times p} matrix of indicator
#' series, and the \eqn{mn_l\times 1} coefficient vector may be sparse. The low-frequency \eqn{n_l\times 1} vector
#' can be generated by pre-multiplying a disaggregation matrix \eqn{n_l\times mn_l} matrix, such that the sum, the average, the last or the first value of \eqn{y} equates the
#' corresponding \eqn{Y} observation (see \insertCite{sax2013temporal;textual}{HDTempDisagg}).
#'
#' @param n_l size of the low frequency series
#' @param m the integer multiple for generating the high-frequency series
#' @param beta the positive and negative beta elements for the coefficient vector
#' @param sparsity sparsity percentage of the coefficient vector
#' @param method nominates the choice of the DGP
#' @param annualMat choice of the disaggregation matix
#' @param rho the autocorrelation coefficient
#' @param mean_X mean of the design matrix
#' @param sd_X standard deviation of the design matrix
#' @param sd_e standard deviation of the errors
#' @param simul when TRUE the design matrix and the coefficient vector are fixed
#' @keywords DGP sparse high-frequency low-frequency
#' @import zoo
#' @examples
#' TempDisaggDGP(n_l = 10, m = 4, p = 4, method = 'Chow-Lin', annualMat = 'sum', mean_X = 0.5, sd_X = 1, sd_e = 1 , rho = 0.5)
#' @references
#' \insertAllCited{}
#' @importFrom Rdpack reprompt
TempDisaggDGP <- function(n_l, m, p = 1, beta = 0.5, sparsity = 1, method = 'Denton-Cholette', annualMat = 'sum', rho = 0, mean_X = 0.5, sd_X = 1, sd_e = 1, simul = FALSE){
if(rho >= 1 || rho <= -1){
stop("For the Chow-Lin method 'rho' < |1| and for the Litterman method 'rho' corresponds to the autocorrelation of the errors of the random walk, where 'rho' < |1|! \n")
}else{
# Generate the random vector of coefficients for the DGP with +beta and -beta with equal probability.
if(simul == TRUE){
set.seed(42)
}
w <- matrix(data = rbinom(n = p, size = 1, prob = 0.5), ncol = 1)
beta <- w*beta-(1-w)*beta
if(sparsity != 1){
if(sparsity > 1){
stop("The 'sparsity' input can only take values in (0,1].\n")
}else{
s <- round((1-sparsity)*p)
toReplace <- sample(p, size = s)
beta <- replace(beta, list = toReplace, values = 0)
}
}
if(method == 'Denton-Cholette'){
if(p > 1){
stop("For the Denton-Cholette method p = 1! \n")
}else{
# Generate the random vector of indicator series.
X <- matrix(data = rnorm(n_l*m, mean = mean_X, sd = sd_X), ncol = 1)
}
}else{
# Generate the random p-columned matrix of indicator series.
X <- matrix(data = rnorm ((n_l*m) * p, mean = mean_X, sd = sd_X), ncol = p, nrow = n_l*m)
}
if(simul == TRUE){
rm(.Random.seed, envir = globalenv())
}
if(method == 'Denton-Cholette'){
# Generate the high-frequency series according to a normal distribution with some random indicators.
# Generate the purturbations.
e <- matrix(data = rnorm(n_l*m, mean = 0, sd = sd_e), ncol = 1)
# Generate the high-frequency series by y = X + e.
y <- matrix(data = (X + e), ncol = 1)
# Beta = 1 for the Denton-Cholette method
beta <- 1
}else if(method == 'Chow-Lin'){
# Generate the high-frequency series with autocorrelated errors according to a normal distribution with some random indicators.
# Generate the autocorrelated purturbations with the autocorrelation coefficient rho.
e <- matrix(data = 0, nrow = n_l*m)
e[1] <- rnorm(1, mean = 0, sd = 1)/sqrt(1-rho^2)
for(i in 2:nrow(e)){
u <- rnorm(1, mean = 0, sd = 1)
e[i] <- rho * e[i-1] + u
}
# Generate the high-frequency series by y = XB + e.
y <- matrix(data = (X %*% beta + e), ncol = 1)
}else if(method == 'Fernandez' || method == 'Litterman'){
if(method == 'Fernandez'){
rho <- 0
}
# Generate the high-frequency series with errors exhibiting random walk and potentially autocorrelated errors, according to a normal distribution with some random indicators.
# Generate the random vector of coefficients for the DGP.
# Generate the purturbations exhibiting a random walk model.
e <- matrix(data = 0, nrow = n_l*m)
u <- matrix(data = 0, nrow = n_l*m)
e[1] <- rnorm(1, mean = 0, sd = 1)
u[1] <- rnorm(1, mean = 0, sd = 1)/sqrt(1-rho^2)
for(i in 2:nrow(e)){
nu <- rnorm(1, mean = 0, sd = 1)
u[i] <- rho * u[i-1] + nu
e[i] <- e[i-1] + u[i]
}
# Generate the high-frequency series by y = XB + e.
y <- matrix(data = (X %*% beta + e), ncol = 1)
}
if(annualMat == 'sum'){
# Every m-th observations 'y' will add up to generate 'Y'.
Y <- rollapply(y, m, 'sum', by = m)
}else if(annualMat == 'avg'){
# Every m-th observations 'y' will be averaged to generate 'Y'.
Y <- rollapply(y, m, 'mean', by = m)
}else if(annualMat == 'first'){
# The first value of every m-th observations 'y' will generate 'Y'.
Y <- matrix(data = y[seq(from = 1, to = nrow(y), m)], ncol = 1)
}else if(annualMat == 'last'){
# The last value of every m-th observations 'y' will generate 'Y'.
Y <- matrix(data = y[seq(from = m, to = nrow(y), m)], ncol = 1)
}
#Store the output
data_list <- list(y, Y, X, beta, e)
names(data_list) <- c("y_Gen", "Y_Gen", "X_Gen", "Beta_Gen", "e_Gen")
TempDisaggSim <<- data_list
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.