HDTempDisagg: High-dimensional temporal disaggregation

Documented in TempDisaggDGP

#' High and low-frequency data generating processes
#' 
#' This function generates the high-frequency \eqn{mn_l \times 1} response vector \eqn{y}, according to \eqn{y=X\beta+\epsilon}, where \eqn{X} is an \eqn{mn_l\times p} matrix of indicator
#' series, and the \eqn{mn_l\times 1} coefficient vector may be sparse. The low-frequency \eqn{n_l\times 1} vector
#' can be generated by pre-multiplying a disaggregation matrix \eqn{n_l\times mn_l} matrix, such that the sum, the average, the last or the first value of \eqn{y} equates the
#' corresponding \eqn{Y} observation (see \insertCite{sax2013temporal;textual}{HDTempDisagg}). 
#' 
#' @param n_l 		size of the low frequency series
#' @param m   		the integer multiple for generating the high-frequency series
#' @param beta  	the positive and negative beta elements for the coefficient vector
#' @param sparsity	sparsity percentage of the coefficient vector
#' @param method 	nominates the choice of the DGP
#' @param annualMat choice of the disaggregation matix
#' @param rho		the autocorrelation coefficient 
#' @param mean_X 	mean of the design matrix
#' @param sd_X 		standard deviation of the design matrix
#' @param sd_e		standard deviation of the errors
#' @param simul     when TRUE the design matrix and the coefficient vector are fixed
#' @keywords DGP sparse high-frequency low-frequency 
#' @import zoo
#' @examples
#' TempDisaggDGP(n_l = 10, m = 4, p = 4, method = 'Chow-Lin', annualMat = 'sum', mean_X = 0.5, sd_X = 1, sd_e = 1 , rho = 0.5)
#' @references
#' \insertAllCited{}
#' @importFrom Rdpack reprompt

TempDisaggDGP <- function(n_l, m, p = 1, beta = 0.5, sparsity = 1, method = 'Denton-Cholette', annualMat = 'sum', rho = 0, mean_X = 0.5, sd_X = 1, sd_e = 1, simul = FALSE){


	if(rho >= 1 || rho <= -1){

		stop("For the Chow-Lin method 'rho' < |1| and for the Litterman method 'rho' corresponds to the autocorrelation of the errors of the random walk, where 'rho' < |1|! \n")

	}else{

		# Generate the random vector of coefficients for the DGP with +beta and -beta with equal probability.

		if(simul == TRUE){

			set.seed(42)

		}

		w <- matrix(data = rbinom(n = p, size = 1, prob = 0.5), ncol = 1)

		beta <- w*beta-(1-w)*beta

		if(sparsity != 1){

			if(sparsity > 1){

				stop("The 'sparsity' input can only take values in (0,1].\n")

			}else{

				s <- round((1-sparsity)*p)					

				toReplace <- sample(p, size = s)

				beta <- replace(beta, list = toReplace, values = 0)

			}

		}

		if(method == 'Denton-Cholette'){

			if(p > 1){

				stop("For the Denton-Cholette method p = 1! \n")

			}else{

				# Generate the random vector of indicator series.

				X <- matrix(data = rnorm(n_l*m, mean = mean_X, sd = sd_X), ncol = 1)

			}

		}else{

			# Generate the random p-columned matrix of indicator series. 

			X <- matrix(data = rnorm ((n_l*m) * p, mean = mean_X, sd = sd_X), ncol = p, nrow = n_l*m)

		}

		if(simul == TRUE){

			rm(.Random.seed, envir = globalenv())

		}

		if(method == 'Denton-Cholette'){

		# Generate the high-frequency series according to a normal distribution with some random indicators.

			# Generate the purturbations. 

			e <- matrix(data = rnorm(n_l*m, mean = 0, sd = sd_e), ncol = 1) 

			# Generate the high-frequency series by y = X + e.

			y <- matrix(data = (X + e), ncol = 1) 

			# Beta = 1 for the Denton-Cholette method

			beta <- 1

		}else if(method == 'Chow-Lin'){

		# Generate the high-frequency series with autocorrelated errors according to a normal distribution with some random indicators.

			# Generate the autocorrelated purturbations with the autocorrelation coefficient rho.

			e <- matrix(data = 0, nrow = n_l*m)

			e[1] <- rnorm(1, mean = 0, sd = 1)/sqrt(1-rho^2)

			for(i in 2:nrow(e)){

				u <- rnorm(1, mean = 0, sd = 1)

				e[i] <- rho * e[i-1] + u 

			}

			# Generate the high-frequency series by y = XB + e.

			y <- matrix(data = (X %*% beta + e), ncol = 1) 



		}else if(method == 'Fernandez' || method == 'Litterman'){

			if(method == 'Fernandez'){

				rho <- 0

			}

		# Generate the high-frequency series with errors exhibiting random walk and potentially autocorrelated errors, according to a normal distribution with some random indicators.

			# Generate the random vector of coefficients for the DGP.

			# Generate the purturbations exhibiting a random walk model.

			e <- matrix(data = 0, nrow = n_l*m)

			u <- matrix(data = 0, nrow = n_l*m)

			e[1] <- rnorm(1, mean = 0, sd = 1)

			u[1] <- rnorm(1, mean = 0, sd = 1)/sqrt(1-rho^2)

			for(i in 2:nrow(e)){

				nu <- rnorm(1, mean = 0, sd = 1)

				u[i] <- rho * u[i-1] + nu

				e[i] <- e[i-1] + u[i]

			}

			# Generate the high-frequency series by y = XB + e.

			y <- matrix(data = (X %*% beta + e), ncol = 1) 


		}


		if(annualMat == 'sum'){

			# Every m-th observations 'y' will add up to generate 'Y'.

			Y <- rollapply(y, m, 'sum', by = m)

		}else if(annualMat == 'avg'){

			# Every m-th observations 'y' will be averaged to generate 'Y'.

			Y <- rollapply(y, m, 'mean', by = m)

		}else if(annualMat == 'first'){

			# The first value of every m-th observations 'y' will generate 'Y'.	

			Y <- matrix(data = y[seq(from = 1, to = nrow(y), m)], ncol = 1) 


		}else if(annualMat == 'last'){

			# The last value of every m-th observations 'y' will generate 'Y'.	

			Y <- matrix(data = y[seq(from = m, to = nrow(y), m)], ncol = 1)

		}


		#Store the output

		data_list <- list(y, Y, X, beta, e)
		names(data_list) <- c("y_Gen", "Y_Gen", "X_Gen", "Beta_Gen", "e_Gen")
		TempDisaggSim <<- data_list

	}
}