Nothing
#'SPECS
#'
#'This function estimates the Single-equation Penalized Error Correction Selector
#'as described in Smeekes and Wijler (2020). The function takes a dependent variable \eqn{y} and a matrix of independent
#'variables x as input, and transforms it to a conditional error correction model. This model is estimated by means of
#'penalized regression, involving \eqn{L1}-penalty on individual coefficients and a potential \eqn{L2}-penalty
#'on the coefficients of the lagged levels in the model, see Smeekes and Wijler (2020) for details.
#'
#'The function can generate an automated sequence of penalty parameters and offers the option to compute and include adaptive
#'penalty weights. In addition, it is possible to estimate a penalized ADL model in differences by excluding the lagged levels
#'from the model. For automated selection of an optimal penalty value, see the function specs_opt(...).
#'
#'@param y A vector containing the dependent variable in levels.
#'@param x A matrix containing the independent variables in levels.
#'@param p Integer indicating the desired number of lagged differences to include. Default is 1.
#'@param deterministics A character object indicating which deterministic variables should be added
#'("none","constant","trend","both"). Default is "constant".
#'@param ADL Logical object indicating whether an ADL model without error-correction term should be estimated.
#'Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied or no weights can be applied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Default is \eqn{10^5}.
#'@param max_iter_pi Maximum number of updates for pi. Default is \eqn{10^5}.
#'@param max_iter_gamma Maximum number of updates for gamma. Default is \eqn{10^5}.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{Mv}{A matrix containing the independent variables, after regressing out the deterministic components.}
#'\item{My_d}{A vector containing the dependent variable, after regressing out the deterministic components.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{v}{A matrix containing the independent variables (excluding deterministic components).}
#'\item{weights}{The vector of penalty weights.}
#'\item{y_d}{A vector containing the dependent variable, i.e. the differences of y.}
#'
#'@examples
#'
#'#Estimate a model for unemployment and ten google trends
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'
#'#Estimate a CECM with 1 lagged differences
#'my_specs <- specs(y,x,p=1)
#'
#'#Estimate a CECM with 1 lagged differences and no group penalty
#'my_specs2 <- specs(y,x,p=1,lambda_g=0)
#'
#'#Estimate an autoregressive distributed lag model with 2 lagged differences
#'my_specs3 <- specs(y,x,ADL=TRUE,p=2)
#'@export
specs <- function(y,x,p=1,deterministics = c("constant","trend","both","none"),
ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,lambda_g=NULL,lambda_i=NULL,
thresh=1e-4,max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {
#Dimensions
if(!is.null(x) & is.null(ncol(x))){
x <- as.matrix(x)
}
t <- length(y); n <- ncol(x)+1; m <- (p+2)*n-1
#Set deterministic choice
deterministics <- match.arg(deterministics)
#Check group lambda sequence
if(ADL){
m <- m-n
lambda_g <- 0 #No group penalty for ADL model
}else if(is.null(lambda_g)){
lambda_g <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_g<0)){
stop("lambda_g cannot contain negative values")
}else if(!is.numeric(lambda_g)){
stop("lambda_g has to be numeric")
}
#Check individual lambda sequence
if(is.null(lambda_i)){
lambda_i <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_i<0)){
stop("lambda_i cannot contain negative values")
}else if(!is.numeric(lambda_i)){
stop("lambda_i has to be numeric")
}
#Pass weights onto Rcpp
if(is.numeric(weights)){
if(length(weights)!= m){
stop(paste("Number of weights (",length(weights),
") does not equal the number of stochastic variables (",m,")",sep=""))
}else if(is.numeric(weights) & any(weights<0)){
stop(paste("Negative weights are not allowed"))
}
}else if(is.character(weights)){
weights <- match.arg(weights)
if(weights == "ols" & t<(m-1)){
stop("Dimension is too large to use ols as an initial estimator.")
} else if(weights == "ols"){
weights <- -1 #Rcpp will compute weights via ols or ridge
} else if(weights == "ridge") {
weights <- -2
}else if(weights== "none"){
weights <- matrix(1,m)
}
}else{
stop("Invalid class of weights")
}
output <- specs_rcpp(y,x,p,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
max_iter_delta,max_iter_pi,max_iter_gamma)
output <- output[sort(names(output))]
output
}
#'SPECS with data transformation and penalty optimization
#'
#'This function estimates SPECS and selects the optimal penalty parameter based on a selection rule. All arguments correspond
#'to those of the function specs(...), but it contains the additional arguments rule and CV_cutoff.
#'Selection of the penalty parameter can be carried out by BIC or AIC or by time series cross-validation (TSCV). The degrees
#'of freedom for the information criteria (BIC or AIC) are approximated by the number of non-zero coefficients in the
#'estimated model. TSCV cuts the sample in two, based on the argument CV_cutoff which determines the proportion of
#'the training sample. SPECS is estimated on the first part and the estimated model is used to predict the values
#'in the second part. The selection is then based on the lowest Mean-Squared Forecast Error (MSFE) obtained over the test sample.
#'
#'@param y A vector containing the dependent variable in levels.
#'@param x A matrix containing the independent variables in levels.
#'@param p Integer indicating the desired number of lagged differences to include. Default is 1.
#'@param rule A charcater object indicating which selection rule the optimal choice of the penalty parameters is based on.
#'Default is "BIC".
#'@param CV_cutoff A numeric value between 0 and 1 that decides the proportion of the training sample as a fraction of the complete
#'sample. Applies only when rule="TSCV". Default is 2/3.
#'@param deterministics A character object indicating which deterministic variables should be added
#'("none","constant","trend","both"). Default is "constant".
#'@param ADL Logical object indicating whether an ADL model without error-correction term should be estimated.
#'Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Default is \eqn{10^5}.
#'@param max_iter_pi Maximum number of updates for pi. Default is \eqn{10^5}.
#'@param max_iter_gamma Maximum number of updates for gamma. Default is \eqn{10^5}.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{gamma_opt}{A vector containing the estimated coefficients corresponding to the optimal model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{Mv}{A matrix containing the independent variables, after regressing out the deterministic components.}
#'\item{My_d}{A vector containing the dependent variable, after regressing out the deterministic components.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{theta_opt}{The estimated coefficients for the constant and trend in the optimal model.}
#'\item{v}{A matrix containing the independent variables (excluding deterministic components).}
#'\item{weights}{The vector of penalty weights.}
#'\item{y_d}{A vector containing the dependent variable, i.e. the differences of y.}
#'
#'@examples
#'
#'#Estimate an automatically optimized model for unemployment and ten google trends
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'
#'#Estimate a CECM with 1 lagged difference and penalty chosen by the minimum BIC
#'my_specs <- specs_opt(y,x,p=1,rule="BIC")
#'coefs <- my_specs$gamma_opt
#'@export
specs_opt <- function(y,x,p=1,rule=c("BIC","AIC","TSCV"),CV_cutoff=2/3,deterministics = c("constant","trend","both","none"),
ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,lambda_g=NULL,lambda_i=NULL,
thresh=1e-4,max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {
#Dimensions
if(!is.null(x) & is.null(ncol(x))){
x <- as.matrix(x)
}
t <- length(y); n <- ncol(x)+1; m <- (p+2)*n-1
#Set deterministic choice
deterministics <- match.arg(deterministics)
#Check group lambda sequence
if(ADL){
m <- m-n
lambda_g <- 0 #No group penalty for ADL model
}else if(is.null(lambda_g)){
lambda_g <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_g<0)){
stop("lambda_g cannot contain negative values")
}else if(!is.numeric(lambda_g)){
stop("lambda_g has to be numeric")
}
#Check individual lambda sequence
if(is.null(lambda_i)){
lambda_i <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_i<0)){
stop("lambda_i cannot contain negative values")
}else if(!is.numeric(lambda_i)){
stop("lambda_i has to be numeric")
}
#Pass weights onto Rcpp
if(is.numeric(weights)){
if(length(weights)!= m){
stop(paste("Number of weights (",length(weights),
") does not equal the number of stochastic variables (",m,")",sep=""))
}else if(is.numeric(weights) & any(weights<0)){
stop(paste("Negative weights are not allowed"))
}
}else if(is.character(weights)){
weights <- match.arg(weights)
if(weights == "ols" & t<(m-1)){
stop("Dimension is too large to use ols as an initial estimator.")
} else if(weights == "ols"){
weights <- -1 #Rcpp will compute weights via ols or ridge
} else if(weights == "ridge") {
weights <- -2
}else if(weights== "none"){
weights <- matrix(1,m)
}
}else{
stop("Invalid class of weights")
}
#Obtain output for various selection rules
rule = match.arg(rule)
if(rule %in% c("BIC","AIC")){
output <- specs_rcpp(y,x,p,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
max_iter_delta,max_iter_pi,max_iter_gamma)
y_d <- output$y_d; X <- cbind(output$D,output$v);
gammas <- output$gammas
coefs <- rbind(output$thetas,gammas)
resids <- matrix(rep(y_d,ncol(coefs)),length(y_d)) - X%*%coefs
log_variances <- log(colMeans(resids^2))
ps <- apply(gammas,2,function(x){sum(x!=0)})
if(rule == "BIC"){
ICs <- log_variances + ps*log(length(y_d))/length(y_d)
}else{
ICs <- log_variances + 2*ps/length(y_d)
}
n_opt <- which.min(ICs) #index of optimal coefficients and penalty
lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
gamma_opt <- gammas[,n_opt] #optimal gamma
d_opt <- output$thetas[,n_opt] #optimal deterministics
lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
names(lambda_opt) <- c("lambda_g","lambda_i")
#Add optimized values to list
output[["gamma_opt"]] <- gamma_opt
output[["d_opt"]] <- d_opt
output[["lambda_opt"]] <- lambda_opt
output <- output[sort(names(output))]
}else{
#Estimate SPECS on training data
data <- cecm(y,x,p,ADL) #transform whole dataset to CECM
y_d <- data$y_d; z_l <- data$z_l; w <- data$w;
t <- length(y_d); t_train <- floor(length(y_d)*CV_cutoff)
y_d_train <- y_d[1:t_train]; z_l_train <- z_l[1:t_train,]; w_train <- w[1:t_train,]#create training data
output <- specs_tr_rcpp(y_d_train,z_l_train,w_train,deterministics,ADL,weights,k_delta,k_pi,
lambda_g,lambda_i,thresh,max_iter_delta,max_iter_pi,max_iter_gamma)
#Evaluate performance on test data
y_d_test <- y_d[(t_train+1):t]; v_test <- cbind(z_l[(t_train+1):t,],w[(t_train+1):t,])
d_test <- matrix(cbind(1,(t_train+1):t),ncol=2)
t_test <- length(y_d_test)
coefs <- rbind(output$thetas,output$gammas)
resids <- matrix(rep(y_d_test,ncol(coefs)),t_test) - cbind(d_test,v_test)%*%coefs
msfes <- colMeans(resids^2)
#Obtain optimal solution according to CV result
n_opt <- which.min(msfes)
lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
names(lambda_opt) <- c("lambda_g","lambda_i")
fit_full <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,
lambda_g=lambda_opt[1],lambda_i=lambda_opt[2],thresh,max_iter_delta,max_iter_pi,max_iter_gamma)
gamma_opt <- fit_full$gammas #optimal gamma
theta_opt <- fit_full$theta #optimal deterministics
#Add optimized values to list
output[["gamma_opt"]] <- gamma_opt
output[["theta_opt"]] <- theta_opt
output[["lambda_opt"]] <- lambda_opt
output <- output[sort(names(output))]
}
output
}
#'SPECS on pre-transformed data
#'
#'This function computes the Single-equation Penalized Error Correction Selector
#'as described in Smeekes and Wijler (2020) based on data that is already in the form of
#'a conditional error-correction model.
#'
#'@param y_d A vector containing the differences of the dependent variable.
#'@param z_l A matrix containing the lagged levels.
#'@param w A matrix containing the required difference
#'@param deterministics Indicates which deterministic variables should be added (0 = none, 1=constant, 2=constant and linear trend).
#'@param ADL Boolean indicating whether an ADL model without error-correction term should be estimated. Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Defaults is 1e5.
#'@param max_iter_pi Maximum number of updates for pi. Defaults is 1e5.
#'@param max_iter_gamma Maximum number of updates for gamma. Defaults is 1e5.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{gamma_opt}{A vector containing the estimated coefficients corresponding to the optimal model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{theta_opt}{The estimated coefficients for the constant and trend in the optimal model.}
#'\item{weights}{The vector of penalty weights.}
#'
#'@examples
#'
#'#Estimate a conditional error-correction model on pre-transformed data with a constant
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'y_d <- y[-1]-y[-100]
#'z_l <- cbind(y[-100],x[-100,])
#'w <- x[-1,]-x[-100,] #This w corresponds to a cecm with p=0 lagged differences
#'
#'my_specs <- specs_tr(y_d,z_l,w,deterministics="constant")
#'
#'#Estimate an ADL model on pre-transformed data with a constant
#'
#'my_specs <- specs_tr(y_d,NULL,w,ADL=TRUE,deterministics="constant")
#'@export
specs_tr <- function(y_d,z_l=NULL,w,deterministics = c("constant","trend","both","none"),
ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,
lambda_g=NULL,lambda_i=NULL,thresh=1e-4,
max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {
#Data check + dimensions
if(!is.null(z_l) & is.null(ncol(z_l))){
z_l <- as.matrix(z_l)
}else if(is.null(z_l)& !ADL ){
stop("z_l cannot be missing when ADL=FALSE")
}else if(is.null(z_l)){
z_l <- matrix(0,length(y_d))
}
t <- length(y_d); n <- ncol(z_l); m <- n+ncol(w)
#Set deterministic choice
deterministics <- match.arg(deterministics)
#Check group lambda sequence
if(ADL){
m <- ncol(w)
lambda_g <- 0 #No group penalty for ADL model
}else if(is.null(lambda_g)){
lambda_g <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_g<0)){
stop("lambda_g cannot contain negative values")
}else if(!is.numeric(lambda_g)){
stop("lambda_g has to be numeric")
}
#Check individual lambda sequence
if(is.null(lambda_i)){
lambda_i <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_i<0)){
stop("lambda_i cannot contain negative values")
}else if(!is.numeric(lambda_i)){
stop("lambda_i has to be numeric")
}
#Pass weights onto Rcpp
if(is.numeric(weights)){
if(length(weights)!= m){
stop(paste("Number of weights (",length(weights),
") does not equal the number of stochastic variables (",m,")",sep=""))
}else if(is.numeric(weights) & any(weights<0)){
stop(paste("Negative weights are not allowed"))
}
}else if(is.character(weights)){
weights <- match.arg(weights)
if(weights == "ols" & t<(m-1)){
stop("Dimension is too large to use ols as an initial estimator.")
} else if(weights == "ols"){
weights <- -1 #Rcpp will compute weights via ols or ridge
} else if(weights == "ridge") {
weights <- -2
}else if(weights== "none"){
weights <- matrix(1,m)
}
}else{
stop("Invalid class of weights")
}
output <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
max_iter_delta,max_iter_pi,max_iter_gamma)
output <- output[sort(names(output))]
output
}
#'SPECS with data transformation and penalty optimization
#'
#'The same function as specs_tr(...), but on data that is pre-transformed to a CECM.
#'
#'@param y_d A vector containing the differences of the dependent variable.
#'@param z_l A matrix containing the lagged levels.
#'@param w A matrix containing the required difference.
#'@param rule A charcater object indicating which selection rule the optimal choice of the penalty parameters is based on.
#'Default is "BIC".
#'@param CV_cutoff A numeric value between 0 and 1 that decides the proportion of the training sample as a fraction of the complete
#'sample. Applies only when rule="TSCV". Default is 2/3.
#'@param deterministics A character object indicating which deterministic variables should be added
#'("none","constant","trend","both"). Default is "constant".
#'@param ADL Logical object indicating whether an ADL model without error-correction term should be estimated.
#'Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Default is \eqn{10^5}.
#'@param max_iter_pi Maximum number of updates for pi. Default is \eqn{10^5}.
#'@param max_iter_gamma Maximum number of updates for gamma. Default is \eqn{10^5}.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{gamma_opt}{A vector containing the estimated coefficients corresponding to the optimal model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{theta_opt}{The estimated coefficients for the constant and trend in the optimal model.}
#'\item{v}{A matrix containing the independent variables (excluding deterministic components).}
#'\item{weights}{The vector of penalty weights.}
#'\item{y_d}{A vector containing the dependent variable, i.e. the differences of y.}
#'
#'@examples
#'#Estimate a CECM with a constant, ols initial weights and penalty chosen by the minimum AIC
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'y_d <- y[-1]-y[-100]
#'z_l <- cbind(y[-100],x[-100,])
#'w <- x[-1,]-x[-100,] #This w corresponds to a cecm with p=0 lagged differences
#'
#'my_specs <- specs_tr_opt(y_d,z_l,w,rule="AIC",weights="ols",deterministics="constant")
#'@export
specs_tr_opt <- function(y_d,z_l=NULL,w,rule=c("BIC","AIC","TSCV"),CV_cutoff=2/3,deterministics = c("constant","trend","both","none"),
ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,lambda_g=NULL,lambda_i=NULL,
thresh=1e-4,max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {
#Data check + dimensions
if(!is.null(z_l) & is.null(ncol(z_l))){
z_l <- as.matrix(z_l)
}else if(is.null(z_l)& !ADL ){
stop("z_l cannot be missing when ADL=FALSE")
}else if(is.null(z_l)){
z_l <- matrix(0,length(y_d))
}
z_l <- as.matrix(z_l); w <- as.matrix(w);
t <- length(y_d); n <- ncol(z_l); m <- n+ncol(w)
#Set deterministic choice
deterministics <- match.arg(deterministics)
#Check group lambda sequence
if(ADL){
m <- ncol(w)
lambda_g <- 0 #No group penalty for ADL model
}else if(is.null(lambda_g)){
lambda_g <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_g<0)){
stop("lambda_g cannot contain negative values")
}else if(!is.numeric(lambda_g)){
stop("lambda_g has to be numeric")
}
#Check individual lambda sequence
if(is.null(lambda_i)){
lambda_i <- -1 #This creates an automated sequence in Rcpp
}else if(any(lambda_i<0)){
stop("lambda_i cannot contain negative values")
}else if(!is.numeric(lambda_i)){
stop("lambda_i has to be numeric")
}
#Pass weights onto Rcpp
if(is.numeric(weights)){
if(length(weights)!= m){
stop(paste("Number of weights (",length(weights),
") does not equal the number of stochastic variables (",m,")",sep=""))
}else if(is.numeric(weights) & any(weights<0)){
stop(paste("Negative weights are not allowed"))
}
}else if(is.character(weights)){
weights <- match.arg(weights)
if(weights == "ols" & t<(m-1)){
stop("Dimension is too large to use ols as an initial estimator.")
} else if(weights == "ols"){
weights <- -1 #Rcpp will compute weights via ols or ridge
} else if(weights == "ridge") {
weights <- -2
}else if(weights== "none"){
weights <- matrix(1,m)
}
}else{
stop("Invalid class of weights")
}
#Obtain output for various selection rules
rule = match.arg(rule)
if(rule %in% c("BIC","AIC")){
output <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
max_iter_delta,max_iter_pi,max_iter_gamma)
y_d <- output$y_d; X <- cbind(output$D,output$v);
gammas <- output$gammas
coefs <- rbind(output$thetas,gammas)
resids <- matrix(rep(y_d,ncol(coefs)),length(y_d)) - X%*%coefs
log_variances <- log(colMeans(resids^2))
ps <- apply(gammas,2,function(x){sum(x!=0)})
if(rule == "BIC"){
ICs <- log_variances + ps*log(length(y_d))/length(y_d)
}else{
ICs <- log_variances + 2*ps/length(y_d)
}
n_opt <- which.min(ICs) #index of optimal coefficients and penalty
lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
gamma_opt <- gammas[,n_opt] #optimal gamma
d_opt <- output$thetas[,n_opt] #optimal deterministics
lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
names(lambda_opt) <- c("lambda_g","lambda_i")
#Add optimized values to list
output[["gamma_opt"]] <- gamma_opt
output[["d_opt"]] <- d_opt
output[["lambda_opt"]] <- lambda_opt
output <- output[sort(names(output))]
}else{
#Estimate SPECS on training data
t <- length(y_d); t_train <- floor(length(y_d)*CV_cutoff)
y_d_train <- y_d[1:t_train]; z_l_train <- z_l[1:t_train,]; w_train <- w[1:t_train,]#create training data
output <- specs_tr_rcpp(y_d_train,z_l_train,w_train,deterministics,ADL,weights,k_delta,k_pi,
lambda_g,lambda_i,thresh,max_iter_delta,max_iter_pi,max_iter_gamma)
#Evaluate performance on test data
y_d_test <- y_d[(t_train+1):t]; v_test <- cbind(z_l[(t_train+1):t,],w[(t_train+1):t,])
d_test <- matrix(cbind(1,(t_train+1):t),ncol=2)
t_test <- length(y_d_test)
coefs <- rbind(output$thetas,output$gammas)
resids <- matrix(rep(y_d_test,ncol(coefs)),t_test) - cbind(d_test,v_test)%*%coefs
msfes <- colMeans(resids^2)
#Obtain optimal solution according to CV result
n_opt <- which.min(msfes)
lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
names(lambda_opt) <- c("lambda_g","lambda_i")
fit_full <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,
lambda_g=lambda_opt[1],lambda_i=lambda_opt[2],thresh,max_iter_delta,max_iter_pi,max_iter_gamma)
gamma_opt <- fit_full$gammas #optimal gamma
theta_opt <- fit_full$theta #optimal deterministics
#Add optimized values to list
output[["gamma_opt"]] <- gamma_opt
output[["theta_opt"]] <- theta_opt
output[["lambda_opt"]] <- lambda_opt
output <- output[sort(names(output))]
}
output
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.