R/specs.R
In specs: Single-Equation Penalized Error-Correction Selector (SPECS)

Documented in specs specs_opt specs_tr specs_tr_opt

#'SPECS
#'
#'This function estimates the Single-equation Penalized Error Correction Selector
#'as described in Smeekes and Wijler (2020). The function takes a dependent variable \eqn{y} and a matrix of independent
#'variables x as input, and transforms it to a conditional error correction model. This model is estimated by means of
#'penalized regression, involving \eqn{L1}-penalty on individual coefficients and a potential \eqn{L2}-penalty
#'on the coefficients of the lagged levels in the model, see Smeekes and Wijler (2020) for details.
#'
#'The function can generate an automated sequence of penalty parameters and offers the option to compute and include adaptive
#'penalty weights. In addition, it is possible to estimate a penalized ADL model in differences by excluding the lagged levels
#'from the model. For automated selection of an optimal penalty value, see the function specs_opt(...).
#'
#'@param y A vector containing the dependent variable in levels.
#'@param x A matrix containing the independent variables in levels.
#'@param p Integer indicating the desired number of lagged differences to include. Default is 1.
#'@param deterministics A character object indicating which deterministic variables should be added
#'("none","constant","trend","both"). Default is "constant".
#'@param ADL Logical object indicating whether an ADL model without error-correction term should be estimated.
#'Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied or no weights can be applied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Default is \eqn{10^5}.
#'@param max_iter_pi Maximum number of updates for pi. Default is \eqn{10^5}.
#'@param max_iter_gamma Maximum number of updates for gamma. Default is \eqn{10^5}.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{Mv}{A matrix containing the independent variables, after regressing out the deterministic components.}
#'\item{My_d}{A vector containing the dependent variable, after regressing out the deterministic components.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{v}{A matrix containing the independent variables (excluding deterministic components).}
#'\item{weights}{The vector of penalty weights.}
#'\item{y_d}{A vector containing the dependent variable, i.e. the differences of y.}
#'
#'@examples
#'
#'#Estimate a model for unemployment and ten google trends
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'
#'#Estimate a CECM with 1 lagged differences
#'my_specs <- specs(y,x,p=1)
#'
#'#Estimate a CECM with 1 lagged differences and no group penalty
#'my_specs2 <- specs(y,x,p=1,lambda_g=0)
#'
#'#Estimate an autoregressive distributed lag model with 2 lagged differences
#'my_specs3 <- specs(y,x,ADL=TRUE,p=2)
#'@export
specs <- function(y,x,p=1,deterministics = c("constant","trend","both","none"),
                  ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,lambda_g=NULL,lambda_i=NULL,
                  thresh=1e-4,max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {

    #Dimensions
    if(!is.null(x) & is.null(ncol(x))){
        x <- as.matrix(x)
    }
    t <- length(y); n <- ncol(x)+1; m <- (p+2)*n-1

    #Set deterministic choice
    deterministics <- match.arg(deterministics)

    #Check group lambda sequence
    if(ADL){
        m <- m-n
        lambda_g <- 0 #No group penalty for ADL model
    }else if(is.null(lambda_g)){
        lambda_g <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_g<0)){
        stop("lambda_g cannot contain negative values")
    }else if(!is.numeric(lambda_g)){
        stop("lambda_g has to be numeric")
    }

    #Check individual lambda sequence
    if(is.null(lambda_i)){
        lambda_i <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_i<0)){
        stop("lambda_i cannot contain negative values")
    }else if(!is.numeric(lambda_i)){
        stop("lambda_i has to be numeric")
    }

    #Pass weights onto Rcpp
    if(is.numeric(weights)){
        if(length(weights)!= m){
            stop(paste("Number of weights (",length(weights),
                       ") does not equal the number of stochastic variables (",m,")",sep=""))
        }else if(is.numeric(weights) & any(weights<0)){
            stop(paste("Negative weights are not allowed"))
        }
    }else if(is.character(weights)){
        weights <- match.arg(weights)
        if(weights == "ols" & t<(m-1)){
            stop("Dimension is too large to use ols as an initial estimator.")
        } else if(weights == "ols"){
            weights <- -1 #Rcpp will compute weights via ols or ridge
        } else if(weights == "ridge") {
            weights <- -2
        }else if(weights== "none"){
            weights <- matrix(1,m)
        }
    }else{
        stop("Invalid class of weights")
    }

    output <- specs_rcpp(y,x,p,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
                         max_iter_delta,max_iter_pi,max_iter_gamma)
    output <- output[sort(names(output))]
    output
}

#'SPECS with data transformation and penalty optimization
#'
#'This function estimates SPECS and selects the optimal penalty parameter based on a selection rule. All arguments correspond
#'to those of the function specs(...), but it contains the additional arguments rule and CV_cutoff.
#'Selection of the penalty parameter can be carried out by BIC or AIC or by time series cross-validation (TSCV). The degrees
#'of freedom for the information criteria (BIC or AIC) are approximated by the number of non-zero coefficients in the
#'estimated model. TSCV cuts the sample in two, based on the argument CV_cutoff which determines the proportion of
#'the training sample. SPECS is estimated on the first part and the estimated model is used to predict the values
#'in the second part. The selection is then based on the lowest Mean-Squared Forecast Error (MSFE) obtained over the test sample.
#'
#'@param y A vector containing the dependent variable in levels.
#'@param x A matrix containing the independent variables in levels.
#'@param p Integer indicating the desired number of lagged differences to include. Default is 1.
#'@param rule A charcater object indicating which selection rule the optimal choice of the penalty parameters is based on.
#'Default is "BIC".
#'@param CV_cutoff A numeric value between 0 and 1 that decides the proportion of the training sample as a fraction of the complete
#'sample. Applies only when rule="TSCV". Default is 2/3.
#'@param deterministics A character object indicating which deterministic variables should be added
#'("none","constant","trend","both"). Default is "constant".
#'@param ADL Logical object indicating whether an ADL model without error-correction term should be estimated.
#'Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Default is \eqn{10^5}.
#'@param max_iter_pi Maximum number of updates for pi. Default is \eqn{10^5}.
#'@param max_iter_gamma Maximum number of updates for gamma. Default is \eqn{10^5}.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{gamma_opt}{A vector containing the estimated coefficients corresponding to the optimal model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{Mv}{A matrix containing the independent variables, after regressing out the deterministic components.}
#'\item{My_d}{A vector containing the dependent variable, after regressing out the deterministic components.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{theta_opt}{The estimated coefficients for the constant and trend in the optimal model.}
#'\item{v}{A matrix containing the independent variables (excluding deterministic components).}
#'\item{weights}{The vector of penalty weights.}
#'\item{y_d}{A vector containing the dependent variable, i.e. the differences of y.}
#'
#'@examples
#'
#'#Estimate an automatically optimized model for unemployment and ten google trends
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'
#'#Estimate a CECM with 1 lagged difference and penalty chosen by the minimum BIC
#'my_specs <- specs_opt(y,x,p=1,rule="BIC")
#'coefs <- my_specs$gamma_opt
#'@export
specs_opt <- function(y,x,p=1,rule=c("BIC","AIC","TSCV"),CV_cutoff=2/3,deterministics = c("constant","trend","both","none"),
                  ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,lambda_g=NULL,lambda_i=NULL,
                  thresh=1e-4,max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {

    #Dimensions
    if(!is.null(x) & is.null(ncol(x))){
        x <- as.matrix(x)
    }
    t <- length(y); n <- ncol(x)+1; m <- (p+2)*n-1

    #Set deterministic choice
    deterministics <- match.arg(deterministics)

    #Check group lambda sequence
    if(ADL){
        m <- m-n
        lambda_g <- 0 #No group penalty for ADL model
    }else if(is.null(lambda_g)){
        lambda_g <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_g<0)){
        stop("lambda_g cannot contain negative values")
    }else if(!is.numeric(lambda_g)){
        stop("lambda_g has to be numeric")
    }

    #Check individual lambda sequence
    if(is.null(lambda_i)){
        lambda_i <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_i<0)){
        stop("lambda_i cannot contain negative values")
    }else if(!is.numeric(lambda_i)){
        stop("lambda_i has to be numeric")
    }

    #Pass weights onto Rcpp
    if(is.numeric(weights)){
        if(length(weights)!= m){
            stop(paste("Number of weights (",length(weights),
                       ") does not equal the number of stochastic variables (",m,")",sep=""))
        }else if(is.numeric(weights) & any(weights<0)){
            stop(paste("Negative weights are not allowed"))
        }
    }else if(is.character(weights)){
        weights <- match.arg(weights)
        if(weights == "ols" & t<(m-1)){
            stop("Dimension is too large to use ols as an initial estimator.")
        } else if(weights == "ols"){
            weights <- -1 #Rcpp will compute weights via ols or ridge
        } else if(weights == "ridge") {
            weights <- -2
        }else if(weights== "none"){
            weights <- matrix(1,m)
        }
    }else{
        stop("Invalid class of weights")
    }

    #Obtain output for various selection rules
    rule = match.arg(rule)
    if(rule %in% c("BIC","AIC")){
        output <- specs_rcpp(y,x,p,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
                             max_iter_delta,max_iter_pi,max_iter_gamma)
        y_d <- output$y_d; X <- cbind(output$D,output$v);
        gammas <- output$gammas
        coefs <- rbind(output$thetas,gammas)
        resids <- matrix(rep(y_d,ncol(coefs)),length(y_d)) - X%*%coefs
        log_variances <- log(colMeans(resids^2))
        ps <- apply(gammas,2,function(x){sum(x!=0)})
        if(rule == "BIC"){
            ICs <- log_variances + ps*log(length(y_d))/length(y_d)
        }else{
            ICs <- log_variances + 2*ps/length(y_d)
        }
        n_opt <- which.min(ICs) #index of optimal coefficients and penalty
        lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
        gamma_opt <- gammas[,n_opt] #optimal gamma
        d_opt <- output$thetas[,n_opt] #optimal deterministics
        lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
        names(lambda_opt) <- c("lambda_g","lambda_i")

        #Add optimized values to list
        output[["gamma_opt"]] <- gamma_opt
        output[["d_opt"]] <- d_opt
        output[["lambda_opt"]] <- lambda_opt
        output <- output[sort(names(output))]

    }else{

        #Estimate SPECS on training data
        data <- cecm(y,x,p,ADL) #transform whole dataset to CECM
        y_d <- data$y_d; z_l <- data$z_l; w <- data$w;
        t <- length(y_d); t_train <- floor(length(y_d)*CV_cutoff)
        y_d_train <- y_d[1:t_train]; z_l_train <- z_l[1:t_train,]; w_train <- w[1:t_train,]#create training data
        output <- specs_tr_rcpp(y_d_train,z_l_train,w_train,deterministics,ADL,weights,k_delta,k_pi,
                                lambda_g,lambda_i,thresh,max_iter_delta,max_iter_pi,max_iter_gamma)

        #Evaluate performance on test data
        y_d_test <- y_d[(t_train+1):t]; v_test <- cbind(z_l[(t_train+1):t,],w[(t_train+1):t,])
        d_test <- matrix(cbind(1,(t_train+1):t),ncol=2)
        t_test <- length(y_d_test)
        coefs <- rbind(output$thetas,output$gammas)
        resids <- matrix(rep(y_d_test,ncol(coefs)),t_test) - cbind(d_test,v_test)%*%coefs
        msfes <- colMeans(resids^2)

        #Obtain optimal solution according to CV result
        n_opt <- which.min(msfes)
        lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
        lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
        names(lambda_opt) <- c("lambda_g","lambda_i")
        fit_full <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,
                   lambda_g=lambda_opt[1],lambda_i=lambda_opt[2],thresh,max_iter_delta,max_iter_pi,max_iter_gamma)
        gamma_opt <- fit_full$gammas #optimal gamma
        theta_opt <- fit_full$theta #optimal deterministics

        #Add optimized values to list
        output[["gamma_opt"]] <- gamma_opt
        output[["theta_opt"]] <- theta_opt
        output[["lambda_opt"]] <- lambda_opt
        output <- output[sort(names(output))]
    }
    output
}

#'SPECS on pre-transformed data
#'
#'This function computes the Single-equation Penalized Error Correction Selector
#'as described in Smeekes and Wijler (2020) based on data that is already in the form of
#'a conditional error-correction model.
#'
#'@param y_d A vector containing the differences of the dependent variable.
#'@param z_l A matrix containing the lagged levels.
#'@param w A matrix containing the required difference
#'@param deterministics Indicates which deterministic variables should be added (0 = none, 1=constant, 2=constant and linear trend).
#'@param ADL Boolean indicating whether an ADL model without error-correction term should be estimated. Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Defaults is 1e5.
#'@param max_iter_pi Maximum number of updates for pi. Defaults is 1e5.
#'@param max_iter_gamma Maximum number of updates for gamma. Defaults is 1e5.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{gamma_opt}{A vector containing the estimated coefficients corresponding to the optimal model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{theta_opt}{The estimated coefficients for the constant and trend in the optimal model.}
#'\item{weights}{The vector of penalty weights.}
#'
#'@examples
#'
#'#Estimate a conditional error-correction model on pre-transformed data with a constant
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'y_d <- y[-1]-y[-100]
#'z_l <- cbind(y[-100],x[-100,])
#'w <- x[-1,]-x[-100,] #This w corresponds to a cecm with p=0 lagged differences
#'
#'my_specs <- specs_tr(y_d,z_l,w,deterministics="constant")
#'
#'#Estimate an ADL model on pre-transformed data with a constant
#'
#'my_specs <- specs_tr(y_d,NULL,w,ADL=TRUE,deterministics="constant")
#'@export
specs_tr <- function(y_d,z_l=NULL,w,deterministics = c("constant","trend","both","none"),
                       ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,
                       lambda_g=NULL,lambda_i=NULL,thresh=1e-4,
                       max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {

    #Data check + dimensions
    if(!is.null(z_l) & is.null(ncol(z_l))){
        z_l <- as.matrix(z_l)
    }else if(is.null(z_l)& !ADL ){
        stop("z_l cannot be missing when ADL=FALSE")
    }else if(is.null(z_l)){
        z_l <- matrix(0,length(y_d))
    }
    t <- length(y_d); n <- ncol(z_l); m <- n+ncol(w)

    #Set deterministic choice
    deterministics <- match.arg(deterministics)

    #Check group lambda sequence
    if(ADL){
        m <- ncol(w)
        lambda_g <- 0 #No group penalty for ADL model
    }else if(is.null(lambda_g)){
        lambda_g <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_g<0)){
        stop("lambda_g cannot contain negative values")
    }else if(!is.numeric(lambda_g)){
        stop("lambda_g has to be numeric")
    }

    #Check individual lambda sequence
    if(is.null(lambda_i)){
        lambda_i <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_i<0)){
        stop("lambda_i cannot contain negative values")
    }else if(!is.numeric(lambda_i)){
        stop("lambda_i has to be numeric")
    }

    #Pass weights onto Rcpp
    if(is.numeric(weights)){
        if(length(weights)!= m){
            stop(paste("Number of weights (",length(weights),
                       ") does not equal the number of stochastic variables (",m,")",sep=""))
        }else if(is.numeric(weights) & any(weights<0)){
            stop(paste("Negative weights are not allowed"))
        }
    }else if(is.character(weights)){
        weights <- match.arg(weights)
        if(weights == "ols" & t<(m-1)){
            stop("Dimension is too large to use ols as an initial estimator.")
        } else if(weights == "ols"){
            weights <- -1 #Rcpp will compute weights via ols or ridge
        } else if(weights == "ridge") {
            weights <- -2
        }else if(weights== "none"){
            weights <- matrix(1,m)
        }
    }else{
        stop("Invalid class of weights")
    }

    output <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
                         max_iter_delta,max_iter_pi,max_iter_gamma)
    output <- output[sort(names(output))]
    output
}

#'SPECS with data transformation and penalty optimization
#'
#'The same function as specs_tr(...), but on data that is pre-transformed to a CECM.
#'
#'@param y_d A vector containing the differences of the dependent variable.
#'@param z_l A matrix containing the lagged levels.
#'@param w A matrix containing the required difference.
#'@param rule A charcater object indicating which selection rule the optimal choice of the penalty parameters is based on.
#'Default is "BIC".
#'@param CV_cutoff A numeric value between 0 and 1 that decides the proportion of the training sample as a fraction of the complete
#'sample. Applies only when rule="TSCV". Default is 2/3.
#'@param deterministics A character object indicating which deterministic variables should be added
#'("none","constant","trend","both"). Default is "constant".
#'@param ADL Logical object indicating whether an ADL model without error-correction term should be estimated.
#'Default is FALSE.
#'@param weights Choice of penalty weights. The weights can be automatically generated by ridge regression (default)
#' or ols. Alternatively, a conformable vector of non-negative weights can be supplied.
#'@param k_delta The power to which the weights for delta should be raised, if weights are set to "ridge" or "ols".
#'@param k_pi The power to which the weights for pi should be raised, if weights are set to "ridge" or "ols".
#'@param lambda_g An optional user-specified grid for the group penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param lambda_i An optional user-specified grid for the individual penalty may be supplied. If left empty,
#'a 10-dimensional grid containing 0 as the minimum value is generated.
#'@param thresh The treshold for convergence.
#'@param max_iter_delta Maximum number of updates for delta. Default is \eqn{10^5}.
#'@param max_iter_pi Maximum number of updates for pi. Default is \eqn{10^5}.
#'@param max_iter_gamma Maximum number of updates for gamma. Default is \eqn{10^5}.
#'
#'@return
#'\item{D}{A matrix containing the deterministic variables included in the model.}
#'\item{gammas}{A matrix containing the estimated coefficients of
#'the stochastic variables in the conditional error-correction model.}
#'\item{gamma_opt}{A vector containing the estimated coefficients corresponding to the optimal model.}
#'\item{lambda_g}{The grid of group penalties.}
#'\item{lambda_i}{The grid of individual penalties.}
#'\item{theta}{The estimated coefficients for the constant and trend. If a deterministic component is excluded,
#'its coefficient is set to zero.}
#'\item{theta_opt}{The estimated coefficients for the constant and trend in the optimal model.}
#'\item{v}{A matrix containing the independent variables (excluding deterministic components).}
#'\item{weights}{The vector of penalty weights.}
#'\item{y_d}{A vector containing the dependent variable, i.e. the differences of y.}
#'
#'@examples
#'#Estimate a CECM with a constant, ols initial weights and penalty chosen by the minimum AIC
#'
#'#Organize data
#'y <- Unempl_GT[,1]
#'index_GT <- sample(c(2:ncol(Unempl_GT)),10)
#'x <- Unempl_GT[,index_GT]
#'y_d <- y[-1]-y[-100]
#'z_l <- cbind(y[-100],x[-100,])
#'w <- x[-1,]-x[-100,] #This w corresponds to a cecm with p=0 lagged differences
#'
#'my_specs <- specs_tr_opt(y_d,z_l,w,rule="AIC",weights="ols",deterministics="constant")
#'@export
specs_tr_opt <- function(y_d,z_l=NULL,w,rule=c("BIC","AIC","TSCV"),CV_cutoff=2/3,deterministics = c("constant","trend","both","none"),
                      ADL=FALSE,weights = c("ridge","ols","none"),k_delta=1,k_pi=1,lambda_g=NULL,lambda_i=NULL,
                      thresh=1e-4,max_iter_delta = 1e5, max_iter_pi = 1e5, max_iter_gamma = 1e5) {

    #Data check + dimensions
    if(!is.null(z_l) & is.null(ncol(z_l))){
        z_l <- as.matrix(z_l)
    }else if(is.null(z_l)& !ADL ){
        stop("z_l cannot be missing when ADL=FALSE")
    }else if(is.null(z_l)){
        z_l <- matrix(0,length(y_d))
    }
    z_l <- as.matrix(z_l); w <- as.matrix(w);
    t <- length(y_d); n <- ncol(z_l); m <- n+ncol(w)

    #Set deterministic choice
    deterministics <- match.arg(deterministics)

    #Check group lambda sequence
    if(ADL){
        m <- ncol(w)
        lambda_g <- 0 #No group penalty for ADL model
    }else if(is.null(lambda_g)){
        lambda_g <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_g<0)){
        stop("lambda_g cannot contain negative values")
    }else if(!is.numeric(lambda_g)){
        stop("lambda_g has to be numeric")
    }

    #Check individual lambda sequence
    if(is.null(lambda_i)){
        lambda_i <- -1 #This creates an automated sequence in Rcpp
    }else if(any(lambda_i<0)){
        stop("lambda_i cannot contain negative values")
    }else if(!is.numeric(lambda_i)){
        stop("lambda_i has to be numeric")
    }

    #Pass weights onto Rcpp
    if(is.numeric(weights)){
        if(length(weights)!= m){
            stop(paste("Number of weights (",length(weights),
                       ") does not equal the number of stochastic variables (",m,")",sep=""))
        }else if(is.numeric(weights) & any(weights<0)){
            stop(paste("Negative weights are not allowed"))
        }
    }else if(is.character(weights)){
        weights <- match.arg(weights)
        if(weights == "ols" & t<(m-1)){
            stop("Dimension is too large to use ols as an initial estimator.")
        } else if(weights == "ols"){
            weights <- -1 #Rcpp will compute weights via ols or ridge
        } else if(weights == "ridge") {
            weights <- -2
        }else if(weights== "none"){
            weights <- matrix(1,m)
        }
    }else{
        stop("Invalid class of weights")
    }

    #Obtain output for various selection rules
    rule = match.arg(rule)
    if(rule %in% c("BIC","AIC")){
        output <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,lambda_g,lambda_i,thresh,
                             max_iter_delta,max_iter_pi,max_iter_gamma)
        y_d <- output$y_d; X <- cbind(output$D,output$v);
        gammas <- output$gammas
        coefs <- rbind(output$thetas,gammas)
        resids <- matrix(rep(y_d,ncol(coefs)),length(y_d)) - X%*%coefs
        log_variances <- log(colMeans(resids^2))
        ps <- apply(gammas,2,function(x){sum(x!=0)})
        if(rule == "BIC"){
            ICs <- log_variances + ps*log(length(y_d))/length(y_d)
        }else{
            ICs <- log_variances + 2*ps/length(y_d)
        }
        n_opt <- which.min(ICs) #index of optimal coefficients and penalty
        lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
        gamma_opt <- gammas[,n_opt] #optimal gamma
        d_opt <- output$thetas[,n_opt] #optimal deterministics
        lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
        names(lambda_opt) <- c("lambda_g","lambda_i")

        #Add optimized values to list
        output[["gamma_opt"]] <- gamma_opt
        output[["d_opt"]] <- d_opt
        output[["lambda_opt"]] <- lambda_opt
        output <- output[sort(names(output))]

    }else{

        #Estimate SPECS on training data
        t <- length(y_d); t_train <- floor(length(y_d)*CV_cutoff)
        y_d_train <- y_d[1:t_train]; z_l_train <- z_l[1:t_train,]; w_train <- w[1:t_train,]#create training data
        output <- specs_tr_rcpp(y_d_train,z_l_train,w_train,deterministics,ADL,weights,k_delta,k_pi,
                                lambda_g,lambda_i,thresh,max_iter_delta,max_iter_pi,max_iter_gamma)

        #Evaluate performance on test data
        y_d_test <- y_d[(t_train+1):t]; v_test <- cbind(z_l[(t_train+1):t,],w[(t_train+1):t,])
        d_test <- matrix(cbind(1,(t_train+1):t),ncol=2)
        t_test <- length(y_d_test)
        coefs <- rbind(output$thetas,output$gammas)
        resids <- matrix(rep(y_d_test,ncol(coefs)),t_test) - cbind(d_test,v_test)%*%coefs
        msfes <- colMeans(resids^2)

        #Obtain optimal solution according to CV result
        n_opt <- which.min(msfes)
        lambda_g <- output$lambda_g; lambda_i <- output$lambda_i #lambda sequences
        lambda_opt <- as.numeric(expand.grid(lambda_i,lambda_g)[n_opt,c(2,1)]) #optimal penalties
        names(lambda_opt) <- c("lambda_g","lambda_i")
        fit_full <- specs_tr_rcpp(y_d,z_l,w,deterministics,ADL,weights,k_delta,k_pi,
                                  lambda_g=lambda_opt[1],lambda_i=lambda_opt[2],thresh,max_iter_delta,max_iter_pi,max_iter_gamma)
        gamma_opt <- fit_full$gammas #optimal gamma
        theta_opt <- fit_full$theta #optimal deterministics

        #Add optimized values to list
        output[["gamma_opt"]] <- gamma_opt
        output[["theta_opt"]] <- theta_opt
        output[["lambda_opt"]] <- lambda_opt
        output <- output[sort(names(output))]
    }
    output
}