R/midas_sp.R

Defines functions all_coef_full kfun midas_sp_fit g_np_mv g_np cv_np midas_pl_plain midas_si_plain prep_midas_sp midas_sp

Documented in midas_pl_plain midas_si_plain midas_sp

##' Semi-parametric MIDAS regression
##'
##' Estimate semi-parametric MIDAS regression using non-linear least squares.
##'
##' @param formula formula for restricted MIDAS regression or \code{midas_r} object. Formula must include \code{\link{fmls}} function
##' @param data a named list containing data with mixed frequencies
##' @param bws a bandwith specification. Note you need to supply logarithm value of the bandwith. 
##' @param start the starting values for optimisation. Must be a list with named elements.
##' @param degree the degree of local polynomial. 0 corresponds to local-constant, 1 local-linear. For univariate models higher values can be provided.
##' @param Ofunction the list with information which R function to use for optimisation. The list must have element named \code{Ofunction} which contains character string of chosen 
##' R function. Other elements of the list are the arguments passed to this function.  The default optimisation function is \code{\link{optim}} with arguments
##'  \code{method="Nelder-Mead"} and \code{control=list(maxit=5000)}. Other supported functions are \code{\link{nls}}, \code{\link{optimx}}.
##' 
##' @param ... additional arguments supplied to optimisation function
##' @return a \code{midas_sp} object which is the list with the following elements:
##' 
##' \item{coefficients}{the estimates of parameters of restrictions}
##' \item{midas_coefficients}{the estimates of MIDAS coefficients of MIDAS regression}
##' \item{model}{model data}
##' \item{unrestricted}{unrestricted regression estimated using \code{\link{midas_u}}}
##' \item{term_info}{the named list. Each element is a list with the information about the term, such as its frequency, function for weights, gradient function of weights, etc.}
##' \item{fn0}{optimisation function for non-linear least squares problem solved in restricted MIDAS regression}
##' \item{rhs}{the function which evaluates the right-hand side of the MIDAS regression}
##' \item{gen_midas_coef}{the function which generates the MIDAS coefficients of MIDAS regression}
##' \item{opt}{the output of optimisation procedure}
##' \item{argmap_opt}{the list containing the name of optimisation function together with arguments for optimisation function}
##' \item{start_opt}{the starting values used in optimisation}
##' \item{start_list}{the starting values as a list}
##' \item{call}{the call to the function}
##' \item{terms}{terms object}
##' \item{gradient}{gradient of NLS objective function}
##' \item{hessian}{hessian of NLS objective function}
##' \item{gradD}{gradient function of MIDAS weight functions} 
##' \item{Zenv}{the environment in which data is placed}
##' \item{nobs}{the number of effective observations}
##' \item{convergence}{the convergence message}
##' \item{fitted.values}{the fitted values of MIDAS regression}
##' \item{residuals}{the residuals of MIDAS regression}
##' 
##' @author Virmantas Kvedaras, Vaidotas Zemlys-Balevičius
##' @rdname midas_sp
##' @details Given MIDAS regression:
##'
##' \deqn{y_t = \sum_{j=1}^p\alpha_jy_{t-j} +\sum_{i=0}^{k}\sum_{j=0}^{l_i}\beta_{j}^{(i)}x_{tm_i-j}^{(i)} + u_t,}
##' 
##' estimate the parameters of the restriction
##'
##' \deqn{\beta_j^{(i)}=g^{(i)}(j,\lambda).}
##'
##' Such model is a generalisation of so called ADL-MIDAS regression. It is not required that all the coefficients should be restricted, i.e the function \eqn{g^{(i)}}
##' might be an identity function. The regressors \eqn{x_\tau^{(i)}} must be of higher
##' (or of the same) frequency as the dependent variable \eqn{y_t}. 
##'
##' @importFrom stats as.formula formula model.matrix model.response terms lsfit time
##' @importFrom zoo index index2char
##' @importFrom Formula Formula
##' @export
midas_sp <- function(formula, data, bws, start, degree = 1,  Ofunction="optim", ...) {
    
    Zenv <- new.env(parent = environment(formula))
    
    if (missing(data)) {
        ee <- NULL
    }
    else {
        ee <- data_to_env(data)
        parent.env(ee) <- parent.frame()
    }
    
    if (missing(start)) {
        stop("Please supply starting values.")
    } 
    
    assign("ee",ee,Zenv)
    formula <- Formula(formula)
    environment(formula) <- Zenv
    cl <- match.call()    
    mf <- match.call(expand.dots = FALSE)
    mf$formula <- formula
    m <- match(c("formula", "data"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf[[1L]] <- as.name("model.frame")
    mf[[3L]] <- as.name("ee")   
    mf[[4L]] <- as.name("na.omit")
    names(mf)[c(2,3,4)] <- c("formula","data","na.action")
    
    mf <- eval(mf,Zenv)
    mt <- attr(mf, "terms")
    args <- list(...)
    y <- as.numeric(model.response(mf, "numeric"))
    X <- model.matrix(formula, data = mf, rhs = 1)
    if(length(attr(formula,"rhs")) > 1) {
        Z <- model.matrix(formula, data = mf, rhs = 2)
    } else Z <- NULL
    
    #Save ts/zoo information
    if(is.null(ee)) { 
        yy <- eval(formula[[2]], Zenv)
    }else {
        yy <- eval(formula[[2]], ee)
    }
    
    y_index <- 1:length(yy) 
    if(!is.null(attr(mf, "na.action"))) {
        y_index <- y_index[-attr(mf, "na.action")]
    }
    if(length(y_index)>1) {
        if(sum(abs(diff(y_index) - 1))>0) warning("There are NAs in the middle of the time series")                
    }
    
    ysave <- yy[y_index]
    
    if(inherits(yy, "ts")) {
        class(ysave) <- class(yy)
        attr(ysave, "tsp") <- c(time(yy)[range(y_index)], frequency(yy))
    }
    
    if(inherits(yy,c("zoo","ts"))) {
        y_start <- index2char(index(ysave)[1], frequency(ysave))
        y_end <- index2char(index(ysave)[length(ysave)], frequency(ysave))
    } else {
        y_start <- y_index[1]
        y_end <- y_index[length(y_index)]
    }
    
    prepmd <- prep_midas_sp(y, X, Z, bws, degree, formula, Zenv,cl,args,start,Ofunction)
    
    prepmd <- c(prepmd, list(lhs = ysave, lhs_start = y_start, lhs_end = y_end))
    
    class(prepmd) <- "midas_sp"
    
    midas_nlpr.fit(prepmd)    
}

## Prepare necessary objects for fitting of the non-linear parametric MIDAS regression
##
## y the response
## X the model matrix
## mt the terms of the formula
## Zenv the environment to evaluate the formula
## cl call of the function
## args additional argument
## start starting values
## Ofunction the optimisation function
## weight_gradients a list of gradient functions for weights
## lagsTable the lagstable from checkARstar
## unrestricted the unrestricted model
## guess_start if TRUE, get the initial values for non-MIDAS terms via OLS, if FALSE, initialize them with zero.
prep_midas_sp <- function(y, X, Z, bws, degree, f, Zenv, cl, args, start, Ofunction,  guess_start = TRUE) {
    
    
    start <- start[!sapply(start,is.null)]
    
    if (!is.null(args$guess_start)) {
        guess_start <- args$guess_start
        args$guess_start <- NULL
    }  
    if(is.null(Z)) {
        ##We are having pure SI model.
        Z <- X
        X <- 0
        mt1 <- NULL
        mt2 <- terms(f, rhs = 1)
    } else {
        mt1 <- terms(f, rhs = 1)
        mt2 <- terms(f, rhs = 2)
        
        if (attr(mt1,"intercept") == 1)  {
            X <- X[, -1, drop = FALSE]
        }
        
    }

    if (attr(mt2,"intercept") == 1)  {
        Z <- Z[, -1, drop = FALSE]
    }
    
    bws_length <- length(bws)
    
    if(!is.null(mt1)) {
        terms.rhs1 <- as.list(attr(mt1,"variables"))[-2:-1]
        rfd1 <- lapply(terms.rhs1, dterm_nlpr,  Zenv = Zenv)
        term_names1 <- sapply(rfd1,"[[","term_name")
        names(rfd1) <- term_names1
        rf1 <- lapply(rfd1,"[[","weight")
   
        start_default1 <- lapply(rfd1,"[[","start")
        start_default1[intersect(names(start), term_names1)] <- start[intersect(names(start), term_names1)]
        np1 <- cumsum(sapply(start_default1,length))
        pinds1 <- build_indices(np1,names(start_default1))
        fake_x_coef <- all_coef_full(unlist(start_default1), pinds1, rf1) 
        npx <- cumsum(sapply(fake_x_coef,length))
        xinds <- build_indices(npx,names(start_default1))
        pinds1 <- lapply(pinds1, function(x) x + bws_length)
        cf1 <- function(p) unlist(all_coef_full(p, pinds1, rf1))
        p2_offset <- max(pinds1[[length(pinds1)]])
    } else {
        cf1 <- function(p) return(0)
        p2_offset <- bws_length
        start_default1 <- NULL
    }
    
    terms.rhs2 <- as.list(attr(mt2,"variables"))[-2:-1]
    rfd2 <- lapply(terms.rhs2, dterm_nlpr,  Zenv = Zenv)
    term_names2 <- sapply(rfd2,"[[","term_name")
    names(rfd2) <- term_names2
    rf2 <- lapply(rfd2,"[[","weight")
    weight_names2 <- sapply(rfd2,"[[","weight_name")
    weight_inds2 <- which(weight_names2 != "")
    weight_names2 <- names(rf2)[weight_names2 != ""]
    
    start_fake2 <- lapply(rfd2,"[[","start")
    start_fake2[intersect(names(start), weight_names2)] <- start[intersect(names(start), weight_names2)]
    np_fake2 <- cumsum(sapply(start_fake2,length))
    pinds_fake2 <- build_indices(np_fake2,names(start_fake2)) 
    fake_z_coef <- all_coef_full(unlist(start_fake2), pinds_fake2, rf2) 
    npz <- cumsum(sapply(fake_z_coef,length))
    zinds <- build_indices(npz,names(start_fake2))
   
    start_default2 <- start_fake2[weight_names2]
    if (length(start_default2) > 0) {
        np2 <- cumsum(sapply(start_default2,length))
        pinds2 <- build_indices(np2,names(start_default2))
        pinds2 <- lapply(pinds2, function(x) x + p2_offset)
    } else start_default2 <- NULL
    
    cf0 <- function(p) p[1:bws_length]
   
    rhs_cv <- function(p) {
        h <- cf0(p)
        xi <- as.numeric(X %*% cf1(p))
        zi <- do.call("rbind", lapply(term_names2, function(t2) {
            if (t2 %in% weight_names2) Z[, zinds[[t2]]] %*% rf2[[t2]](p[pinds2[[t2]]])
            else Z[, zinds[[t2]], drop = FALSE]
        }))
        if (ncol(zi) == 1) zi <- as.numeric(zi)
        u <- y - xi
        xi + cv_np(u, zi, h, degree)
    }
    
    fn0 <- function(p,...) {
        if(any(cf0(p)<= 0)) return(NA) 
        else {
            r <- y - rhs_cv(p)
            return(sum(r^2))
        }
    }
    
    hess <- function(x)numDeriv::hessian(fn0,x)
    
    control <- c(list(Ofunction = Ofunction),args)
    ##The default method is "Nelder-Mead" and number of maximum iterations is 5000
    if (!("method" %in% names(control)) & Ofunction == "optim") {        
        control$method <- "Nelder-Mead"
        if (is.null(control$control$maxit)) control$control <- list(maxit=5000)
    }
    if (bws_length > 1) names(bws) <- paste0("bw", 1:bws_length)
    else names(bws) <- "bw"
    
    if (is.null(mt1)) {
        starto <- c(bws, unlist(start_default2)) 
        term_info1 <- NULL   
        model_matrix_map = list(y = 1,
                                X = NULL,
                                Z = 1 + 1:ncol(Z))
        model <- cbind(y, Z)
    } else {
        starto <- c(bws, unlist(start_default1), unlist(start_default2)) 
        term_info1 <- mapply(function(l, pind, xind) {
            l$coef_index <- pind
            l$midas_coef_index <- xind
            l$term_type <- "X"
            l
        },rfd1, pinds1, xinds, SIMPLIFY = FALSE)
        model_matrix_map = list(y = 1,
                                X = 1 + 1:ncol(X),
                                Z = ncol(X) + 1 + 1:ncol(Z))
        model <- cbind(y, X, Z)
    }
    
    term_info2 <- lapply(term_names2, function(t2) {
        res <- rfd2[[t2]]
        if(t2 %in% weight_names2) {
            res$coef_index <- pinds2[[t2]]
            res$midas_coef_index <- zinds[[t2]]
        } else {
            res$midas_coef_index <- zinds[[t2]]
        }
        res$term_type <- "Z"
        res
    })
    
    names(term_info2) <- term_names2
    term_info <- c(term_info1, term_info2)
    
    
    list(coefficients = starto,
         model = model, 
         fn0 = fn0,
         rhs = rhs_cv,
         coef_X  = cf1,
         model_matrix_map = model_matrix_map,
         opt = NULL,
         argmap_opt = control,
         start_opt = starto,
         start_list = c(list(bws = bws), start1 = start_default1, start2 = start_default2),
         call = cl,
         terms = mt1,
         terms2 = mt2,
         formula = f,
         bws = bws,
         term_info = term_info,
         hessian = hess,
         Zenv = Zenv,
         nobs = nrow(Z),
         degree = degree)   
}


##' MIDAS Single index regression
##'
##' Function for fitting SI MIDAS regression without the formula interface
##' @param y model response
##' @param X prepared matrix of high frequency variable lags for MMM term
##' @param p.ar length of AR part
##' @param weight the weight function
##' @param degree the degree of local polynomial
##' @param start_bws the starting values bandwith
##' @param start_x the starting values for weight function
##' @param start_ar the starting values for AR part. Should be the same length as \code{p}
##' @param method a method passed to \link{optim}, defaults to Nelder-Mead
##' @param ... additional parameters to \link{optim}
##' @return an object similar to \code{midas_r} object
##' @author Virmantas Kvedaras, Vaidotas Zemlys
##' 
##' @importFrom stats na.omit optim
##'
##' @export
##' 
midas_si_plain <- function(y, X, p.ar = NULL, weight, degree = 1, start_bws, start_x, start_ar = NULL, method = "Nelder-Mead", ...) {
    d <- ncol(X)
    
    yy <- NULL 
    if(!is.null(p.ar)) {
        p.ar <- as.integer(p.ar)
        if (length(start_ar)!=p.ar) stop("The length of starting values vector for AR terms must the same as the number of AR terms")
        yy <- as.matrix(mls(y, 1:p.ar, 1))
    } 
    
    model <- na.omit(cbind(y,X,yy))
    
    y <- model[,1]
    X <- model[, 2:(ncol(X) + 1)]
    
    if (is.null(yy)) { 
        yy <- 0
    } else {
        yy <- model[, (ncol(X) + 2):ncol(model)]
    }
    n <- nrow(model)
    
    sx <- length(start_x)
    
    rhs_cv <- function(p) {
        h <- p[1]
        pr <- p[2:(1 + sx)]
        if (is.null(yy)) pyy <- 0 else {
            pyy <- p[(2 + sx):length(p)]
        }
        yar <- yy %*% pyy
        u <- y - yar
        yar + cv_np(u, as.vector(X %*% weight(pr, ncol(X))), h, degree)
    }
    
    fn0 <- function(p) {
        sum((y - rhs_cv(p))^2)
    }
    
    start <- c(start_bws, start_x, start_ar)
    opt <- optim(start, fn0, method = method,...)
    par <- opt$par
    call <- match.call()
    
    rhs <- function(p) {
        h <- p[1]
        pr <- p[2:(1 + sx)]
        if (is.null(yy)) pyy <- 0 else {
            pyy <- p[(2 + sx):length(p)]
        }
        yar <- yy %*% pyy
        u <- y - yar
        xi <-  as.vector(X %*% weight(pr, ncol(X)))
        np <- g_np(u, xi,  xeval = xi, h, degree)
        list(fitted = yar + np, xi = yar, z = xi, y = y, g = np)
    }
    fitted.values <- as.vector(rhs(par)$fitted)
    names(par) <- c("h", paste0("x", 1:length(start_x)), paste0("y", 1:length(start_ar)))

    list(coefficients = par,
         midas_coefficients = weight(par[2:(1 + sx)],ncol(X)),
         bws = par[1],
         model = model,
         weights = weight,
         fn0 = fn0,
         rhs_cv = rhs_cv,
         rhs = rhs,
         opt = opt,
         call = call,
         hessian = function(x)numDeriv::hessian(fn0,x),
         fitted.values = fitted.values,
         residuals = as.vector(y - fitted.values),
         start = start)
    
}

##' MIDAS Partialy linear non-parametric regression
##'
##' Function for fitting PL MIDAS regression without the formula interface
##' @param y model response
##' @param X prepared matrix of high frequency variable lags for MMM term
##' @param z a vector, data for the non-parametric part
##' @param p.ar length of AR part
##' @param weight the weight function
##' @param degree the degree of local polynomial
##' @param start_bws the starting values bandwith
##' @param start_x the starting values for weight function
##' @param start_ar the starting values for AR part. Should be the same length as \code{p}
##' @param method a method passed to \link{optim}
##' @param ... additional parameters to \link{optim}
##' @return an object similar to \code{midas_r} object
##' @author Virmantas Kvedaras, Vaidotas Zemlys
##' 
##' @importFrom stats na.omit 
##'
##' @export
##' 
midas_pl_plain <- function(y, X, z, p.ar = NULL, weight, degree = 1, start_bws, start_x, start_ar = NULL, method = c("Nelder-Mead"), ...) {
    d <- ncol(X)
    
    yy <- NULL 
    
    if(!is.null(p.ar)) {
        p.ar <- as.integer(p.ar)
        if (length(start_ar)!=p.ar) stop("The length of starting values vector for AR terms must the same as the number of AR terms")
        yy <- as.matrix(mls(y, 1:p.ar, 1))
    } 
    z <- as.numeric(z)
    y <- as.numeric(y)
    if ((length(z) != length(y)) &  (length(y) != nrow(X))) stop("The dimensions of the data do not match, z and y must be vectors of equal length, which must coincide with number of rows of X")
    
    model <- na.omit(cbind(y, X, z, yy ))
    
    y <- model[,1]
    X <- model[, 2:(ncol(X) + 1)]
    z <- model[, ncol(X) + 2]
   
    if (is.null(yy)) { 
        yy <- 0
    } else {
        yy <- model[, (ncol(X) + 3):ncol(model)]
    }
    n <- nrow(model)
    
    sx <- length(start_x)
    
    rhs_cv <- function(p) {
        h <- p[1]
        pr <- p[2:(1 + sx)]
        if (is.null(yy)) pyy <- 0 else {
            pyy <- p[(2 + sx):length(p)]
        }
        yar <- yy %*% pyy
        xi <- as.vector(X %*% weight(pr, ncol(X)))
        u <- y - yar - xi
        yar + xi + cv_np(u, z, h, degree)
    }
    
    fn0 <- function(p) {
        sum((y - rhs_cv(p))^2)
    }
    start <- c(start_bws, start_x, start_ar)
    opt <- optim(start, fn0, method = method, ...)
    par <- opt$par
    call <- match.call()
    
    rhs <- function(p) {
        h <- p[1]
        pr <- p[2:(1 + sx)]
        if (is.null(yy)) pyy <- 0 else {
            pyy <- p[(2 + sx):length(p)]
        }
        yar <- yy %*% pyy
        xi <-  as.vector(X %*% weight(pr, ncol(X)))
        u <- y - yar  - xi
        
        np <- g_np(u, z, xeval = z, h, degree)
        list(fitted = yar + xi + np, xi = xi + yar, z = z, g = np, y = y)
    }
    fitted.values <- as.vector(rhs(par)$fitted)
    names(par) <- c("h", paste0("x", 1:length(start_x)), paste0("y", 1:length(start_ar)))
    
    list(coefficients = par,
         midas_coefficients = weight(par[2:(1 + sx)],ncol(X)),
         bws = par[1],
         model = model,
         weights = weight,
         fn0 = fn0,
         rhs_cv = rhs_cv,
         rhs = rhs,
         opt = opt,
         call = call,
         hessian = function(x)numDeriv::hessian(fn0,x),
         fitted.values = fitted.values,
         residuals = as.vector(y - fitted.values),
         start = start)
    
}

cv_np <- function(y, x, h, degree = 1) {
    cvg <- rep(NA, length(y))
    for (i in 1:length(y)) {
        if (is.null(dim(x))) w <- kfun(x[i], x, h)
        else  w <- kfun(x[i, ], x, h)
        if (degree > 0) {
            if (is.null(dim(x))) {
                xc <-  matrix(x - x[i], ncol = 1)
            } else xc <- sweep(x, 2, x[i, ], "-")
            if(degree > 1) xc <- poly(xc, degree = degree, raw = TRUE, simple = TRUE)
            cc <- suppressWarnings(lsfit(xc[-i,], y[-i], wt = w[-i], intercept = TRUE))
            cvg[i] <- coef(cc)[1]
        } else {
            cvg[i] <- sum(w[-i]*y[-i])/sum(w[-i])
        }
    }
    cvg
}

g_np <- function(y, x, xeval, h, degree = 1) {
    res <- rep(NA, length(xeval))
    for (i in 1:length(xeval)) {
        w <- kfun(xeval[i], x, h)
        if(degree > 0) {
            xc <- poly(x - xeval[i], degree = degree, raw = TRUE, simple = TRUE)
            cc <- lsfit(xc, y, wt = w, intercept = TRUE)
            res[i] <- coef(cc)[1]
        } else {
            res[i] <- sum(w*y)/sum(w)
        }
    }
    res
}

g_np_mv <- function(u, z, zeval, h, degree = 1) {
    res <- rep(NA, nrow(zeval))
    for (i in 1:nrow(zeval)) {
        w <- kfun(zeval[i, ], z, h)
        if(degree > 0) {
            zc <- sweep(z, 2, zeval[i, ], "-")
            if(degree > 1) zc <- poly(zc, degree = degree, raw = TRUE, simple = TRUE)
            cc <- lsfit(zc, u, wt = w, intercept = TRUE)
            res[i] <- coef(cc)[1]
        } else {
            res[i] <- sum(w*u)/sum(w)
        }
    }
    res
}

midas_sp_fit <- function(object, Xeval = NULL, Zeval = NULL) {
    
    p <- coef(object)
    
    y <- object$model[, 1]
    h <- p[1:length(object$bws)]
    Z <- object$model[, object$model_matrix_map$Z, drop = FALSE]

    zterms <- sapply(object$term_info, "[[", "term_type") == "Z"
    
    ti <- object$term_info[zterms]
    
    calc_zi <- function(term, ZZ) {
        if (is.null(term$coef_index)) zi <- ZZ[, term$midas_coef_index, drop = FALSE]
        else zi <- ZZ[, term$midas_coef_index] %*% term$weight(p[term$coef_index])
    }
    
    zi <- do.call("rbind", lapply(ti, calc_zi, ZZ = Z))
    if (is.null(Zeval)) zi_eval <- zi
    else zi_eval <- do.call("rbind", lapply(ti, calc_zi, ZZ = Zeval))
   
    if (is.null(object$model_matrix_map$X)) {
        xi <- 0  
    } else {
        xi <- object$model[, object$model_matrix_map$X] %*% object$coef_X(p)
    } 
    
    u <- y - xi
    ghat <- g_np_mv(u, zi, zi_eval, h, degree = object$degree)
    
    if(is.null(Xeval)) xi_eval <- xi
    else xi_eval <- Xeval %*% object$coef_X(p)
    
    fit <- xi_eval + ghat
    list(fitted.values = fit, xi = xi_eval, zi = zi_eval, g = ghat, y = y)
}


#' @importFrom stats dnorm
kfun <- function(z0, z, h) {
    if (is.null(ncol(z))) {
        dnorm((z - z0)/h)
    } else {
        z0 <- as.numeric(z0)
        h <- as.numeric(h)
        apply(dnorm(scale(z, center = as.numeric(z0), scale = h)), 1, prod)
    }
}

all_coef_full <- function(p, pinds, rf) {              
    pp <- lapply(pinds,function(x)p[x])     
    res <- mapply(function(fun,param)fun(param),rf ,pp,SIMPLIFY=FALSE)
    return(res)
}

Try the midasr package in your browser

Any scripts or data that you put into this service are public.

midasr documentation built on Feb. 23, 2021, 5:11 p.m.