R/lagr.tune.r

Defines functions lagr.tune

Documented in lagr.tune

#' Estimate the bandwidth parameter for a lagr model
#'
#' \code{lagr.tune} estimates the bandwidth parameter for a LAGR model.
#'
#' This method calls \code{lagr} repeatedly via the \code{optimize} function, searching for the bandwidth that minimizes a bandwidth selection criterion. It returns the profiled value of the selection criterion at each bandwidth that is used in the evaluation.
#'
#' @param formula symbolic representation of the model
#' @param data data frame containing observations of all the terms represented in the formula
#' @param weights vector of prior observation weights (due to, e.g., overdispersion). Not related to the kernel weights.
#' @param family exponential family distribution of the response
#' @param coords matrix of locations, with each row giving the location at which the corresponding row of data was observed
#' @param fit.loc matrix of locations where the local models should be fitted
#' @param longlat \code{TRUE} indicates that the coordinates are specified in longitude/latitude, \code{FALSE} indicates Cartesian coordinates. Default is \code{FALSE}.
#' @param kernel kernel function for generating the local observation weights
#' @param bw bandwidth for the kernel
#' @param bw.type type of bandwidth - options are \code{dist} for distance (the default), \code{knn} for nearest neighbors (bandwidth a proportion of \code{n}), and \code{nen} for nearest effective neighbors (bandwidth a proportion of the sum of squared residuals from a global model)
#' @param bwselect.method criterion to minimize when tuning bandwidth - options are \code{AICc}, \code{BICg}, and \code{GCV}
#' @param range allowable range of the bandwidth
#' @param tol.bw global error tolerance for minimizing the bandwidth selection criterion
#' @param tol.loc local error tolerance for converting an adaptive bandwidth (e.g. \code{knn} or \code{nen}) to a distance
#' @param varselect.method criterion to minimize in the regularization step of fitting local models - options are \code{AIC}, \code{AICc}, \code{BIC}, \code{GCV}
#' @param tuning logical indicating whether this model will be used to tune the bandwidth, in which case only the tuning criteria are returned
#' @param a pre-specified matrix of distances between locations
#' @param verbose print detailed information about our progress?
#'
#' @return \code{list(bw, trace)} where \code{bw} minimizes the bandwidth selection criterion and trace is a data frame of each bandwidth that was tried during the optimization, along with the resulting degrees of freedom used inthe LAGR model and the value of the bandwidth selection criterion.
#' 
#' @export
lagr.tune = function(formula, data, family=gaussian(), range=NULL, weights=NULL, coords, oracle=NULL, kernel=NULL, bw.type=c('dist','knn','nen'), varselect.method=c('AIC','BIC','AICc','jacknife','wAIC', 'wAICc'), verbose=FALSE, longlat=FALSE, tol.loc=.Machine$double.eps^0.25, tol.bw=.Machine$double.eps^0.25, bwselect.method=c('AIC', 'AICc','GCV','BIC'), lambda.min.ratio=0.001, n.lambda=50, lagr.convergence.tol=0.001, lagr.max.iter=20, na.action=na.fail, contrasts=NULL) {
    result = list()
    class(result) <- "lagr.bw"

    if (is.null(kernel))
        stop("Error: There is no kernel specified!")
    
    cl <- match.call()
    formula = eval.parent(substitute_q(formula, sys.frame(sys.parent())))
    mf = eval(lagr.parse.model.frame(formula, data, family, weights, coords, NULL, longlat, na.action, contrasts))  

    y = mf$y
    x = mf$x
    w = mf$w
    mt = mf$mt
    coords = mf$coords
    dist = mf$dist
    max.dist = mf$max.dist
    min.dist = mf$min.dist
    family = mf$family
    
    bw.type = match.arg(bw.type)
    varselect.method = match.arg(varselect.method)
    bwselect.method = match.arg(bwselect.method)
        
    if (!is.null(range)) {
        beta1 = min(range)
        beta2 = max(range)
    } else {
        if (bw.type == "dist") {
            beta1 <- min.dist
            beta2 <- max.dist
        } else if (bw.type == 'knn') {
            beta1 <- 0
            beta2 <- 1
        } else if (bw.type == 'nen') {
            meany = mean(y)
            if (family=='binomial') {beta2 = sum(w / (meany*(1-meany)) * (y-meany)**2)}
            else if (family=='poisson') {beta2 = sum(w / meany * (y-meany)**2)}
            else if (family=='gaussian') {beta2 = sum(w * (y-meany)**2)}
            beta1 = beta2 / 1000
        }
    }
    
    #Create a new environment, in which we will store the likelihood trace from bandwidth selection.
    oo = new.env()
    opt <- optimize(
        lagr.tune.bw,
        interval=c(beta1, beta2),
        tol=tol.bw,
        maximum=FALSE,
        x=x,
        y=y,
        coords=coords,
        dist=dist,
        weights=w,
        env=oo,
        oracle=oracle,
        family=family,
        varselect.method=varselect.method,
        kernel=kernel,
        verbose=verbose,
        bw.type=bw.type,
        tol.loc=tol.loc,
        min.dist=min.dist,
        max.dist=max.dist,
        bwselect.method=bwselect.method,
        lambda.min.ratio=lambda.min.ratio,
        n.lambda=n.lambda, 
        lagr.convergence.tol=lagr.convergence.tol,
        lagr.max.iter=lagr.max.iter
    )

    trace = oo$trace[!duplicated(oo$trace[,1]),]
    rm(oo)

    bdwt <- opt$minimum
    result[['bw']] <- bdwt
    result[['trace']] <- trace
    result[['bwselect.method']] <- bwselect.method
    result[['call']] <- cl
    result[['opt']] <- opt
    result[['bw.type']] = bw.type
    result[['dim']] = mf$dim
    result[['kernel']] = kernel
    
    return(result)
}
wrbrooks/lagr documentation built on May 4, 2019, 11:59 a.m.