R/all.R
In KernSmooth: Functions for Kernel Smoothing Supporting Wand & Jones (1995)

Documented in bkde bkde2D bkfe dpih dpik dpill locpoly

## file KernSmooth/R/all.R
## original file Copyright (C) M. P. Wand
## modifications for use with R copyright (C) B. D. Ripley
## Unlimited use and distribution (see LICENCE).

bkde <- function(x, kernel = "normal", canonical = FALSE, bandwidth,
                 gridsize = 401L, range.x, truncate = TRUE)
{
    ## Install safeguard against non-positive bandwidths:
    if (!missing(bandwidth) && bandwidth <= 0)
        stop("'bandwidth' must be strictly positive")

    kernel <- match.arg(kernel,
                        c("normal", "box", "epanech", "biweight", "triweight"))

    ## Rename common variables

    n <- length(x)
    M <- gridsize

    ## Set canonical scaling factors

    del0 <- switch(kernel,
                   "normal" = (1/(4*pi))^(1/10),
                   "box" = (9/2)^(1/5),
                   "epanech" = 15^(1/5),
                   "biweight" = 35^(1/5),
                   "triweight" = (9450/143)^(1/5))

    if (length(canonical) != 1L || !is.logical(canonical))
        stop("'canonical' must be a length-1 logical vector")

    ## Set default bandwidth

    h <- if (missing(bandwidth)) del0 * (243/(35*n))^(1/5)*sqrt(var(x))
    else if(canonical) del0 * bandwidth else bandwidth

    ## Set kernel support values

    tau <-  if (kernel == "normal") 4 else 1

    if (missing(range.x)) range.x <- c(min(x)-tau*h, max(x)+tau*h)
    a <- range.x[1L]
    b <- range.x[2L]

    ## Set up grid points and bin the data

    gpoints <- seq(a, b, length.out = M)
    gcounts <- linbin(x, gpoints, truncate)

    ## Compute kernel weights

    delta  <- (b - a)/(h * (M-1L))
    L <- min(floor(tau/delta), M)
    if (L == 0)
        warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'")

    lvec <- 0L:L
    kappa <- if (kernel == "normal")
        dnorm(lvec*delta)/(n*h)
    else if (kernel == "box")
        0.5*dbeta(0.5*(lvec*delta+1), 1, 1)/(n*h)
    else if (kernel == "epanech")
        0.5*dbeta(0.5*(lvec*delta+1), 2, 2)/(n*h)
    else if (kernel == "biweight")
        0.5*dbeta(0.5*(lvec*delta+1), 3, 3)/(n*h)
    else if (kernel == "triweight")
        0.5*dbeta(0.5*(lvec*delta+1), 4, 4)/(n*h)

    ## Now combine weight and counts to obtain estimate
    ## we need P >= 2L+1L, M: L <= M.
    P <- 2^(ceiling(log(M+L+1L)/log(2)))
    kappa <- c(kappa, rep(0, P-2L*L-1L), rev(kappa[-1L]))
    tot <- sum(kappa) * (b-a)/(M-1L) * n # should have total weight one
    gcounts <- c(gcounts, rep(0L, P-M))
    kappa <- fft(kappa/tot)
    gcounts <- fft(gcounts)
    list(x = gpoints, y = (Re(fft(kappa*gcounts, TRUE))/P)[1L:M])
}


bkde2D <-
    function(x, bandwidth, gridsize = c(51L, 51L), range.x, truncate = TRUE)
{
    ## Install safeguard against non-positive bandwidths:

    if (!missing(bandwidth) && min(bandwidth) <= 0)
        stop("'bandwidth' must be strictly positive")

    ## Rename common variables

    n <- nrow(x)
    M <- gridsize
    h <- bandwidth
    tau <- 3.4                          # For bivariate normal kernel.

    ## Use same bandwidth in each direction
    ## if only a single bandwidth is given.

    if (length(h) == 1L) h <- c(h, h)

    ## If range.x is not specified then set it at its default value.

    if (missing(range.x)) {
        range.x <- list(0, 0)
        for (id in (1L:2L))
            range.x[[id]] <- c(min(x[, id])-1.5*h[id], max(x[, id])+1.5*h[id])
    }

    a <- c(range.x[[1L]][1L], range.x[[2L]][1L])
    b <- c(range.x[[1L]][2L], range.x[[2L]][2L])

    ## Set up grid points and bin the data

    gpoints1 <- seq(a[1L], b[1L], length.out = M[1L])
    gpoints2 <- seq(a[2L], b[2L], length.out = M[2L])

    gcounts <- linbin2D(x, gpoints1, gpoints2)

    ## Compute kernel weights

    L <- numeric(2L)
    kapid <- list(0, 0)
    for (id in 1L:2L) {
        L[id] <- min(floor(tau*h[id]*(M[id]-1)/(b[id]-a[id])), M[id] - 1L)
        lvecid <- 0:L[id]
        facid <- (b[id] - a[id])/(h[id]*(M[id]-1L))
        z <- matrix(dnorm(lvecid*facid)/h[id])
        tot <- sum(c(z, rev(z[-1L]))) * facid * h[id]
        kapid[[id]] <- z/tot
    }
    kapp <- kapid[[1L]] %*% (t(kapid[[2L]]))/n

    if (min(L) == 0)
        warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'")

    ## Now combine weight and counts using the FFT to obtain estimate

    P <- 2^(ceiling(log(M+L)/log(2)))   # smallest powers of 2 >= M+L
    L1 <- L[1L] ; L2 <- L[2L]
    M1 <- M[1L] ; M2 <- M[2L]
    P1 <- P[1L] ; P2 <- P[2L]

    rp <- matrix(0, P1, P2)
    rp[1L:(L1+1), 1L:(L2+1)] <- kapp
    if (L1) rp[(P1-L1+1):P1, 1L:(L2+1)] <- kapp[(L1+1):2, 1L:(L2+1)]
    if (L2) rp[, (P2-L2+1):P2] <- rp[, (L2+1):2]
    ## wrap-around version of "kapp"

    sp <- matrix(0, P1, P2)
    sp[1L:M1, 1L:M2] <- gcounts
    ## zero-padded version of "gcounts"

    rp <- fft(rp)                       # Obtain FFT's of r and s
    sp <- fft(sp)
    rp <- Re(fft(rp*sp, inverse = TRUE)/(P1*P2))[1L:M1, 1L:M2]
    ## invert element-wise product of FFT's
    ## and truncate and normalise it

    ## Ensure that rp is non-negative

    rp <- rp * matrix(as.numeric(rp>0), nrow(rp), ncol(rp))

    list(x1 = gpoints1, x2 = gpoints2, fhat = rp)
}


bkfe <- function(x, drv, bandwidth, gridsize = 401L, range.x,
                 binned = FALSE, truncate = TRUE)
{
    ## Install safeguard against non-positive bandwidths:

    if (!missing(bandwidth) && bandwidth <= 0)
        stop("'bandwidth' must be strictly positive")

    if (missing(range.x) && !binned) range.x <- c(min(x), max(x))

    ## Rename variables

    M <- gridsize
    a <- range.x[1L]
    b <- range.x[2L]
    h <- bandwidth

    ## Bin the data if not already binned

    if (!binned) {
        gpoints <- seq(a, b, length.out = gridsize)
        gcounts <- linbin(x, gpoints, truncate)
    } else {
        gcounts <- x
        M <- length(gcounts)
        gpoints <- seq(a, b, length.out = M)
    }

    ## Set the sample size and bin width

    n <- sum(gcounts)
    delta <- (b-a)/(M-1)

    ## Obtain kernel weights

    tau <- 4 + drv
    L <- min(floor(tau*h/delta), M)

    if (L == 0)
        warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'")

    lvec <- 0L:L
    arg <- lvec*delta/h

    kappam <- dnorm(arg)/(h^(drv+1))
    hmold0 <- 1
    hmold1 <- arg
    hmnew <- 1
    if (drv >= 2L)
        for (i in (2L:drv)) {
            hmnew <- arg*hmold1 - (i-1)*hmold0
            hmold0 <- hmold1       # Compute mth degree Hermite polynomial
            hmold1 <- hmnew        # by recurrence.
        }
    kappam <- hmnew * kappam

    ## Now combine weights and counts to obtain estimate
    ## we need P >= 2L+1L, M: L <= M.
    P <- 2^(ceiling(log(M+L+1L)/log(2)))
    kappam <- c(kappam, rep(0,  P-2L*L-1L), rev(kappam[-1L]))
    Gcounts <- c(gcounts, rep(0, P-M))
    kappam <- fft(kappam)
    Gcounts <- fft(Gcounts)

    sum(gcounts * (Re(fft(kappam*Gcounts, TRUE))/P)[1L:M] )/(n^2)
}

## For obtaining preliminary estimates of
## quantities required for the "direct plug-in"
## regression bandwidth selector based on
## blocked qth degree polynomial fits.

blkest <- function(x, y, Nval, q)
{
    n <- length(x)

    ## Sort the (x, y) data with respect to
    ## the x's.

    datmat <- cbind(x, y)
    datmat <- datmat[sort.list(datmat[, 1L]), ]
    x <- datmat[, 1L]
    y <- datmat[, 2L]

    ## Set up arrays for FORTRAN programme "blkest"

    qq <- q + 1L
    xj <- rep(0, n)
    yj <- rep(0, n)
    coef <- rep(0, qq)
    Xmat <- matrix(0, n, qq)
    wk <- rep(0, n)
    qraux <- rep(0, qq)
    sigsqe <- 0
    th22e <- 0
    th24e <- 0

    out <- .Fortran(F_blkest, as.double(x), as.double(y), as.integer(n),
                    as.integer(q), as.integer(qq), as.integer(Nval), as.double(xj),
                    as.double(yj), as.double(coef), as.double(Xmat), as.double(wk),
                    as.double(qraux), as.double(sigsqe), as.double(th22e),
                    as.double(th24e))

    list(sigsqe = out[[13]], th22e = out[[14]], th24e = out[[15]])
}

## Chooses the number of blocks for the premilinary
## step of a plug-in rule using Mallows' C_p.
cpblock <- function(X, Y, Nmax, q)
{
    n <- length(X)

    ## Sort the (X, Y) data with respect tothe X's.

    datmat <- cbind(X, Y)
    datmat <- datmat[sort.list(datmat[, 1L]), ]
    X <- datmat[, 1L]
    Y <- datmat[, 2L]

    ## Set up arrays for FORTRAN subroutine "cp"

    qq <- q + 1L
    RSS <- rep(0, Nmax)
    Xj <- rep(0, n)
    Yj <- rep(0, n)
    coef <- rep(0, qq)
    Xmat <- matrix(0, n, qq)
    Cpvals <- rep(0, Nmax)
    wk <- rep(0, n)
    qraux <- rep(0, qq)

    ## remove unused 'q' 2007-07-10
    out <- .Fortran(F_cp, as.double(X), as.double(Y), as.integer(n),
                    as.integer(qq), as.integer(Nmax), as.double(RSS), as.double(Xj),
                    as.double(Yj), as.double(coef), as.double(Xmat), as.double(wk),
                    as.double(qraux), Cpvals = as.double(Cpvals))

    Cpvec <- out$Cpvals

    order(Cpvec)[1L]
}

dpih <- function(x, scalest = "minim", level = 2L, gridsize = 401L,
                 range.x = range(x), truncate = TRUE)
{
    if (level > 5L) stop("Level should be between 0 and 5")

    ## Rename variables

    n <- length(x)
    M <- gridsize
    a <- range.x[1L]
    b <- range.x[2L]

    ## Set up grid points and bin the data

    gpoints <- seq(a, b, length.out = M)
    gcounts <- linbin(x, gpoints, truncate)

    ## Compute scale estimate

    scalest <- match.arg(scalest, c("minim", "stdev", "iqr"))
    scalest <- switch(scalest,
                      "stdev" = sqrt(var(x)),
                      "iqr"= (quantile(x, 3/4)-quantile(x, 1/4))/1.349,
                      "minim" = min((quantile(x, 3/4)-quantile(x, 1/4))/1.349, sqrt(var(x))) )

    if (scalest == 0) stop("scale estimate is zero for input data")

    ## Replace input data by standardised data for numerical stability:

    sx <- (x-mean(x))/scalest
    sa <- (a-mean(x))/scalest ; sb <- (b-mean(x))/scalest

    ## Set up grid points and bin the data:

    gpoints <- seq(sa, sb, length.out = M)
    gcounts <- linbin(sx, gpoints, truncate)
##    delta <- (sb-sa)/(M - 1)

    ## Perform plug-in steps

    hpi <- if (level == 0L) (24*sqrt(pi)/n)^(1/3)
    else if (level == 1L) {
        alpha <- (2/(3*n))^(1/5)*sqrt(2) # bandwidth for psi_2
        psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE)
        (6/(-psi2hat*n))^(1/3)
    } else if (level == 2L) {
        alpha <- ((2/(5*n))^(1/7))*sqrt(2) # bandwidth for psi_4
        psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2
        psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE)
        (6/(-psi2hat*n))^(1/3)
    } else if (level == 3L) {
        alpha <- ((2/(7*n))^(1/9))*sqrt(2) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2
        psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE)
        (6/(-psi2hat*n))^(1/3)
    } else if (level == 4L) {
        alpha <- ((2/(9*n))^(1/11))*sqrt(2) # bandwidth for psi_8
        psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2
        psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE)
        (6/(-psi2hat*n))^(1/3)
    } else if (level == 5L) {
        alpha <- ((2/(11*n))^(1/13))*sqrt(2) # bandwidth for psi_10
        psi10hat <- bkfe(gcounts, 10L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-105*sqrt(2/pi)/(psi10hat*n))^(1/11) # bandwidth for psi_8
        psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2
        psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE)
        (6/(-psi2hat*n))^(1/3)
    }

    scalest * hpi
}

dpik <- function(x, scalest = "minim", level = 2L, kernel = "normal",
                 canonical = FALSE, gridsize = 401L, range.x = range(x),
                 truncate = TRUE)
{
    if (level > 5L) stop("Level should be between 0 and 5")

    kernel <- match.arg(kernel,
                        c("normal", "box", "epanech", "biweight", "triweight"))

    ## Set kernel constants
    del0 <- if (canonical) 1 else switch(kernel,
                                         "normal" = 1/((4*pi)^(1/10)),
                                         "box" = (9/2)^(1/5),
                                         "epanech" = 15^(1/5),
                                         "biweight" = 35^(1/5),
                                         "triweight" = (9450/143)^(1/5))

    ## Rename variables
    n <- length(x)
    M <- gridsize
    a <- range.x[1L]
    b <- range.x[2L]

    ## Set up grid points and bin the data
    gpoints <- seq(a, b, length.out = M)
    gcounts <- linbin(x, gpoints, truncate)

    ## Compute scale estimate
    scalest <- match.arg(scalest, c("minim", "stdev", "iqr"))
    scalest <- switch(scalest,
                      "stdev" = sqrt(var(x)),
                      "iqr"= (quantile(x, 3/4)-quantile(x, 1/4))/1.349,
                      "minim" = min((quantile(x, 3/4)-quantile(x, 1/4))/1.349, sqrt(var(x))) )

    if (scalest == 0) stop("scale estimate is zero for input data")

    ## Replace input data by standardised data for numerical stability:
    sx <- (x-mean(x))/scalest
    sa <- (a-mean(x))/scalest ; sb <- (b-mean(x))/scalest

    ## Set up grid points and bin the data:
    gpoints <- seq(sa, sb, length.out = M)
    gcounts <- linbin(sx, gpoints, truncate)
##    delta <- (sb-sa)/(M-1)

    ## Perform plug-in steps:

    psi4hat <- if (level == 0L) 3/(8*sqrt(pi))
    else if (level == 1L) {
        alpha <- (2*(sqrt(2))^7/(5*n))^(1/7) # bandwidth for psi_4
        bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
    } else if (level == 2L) {
        alpha <- (2*(sqrt(2))^9/(7*n))^(1/9) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
    } else if (level == 3L) {
        alpha <- (2*(sqrt(2))^11/(9*n))^(1/11) # bandwidth for psi_8
        psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
    } else if (level == 4L) {
        alpha <- (2*(sqrt(2))^13/(11*n))^(1/13) # bandwidth for psi_10
        psi10hat <- bkfe(gcounts, 10L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-105*sqrt(2/pi)/(psi10hat*n))^(1/11) # bandwidth for psi_8
        psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
    } else if (level == 5L) {
        alpha <- (2*(sqrt(2))^15/(13*n))^(1/15) # bandwidth for psi_12
        psi12hat <- bkfe(gcounts, 12L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (945*sqrt(2/pi)/(psi12hat*n))^(1/13) # bandwidth for psi_10
        psi10hat <- bkfe(gcounts, 10L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-105*sqrt(2/pi)/(psi10hat*n))^(1/11) # bandwidth for psi_8
        psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6
        psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE)
        alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4
        bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE)
    }

    scalest * del0 * (1/(psi4hat*n))^(1/5)
}

## Computes a direct plug-in selector of the
## bandwidth for local linear regression as
## described in the 1996 J. Amer. Statist. Assoc.
## paper by Ruppert, Sheather and Wand.
dpill <- function(x, y, blockmax = 5, divisor = 20, trim = 0.01,
                  proptrun = 0.05, gridsize = 401L,
                  range.x = range(x), truncate = TRUE)
{
    ## Trim the 100(trim)% of the data from each end (in the x-direction).

    xy <- cbind(x, y)
    xy <- xy[sort.list(xy[, 1L]), ]
    x <- xy[, 1L]
    y <- xy[, 2L]
    indlow <- floor(trim*length(x)) + 1
    indupp <- length(x) - floor(trim*length(x))

    x <- x[indlow:indupp]
    y <- y[indlow:indupp]

    ## Rename common parameters
    n <- length(x)
    M <- gridsize
    a <- range.x[1L]
    b <- range.x[2L]

    ## Bin the data

    gpoints <- seq(a, b, length.out = M)
    out <- rlbin(x, y, gpoints, truncate)
    xcounts <- out$xcounts
    ycounts <- out$ycounts

    ## Choose the value of N using Mallow's C_p
    Nmax <- max(min(floor(n/divisor), blockmax), 1)
    Nval <- cpblock(x, y, Nmax, 4)

    ## Estimate sig^2, theta_22 and theta_24 using quartic fits
    ## on "Nval" blocks.

    out <- blkest(x, y, Nval, 4)
    sigsqQ <- out$sigsqe
    th24Q <- out$th24e

    ## Estimate theta_22 using a local cubic fit
    ## with a "rule-of-thumb" bandwidth: "gamseh"

    gamseh <- (sigsqQ*(b-a)/(abs(th24Q)*n))
    if (th24Q < 0) gamseh <- (3*gamseh/(8*sqrt(pi)))^(1/7)
    if (th24Q > 0) gamseh <- (15*gamseh/(16*sqrt(pi)))^(1/7)

    mddest <- locpoly(xcounts, ycounts, drv=2L, bandwidth=gamseh,
                      range.x=range.x, binned=TRUE)$y

    llow <- floor(proptrun*M) + 1
    lupp <- M - floor(proptrun*M)
    th22kn <- sum((mddest[llow:lupp]^2)*xcounts[llow:lupp])/n

    ## Estimate sigma^2 using a local linear fit
    ## with a "direct plug-in" bandwidth: "lamseh"
    C3K <- (1/2) + 2*sqrt(2) - (4/3)*sqrt(3)
    C3K <- (4*C3K/(sqrt(2*pi)))^(1/9)
    lamseh <- C3K*(((sigsqQ^2)*(b-a)/((th22kn*n)^2))^(1/9))

    ## Now compute a local linear kernel estimate of
    ## the variance.
    mest <- locpoly(xcounts, ycounts, bandwidth=lamseh,
                    range.x=range.x, binned=TRUE)$y
    Sdg <- sdiag(xcounts, bandwidth=lamseh,
                 range.x=range.x, binned=TRUE)$y
    SSTdg <- sstdiag(xcounts, bandwidth=lamseh,
                     range.x=range.x, binned=TRUE)$y
    sigsqn <- sum(y^2) - 2*sum(mest*ycounts) + sum((mest^2)*xcounts)
    sigsqd <- n - 2*sum(Sdg*xcounts) + sum(SSTdg*xcounts)
    sigsqkn <- sigsqn/sigsqd

    ## Combine to obtain final answer.
    (sigsqkn*(b-a)/(2*sqrt(pi)*th22kn*n))^(1/5)
}

## For application of linear binning to a univariate data set.
linbin <- function(X, gpoints, truncate = TRUE)
{
    n <- length(X)
    M <- length(gpoints)
    trun <- if (truncate) 1L else 0L
    a <- gpoints[1L]
    b <- gpoints[M]
    .Fortran(F_linbin, as.double(X), as.integer(n),
             as.double(a), as.double(b), as.integer(M),
             as.integer(trun), double(M))[[7]]
}

## Creates the grid counts from a bivariate data set X
## over an equally-spaced set of grid points
## contained in "gpoints" using the linear
## binning strategy. Note that the FORTRAN subroutine
## "lbtwod" is called.
linbin2D <- function(X, gpoints1, gpoints2)
{
    n <- nrow(X)
    X <- c(X[, 1L], X[, 2L])
    M1 <- length(gpoints1)
    M2 <- length(gpoints2)
    a1 <- gpoints1[1L]
    a2 <- gpoints2[1L]
    b1 <- gpoints1[M1]
    b2 <- gpoints2[M2]
    out <- .Fortran(F_lbtwod, as.double(X), as.integer(n),
                    as.double(a1), as.double(a2), as.double(b1), as.double(b2),
                    as.integer(M1), as.integer(M2), double(M1*M2))
    matrix(out[[9L]], M1, M2)
}

## For computing a binned local polynomial
## regression estimator of a univariate regression
## function or its derivative.
## The data are discretised on an equally
## spaced grid. The bandwidths are discretised on a
## logarithmically spaced grid.
locpoly <- function(x, y, drv = 0L, degree, kernel = "normal",
                    bandwidth, gridsize = 401L, bwdisc = 25, range.x,
                    binned = FALSE, truncate = TRUE)

{
    ## Install safeguard against non-positive bandwidths:
    if (!missing(bandwidth) && any(bandwidth <= 0))
        stop("'bandwidth' must be strictly positive")

    drv <- as.integer(drv)
    if (missing(degree)) degree <- drv + 1L
    else degree <- as.integer(degree)

    if (missing(range.x) && !binned)
        if (missing(y)) {
            extra <- 0.05*(max(x) - min(x))
            range.x <- c(min(x)-extra,  max(x)+extra)
        } else range.x <- c(min(x), max(x))

    ## Rename common variables
    M <- gridsize
    Q <- as.integer(bwdisc)
    a <- range.x[1L]
    b <- range.x[2L]
    pp <- degree + 1L
    ppp <- 2L*degree + 1L
    tau <- 4

    ## Decide whether a density estimate or regressionestimate is required.

    if (missing(y))  {  # obtain density estimate
        y <- NULL
        n <- length(x)
        gpoints <- seq(a, b, length.out = M)
        xcounts <- linbin(x, gpoints, truncate)
        ycounts <- (M-1)*xcounts/(n*(b-a))
        xcounts <- rep(1, M)
    } else {            # obtain regression estimate
        ## Bin the data if not already binned
        if (!binned) {
            gpoints <- seq(a, b, length.out = M)
            out <- rlbin(x, y, gpoints, truncate)
            xcounts <- out$xcounts
            ycounts <- out$ycounts
        } else {
            xcounts <- x
            ycounts <- y
            M <- length(xcounts)
            gpoints <- seq(a, b, length.out = M)
        }
    }

    ## Set the bin width
    delta <- (b-a)/(M-1L)

    ## Discretise the bandwidths
    if (length(bandwidth) == M) {
        hlow <- sort(bandwidth)[1L]
        hupp <- sort(bandwidth)[M]
        hdisc <- exp(seq(log(hlow), log(hupp), length.out = Q))

        ## Determine value of L for each member of "hdisc"
        Lvec <- floor(tau*hdisc/delta)

        ## Determine index of closest entry of "hdisc"
        ## to each member of "bandwidth"
        indic <- if (Q > 1L) {
            lhdisc <- log(hdisc)
            gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1)
            if (gap == 0) rep(1, M)
            else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1)
        } else rep(1, M)
    } else if (length(bandwidth) == 1L) {
        indic <- rep(1, M)
        Q <- 1L
        Lvec <- rep(floor(tau*bandwidth/delta), Q)
        hdisc <- rep(bandwidth, Q)
    } else
        stop("'bandwidth' must be a scalar or an array of length 'gridsize'")

    if (min(Lvec) == 0)
        stop("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'")

    ## Allocate space for the kernel vector and final estimate

    dimfkap <- 2L * sum(Lvec) + Q
    fkap <- rep(0, dimfkap)
    curvest <- rep(0, M)
    midpts <- rep(0, Q)
    ss <- matrix(0, M, ppp)
    tt <- matrix(0, M, pp)
    Smat <- matrix(0, pp, pp)
    Tvec <- rep(0, pp)
    ipvt <- rep(0, pp)

    ## Call FORTRAN routine "locpol"

    out <- .Fortran(F_locpol, as.double(xcounts), as.double(ycounts),
                    as.integer(drv), as.double(delta), as.double(hdisc),
                    as.integer(Lvec), as.integer(indic), as.integer(midpts),
                    as.integer(M), as.integer(Q), as.double(fkap), as.integer(pp),
                    as.integer(ppp), as.double(ss), as.double(tt),
                    as.double(Smat), as.double(Tvec), as.integer(ipvt),
                    as.double(curvest))

    curvest <- gamma(drv+1) * out[[19L]]

    list(x = gpoints, y = curvest)
}

## For application of linear binning to a regression
## data set.
rlbin <- function(X, Y, gpoints, truncate = TRUE)
{
    n <- length(X)
    M <- length(gpoints)
    trun <- if (truncate) 1L else 0L
    a <- gpoints[1L]
    b <- gpoints[M]
    out <- .Fortran(F_rlbin, as.double(X), as.double(Y), as.integer(n),
                    as.double(a), as.double(b), as.integer(M), as.integer(trun),
                    double(M), double(M))
    list(xcounts = out[[8L]],  ycounts = out[[9L]])
}

## For computing the binned diagonal entries of a smoother
## matrix for local polynomial kernel regression.

sdiag <- function(x, drv = 0L, degree = 1L, kernel = "normal",
                    bandwidth, gridsize = 401L, bwdisc = 25, range.x,
                    binned = FALSE, truncate = TRUE)
{
    if (missing(range.x) && !binned) range.x <- c(min(x), max(x))

    ## Rename common variables

    M <- gridsize
    Q <- as.integer(bwdisc)
    a <- range.x[1L]
    b <- range.x[2L]
    pp <- degree + 1L
    ppp <- 2L*degree + 1L
    tau <- 4

    ## Bin the data if not already binned

    if (!binned) {
        gpoints <- seq(a, b, length.out = M)
        xcounts <- linbin(x, gpoints, truncate)
    } else {
        xcounts <- x
        M <- length(xcounts)
        gpoints <- seq(a, b, length.out = M)
    }

    ## Set the bin width

    delta <- (b-a)/(M-1L)

    ## Discretise the bandwidths

    if (length(bandwidth) == M) {
        hlow <- sort(bandwidth)[1L]
        hupp <- sort(bandwidth)[M]
        hdisc <- exp(seq(log(hlow), log(hupp), length.out = Q))

        ## Determine value of L for each member of "hdisc"
        Lvec <- floor(tau*hdisc/delta)

        ## Determine index of closest entry of "hdisc"
        ## to each member of "bandwidth"
        indic <- if (Q > 1L) {
            lhdisc <- log(hdisc)
            gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1)
            if (gap == 0) rep(1, M)
            else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1)
        } else rep(1, M)
    } else if (length(bandwidth) == 1L) {
        indic <- rep(1, M)
        Q <- 1L
        Lvec <- rep(floor(tau*bandwidth/delta), Q)
        hdisc <- rep(bandwidth, Q)
    } else
        stop("'bandwidth' must be a scalar or an array of length 'gridsize'")

    dimfkap <- 2L * sum(Lvec) + Q
    fkap <- rep(0, dimfkap)
    midpts <- rep(0, Q)
    ss <- matrix(0, M, ppp)
    Smat <- matrix(0, pp, pp)
    work <- rep(0, pp)
    det <- rep(0, 2L)
    ipvt <- rep(0, pp)
    Sdg <- rep(0, M)

    out <- .Fortran(F_sdiag, as.double(xcounts), as.double(delta),
                    as.double(hdisc), as.integer(Lvec), as.integer(indic),
                    as.integer(midpts), as.integer(M), as.integer(Q),
                    as.double(fkap), as.integer(pp), as.integer(ppp),
                    as.double(ss), as.double(Smat), as.double(work),
                    as.double(det), as.integer(ipvt), as.double(Sdg))

    list(x = gpoints,  y = out[[17L]])
}

## For computing the binned diagonal entries of SS^T
## where S is a smoother matrix for local polynomial
## kernel regression.
sstdiag <- function(x, drv = 0L, degree = 1L, kernel = "normal",
                    bandwidth, gridsize = 401L, bwdisc = 25, range.x,
                    binned = FALSE, truncate = TRUE)
{
    if (missing(range.x) && !binned) range.x <- c(min(x), max(x))

    ## Rename common variables
    M <- gridsize
    Q <- as.integer(bwdisc)
    a <- range.x[1L]
    b <- range.x[2L]
    pp <- degree + 1L
    ppp <- 2L*degree + 1L
    tau <- 4L

    ## Bin the data if not already binned
    if (!binned) {
        gpoints <- seq(a, b, length.out = M)
        xcounts <- linbin(x, gpoints, truncate)
    } else {
        xcounts <- x
        M <- length(xcounts)
        gpoints <- seq(a, b, length.out = M)
    }

    ## Set the bin width

    delta <- (b-a)/(M-1L)

    ## Discretise the bandwidths
    if (length(bandwidth) == M) {
        hlow <- sort(bandwidth)[1L]
        hupp <- sort(bandwidth)[M]
        hdisc <- exp(seq(log(hlow), log(hupp), length.out = Q))

        ## Determine value of L for each member of "hdisc"
        Lvec <- floor(tau*hdisc/delta)

        ## Determine index of closest entry of "hdisc"
        ## to each member of "bandwidth"
        indic <- if (Q > 1L) {
            lhdisc <- log(hdisc)
            gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1)
            if (gap == 0) rep(1, M)
            else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1)
        } else rep(1, M)
    } else if (length(bandwidth) == 1L) {
        indic <- rep(1, M)
        Q <- 1L
        Lvec <- rep(floor(tau*bandwidth/delta), Q)
        hdisc <- rep(bandwidth, Q)
    } else
        stop("'bandwidth' must be a scalar or an array of length 'gridsize'")

    dimfkap <- 2L * sum(Lvec) + Q
    fkap <- rep(0, dimfkap)
    midpts <- rep(0, Q)
    ss <- matrix(0, M, ppp)
    uu <- matrix(0, M, ppp)
    Smat <- matrix(0, pp, pp)
    Umat <- matrix(0, pp, pp)
    work <- rep(0, pp)
    det <- rep(0, 2L)
    ipvt <- rep(0, pp)
    SSTd <- rep(0, M)

    SSTd <- .Fortran(F_sstdg, as.double(xcounts), as.double(delta),
                    as.double(hdisc), as.integer(Lvec), as.integer(indic),
                    as.integer(midpts), as.integer(M), as.integer(Q),
                    as.double(fkap), as.integer(pp), as.integer(ppp),
                    as.double(ss), as.double(uu), as.double(Smat),
                    as.double(Umat), as.double(work), as.double(det),
                    as.integer(ipvt), as.double(SSTd))[[19L]]

    list(x = gpoints, y = SSTd)
}

.onAttach <- function(libname, pkgname)
   packageStartupMessage("KernSmooth 2.23 loaded\nCopyright M. P. Wand 1997-2009")

.onUnload <- function(libpath)
    library.dynam.unload("KernSmooth",  libpath)
Any scripts or data that you put into this service are public.
KernSmooth documentation built on April 4, 2025, 2:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
KernSmooth
Functions for Kernel Smoothing Supporting Wand & Jones (1995)

R/all.R
In KernSmooth: Functions for Kernel Smoothing Supporting Wand & Jones (1995)

Defines functions .onUnload .onAttach sstdiag sdiag rlbin locpoly linbin2D linbin dpill dpik dpih cpblock blkest bkfe bkde2D bkde

Documented in bkde bkde2D bkfe dpih dpik dpill locpoly

Try the KernSmooth package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

KernSmooth Functions for Kernel Smoothing Supporting Wand & Jones (1995)

R/all.R In KernSmooth: Functions for Kernel Smoothing Supporting Wand & Jones (1995)

Defines functions .onUnload .onAttach sstdiag sdiag rlbin locpoly linbin2D linbin dpill dpik dpih cpblock blkest bkfe bkde2D bkde

Documented in bkde bkde2D bkfe dpih dpik dpill locpoly

Try the KernSmooth package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

KernSmooth
Functions for Kernel Smoothing Supporting Wand & Jones (1995)

R/all.R
In KernSmooth: Functions for Kernel Smoothing Supporting Wand & Jones (1995)