#' Nadaraya-Watson estimator
#'
#' Estimate a regression function with the Nadaraya-Watson estimator and a user-specified bandwidth \eqn{h}.
#'
#' @inheritParams local_average
#' @param kernel a kernel function. The package supplies \code{uniform}, \code{gauss}, \code{epanechnikov} and \code{biweight} (the default).
#' If the support of the kernel is bounded, scale it to [-1,1] so that any discontinuities are plotted correctly.
#' @inherit local_average return params
#' @export
#'
#' @examples
#' # simulate and plot some data
#' m <- function(x) (x^2+1)*sin(2*pi*x*((1-x) + 4*x))
#' x <- sort(runif(100))
#' y <- m(x) + rnorm(length(x), sd=0.1)
#' simdata <- data.frame(x=x,y=y)
#' plot(simdata)
#'
#' # calculate the estimator at x=0.1, with bandwidth 0.02
#' nw(simdata,h=0.02,t=0.1)
#'
#' # a specialised print method has been provided to make life easier
#' # however, we can still access the underlying numbers e.g.
#'
#' fit <- nw(simdata,h=0.02,t=0.1)
#' fit$mhat
#' print(fit) # the same output as before
#'
#' # plot the estimator with bandwidth 0.02 using default biweight kernel
#' plot(nw(simdata,h=0.02))
#'
#' # add a line for the estimator with bandwidth 0.4
#' lines(nw(simdata,h=0.4), col=2)
#'
#' # add a line for the estimator using Gaussian kernel
#' lines(nw(simdata,h=0.02,kernel=gauss), col=4)
#'
#' # NB the first plot is equivalent to the following:
#' fit <- nw(simdata,h=0.02)
#' plot(fit$data)
#' lines(fit$t,fit$mhat)
#'
#' # get smoother matrix
#' fit$A
nw <- function(data,
               h,
               t = NULL,
               kernel = biweight,
               empty_nhood = NaN) {
  # Nadaraya-Watson fit: build the kernel weight matrix between evaluation
  # points and design points, normalise each row to sum to one, and apply the
  # resulting smoother matrix to the responses.

  # When no evaluation points are supplied, use the package's plotting grid.
  if (is.null(t)) t <- plotting_grid(data, h)

  # Unnormalised weights: entry [i, j] is kernel((t[i] - x[j]) / h).
  # The argument names deliberately avoid shadowing the `t` parameter.
  A <- outer(
    t, data$x,
    function(eval_pt, design_pt) kernel((eval_pt - design_pt) / h)
  )

  # Total weight at each evaluation point; a zero total means no design point
  # receives any weight there, so the estimator is undefined at that point.
  den <- rowSums(A)

  # Normalise rows. Dividing a matrix by a vector of length nrow(A) recycles
  # down the columns, so row i is divided by den[i]. Unlike t(apply(A, 1, ...)),
  # this keeps the (evaluation points) x (design points) shape even when there
  # is only one design point. Rows with den == 0 become NaN (0 / 0).
  A <- A / den

  # Replace the undefined estimates by the user-supplied placeholder.
  mhat <- ifelse(den == 0, empty_nhood, A %*% data$y)

  out <- list(t = t, h = h, mhat = mhat, data = data, A = A)
  class(out) <- "npfit"
  out
}
#' Local average estimator
#'
#' Estimate a regression function m(x) by local averaging. To calculate the estimate, we average the response values for design points within distance \eqn{h} of \eqn{x}. The quantity \eqn{h} is known as the bandwidth.
#'
#' The function calls \code{nw} using the uniform kernel.
#'
#' @param data the data used to fit the estimator. Must be a data frame with columns \code{x} and \code{y}, where \code{x} contains the design points \eqn{x_1,\ldots,x_n}
#' and \code{y} contains the response values \eqn{Y_1,\ldots,Y_n}
#' @param h a scalar giving the user-specified bandwidth (N.B. the cross-validation bandwidth can be computed using \code{find_hcv})
#' @param t (optional) a vector of points at which the estimator is evaluated. If unspecified, a sequence of 200 points is created that spans the range of the x-values in the data.
#' @param empty_nhood a scalar specifying a custom value to be returned at locations where the estimator is undefined (as occurs when there are no nearby data points to average).
#' Default is \code{NaN}.
#' @return An object of class \code{npfit}, which is a list with 5 items:
#' \item{t}{the vector of evaluation points}
#' \item{h}{the bandwidth used}
#' \item{mhat}{evaluations of the estimator \eqn{\hat{m}(t_1),\ldots, \hat{m}(t_k)}, one for each of the \eqn{k} evaluation points in \code{t}}
#' \item{data}{the data used to fit the estimator}
#' \item{A}{the smoother matrix, such that \eqn{\hat{m}=AY}.}
#' Specialised \code{print}, \code{plot}, and \code{lines} methods are available for these objects, to facilitate analysis. See examples below.
#' @export
#'
#' @examples
#' # simulate and plot some data
#' m <- function(x) (x^2+1)*sin(2*pi*x*((1-x) + 4*x))
#' x <- sort(runif(100))
#' y <- m(x) + rnorm(length(x), sd=0.1)
#' simdata <- data.frame(x=x,y=y)
#' plot(simdata)
#'
#' # calculate the estimator at x=0.1, with bandwidth 0.02
#' local_average(simdata,h=0.02,t=0.1)
#'
#' # a specialised print method has been provided to make life easier
#' # however, we can still access the underlying numbers e.g.
#'
#' fit <- local_average(simdata,h=0.02,t=0.1)
#' fit$mhat
#' print(fit) # the same output as before
#'
#' # plot the estimator with bandwidth 0.02
#' plot(local_average(simdata,h=0.02))
#'
#' # add a line for the estimator with bandwidth 0.4
#' lines(local_average(simdata,h=0.4), col=2)
#'
#' # NB the first plot is equivalent to the following:
#' fit <- local_average(simdata,h=0.02)
#' plot(fit$data)
#' lines(fit$t,fit$mhat)
#'
#' # get smoother matrix
#' fit$A
local_average <- function(data,
                          h,
                          t = NULL,
                          empty_nhood = NaN) {
  # Local averaging is delegated to nw() with the uniform kernel; every other
  # argument is forwarded unchanged.
  nw(
    data = data,
    h = h,
    t = t,
    kernel = uniform,
    empty_nhood = empty_nhood
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.