#' Local linear quantile regression
#'
#' \code{llqr} estimates the \eqn{\tau}th conditional quantile of \code{y} given
#' \code{x} based on a local linear fit. The estimation is performed at each of
#' the design points or, if specified, at a single observation point \code{x0}.
#'
#' The function computes the local linear quantile regression fit for a specified
#' quantile level \eqn{\tau} at the design points of the matrix \code{x} or at a
#' pre-specified point \code{x0}. The estimation is based on a standard normal
#' kernel and a univariate bandwidth. The bandwidth, if not specified by the
#' user, is defined using either the rule-of-thumb given by Yu and Jones (1994)
#' or the cross-validation criterion.
#'
#' The estimation applies to univariate and multivariate predictor variables. For
#' the latter, the local linear fit uses the multivariate standard normal kernel.
#' Note that if the estimation is performed at a pre-specified point \code{x0},
#' then \code{x0} should be a scalar (for univariate predictor) or a vector (for
#' multivariate predictor).
#'
#' @section Warning:
#' The user needs to be careful about the bandwidth selection. When the
#' dimension of the predictor variable is large compared to the sample size,
#' local linear fitting meets the 'curse of dimensionality' problem. In
#' situations like that, the bandwidth selected by the rule-of-thumb or the
#' cross- validation criterion might be small and lead to a sparse
#' neighborhood. This will cause the function to fail. For these cases, we
#' advice the user to specify a bandwidth in the function. See the last example
#' below.
#'
#' @param x A design matrix (n x p). The rows represent observations and the
#' columns represent predictor variables.
#' @param y A vector of the response variable.
#' @param tau A quantile level, a number strictly between 0 and 1.
#' @param h A univariate bandwidth. If not specified, the bandwidth is estimated
#' using either "\code{rule}" or "\code{CV}". See \code{method} below for
#' details.
#' @param method A character string specifying the method to select the
#' bandwidth, if it is missing. Use "\code{rule}" for the rule-of-thumb
#' bandwidth of Yu and Jones (1994) or "\code{CV}" for the method of
#' cross-validation.
#' @param x0 A single observation for which to perform the estimation. It needs
#' to be a scalar (for a univariate predictor) or a vector (for a
#' multivariate predictor). If \code{x0} is missing, the estimation will be
#' performed on the design matrix \code{x}.
#' @return \code{llqr} computes the local linear \eqn{\tau}th conditional
#' quantile function of \code{y} given \code{x} and returns:
#' \itemize{
#' \item{ll_est: }{The estimated function value at the design points \code{x}
#' or, if specified, at the point \code{x0}.}
#'
#' \item{h: }{The bandwidth for the local linear quantile regression fit. If
#' not specified by the user, \code{h} is estimated using either the
#' rule-of-thumb given by Yu and Jones (1994) or the cross-validation
#' criterion.} }
#'
#' @references Yu, K., and Jones, M.C. (1998), Local linear quantile regression.
#' \emph{Journal of the American Statistical Association}, 93, 228-237.
#' @import stats
#' @include llqrcv.R
#' @examples
#'
#' # Example 1
#' # estimate the function at a specific quantile level for simulated data
#' set.seed(1234)
#' n <- 100
#' x <- rnorm(n)
#' error <- rnorm(n)
#' y <- (x + 1)^3 + 0.1 * (x - 2)^3 + error
#' tau <- 0.5
#' plot(x, y, main = tau)
#' points(x, llqr(x, y, tau = tau)$ll_est, col = 'red', pch = 16)
#'
#' # Example 2
#' # estimate the function at a point x0
#' set.seed(1234)
#' n <- 100
#' x <- rnorm(n)
#' error <- rnorm(n)
#' y <- (x + 1)^3 + 0.1 * (x - 2)^3 + error
#' tau <- 0.5
#' x0 <- 1
#' llqr(x, y, tau = tau, x0 = x0)
#'
#' # Example 3
#' # estimate the function for different quantile levels
#' data(mcycle, package = "MASS")
#' attach(mcycle)
#' plot(times, accel, xlab = "milliseconds", ylab = "acceleration")
#' taus <- c(0.1, 0.25, 0.5, 0.75, 0.9)
#' for(i in 1:length(taus)) {
#' fit <- llqr(times, accel, tau = taus[i])$ll_est
#' lines(times, fit, lty = i)
#' }
#' legend(45, -50, c("tau=0.1","tau=0.25","tau=0.5","tau=0.75", "tau=0.9"),
#' lty=1:length(taus))
#'
#' # Example 4
#' # demonstrate a situation where the dimension of the predictor is large and
#' # the local linear fitting meets the 'curse of dimensionality' problem
#' set.seed(1234)
#' n <- 100
#' p <- 10
#' x <- matrix(rnorm(n * p), n, p)
#' error <- rnorm(n)
#' y <- 3 * x[, 1] + x[, 2] + error
#' tau <- 0.5
#' # use the following instead of llqr(x, y, tau = tau)
#' fit.alt <- llqr(x, y, tau = tau, h=1)
#' fit.alt
#'
#' @export
llqr <- function(x, y, tau=0.5, h = NULL, method="rule", x0 = NULL) {
x <- as.matrix(x)
y <- as.matrix(y)
# compatibility checks
# checks if y is univariate
if (dim(y)[2] > 1) {
stop(paste("y needs to be a univariate response. y is a", dim(y)[2], "-dimensional response in this case."))
}
# checks if the number of observations for x and y agree
if (length(y) != dim(x)[1]) {
stop(paste("number of observations of y (", length(y), ") not equal to the number of rows of x (", dim(x)[1], ").", sep = ""))
}
# checks if the quantile level is one-dimensional
if (length(tau) > 1) {
stop(paste("quantile level needs to be one number."))
}
# checks if the quantile level is between 0 and 1 (strictly)
if (tau >= 1 | tau <= 0) {
stop(paste("quantile level needs to be a number strictly between 0 and 1."))
}
# checks for NAs
if (sum(is.na(y)) > 0 | sum(is.na(x)) > 0) {
stop(paste("Data include missing values."))
}
# checks if n>p
if (length(y) <= dim(x)[2]) {
stop(paste("number of observations of y (", length(y), ") should be greater than the number of columns of x (", dim(x)[2], ").", sep = ""))
}
n <- length(y)
p <- dim(x)[2]
# if bandwidth is missing, estimate it
if (is.null(h)) {
if (method == "CV") {
h <- llqrcv(x, y, tau)
}
if (method == "rule") {
red_dim <- floor(0.2 * n) # find how many observations correspond to a 20%
index_y <- order(y)[red_dim:(n - red_dim)] # subtract the smallest 20% and the largest 20% of the observations
h <- KernSmooth::dpill(x[index_y, ], y[index_y])
h <- 1.25 * h * (tau * (1 - tau) / (dnorm(qnorm(tau)))^2)^.2
if (h == 'NaN') {
h <- 1.25 * max(n^(-1 / (p + 4)), min(2, sd(y)) * n^(- 1 / (p + 4)))
}
}
} else {
h <- h
}
# if x0 is missing, perform estimation at the entire design matrix
if (is.null(x0)) {
ll_est <- list()
# if the dimension of x is one, use univariate kernel, otherwise
# use multivariate kernel
if (p == 1) {
# perform estimation at the design matrix x
for (i in 1:dim(x)[1]) {
z <- x - x[i, 1]
w <- dnorm(z / h)
q <- quantreg::rq(y ~ z, weights = w, tau = tau, ci = FALSE)
ll_est[i] <- q$coef[1]
}
} else {
for (i in 1:dim(x)[1]) {
z <- list()
z <- t(t(x) - x[i, ])
w <- mvtnorm::dmvnorm(z / h)
q <- quantreg::rq(y ~ z, weights = w, tau = tau, ci = FALSE)
ll_est[i] <- q$coef[1]
}
}
} else {
# checks if the dimension of x0 is the same as p
if (length(x0) != p) {
stop(paste("x0 needs to be a p-dimensional vector, where p is the dimension of x (",dim(x)[2], ")."))
}
if (p == 1) {
# perform estimation at the point x0
z <- x - x0
w <- dnorm(z / h)
q <- quantreg::rq(y ~ z, weights = w, tau = tau, ci = FALSE)
ll_est <- q$coef[1]
} else {
z <- t(t(x) - x0)
w <- mvtnorm::dmvnorm(z / h)
q <- quantreg::rq(y ~ z, weights = w, tau = tau, ci = FALSE)
ll_est <- q$coef[1]
}
}
list(ll_est = unlist(ll_est), h = h)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.