Nothing
### 2d kde new method
###
#' @importFrom utils methods
NULL
floor_os <- function(x, o = 0, s = 1) o + s*floor((x - o)/s)
ceiling_os <- function(x, o = 0, s = 1) o + s*ceiling((x - o)/s)
#' One-dimensional Kernel Density Estimate
#'
#' A pure R implementation of an approximate one-dimensional KDE, similar
#' to \code{\link[stats]{density}} but using a different algorithm not involving
#' \code{\link[stats]{fft}}. Two extra facilities are provided, namely
#' (a) the kernel may be given either as a character string to select one of a
#' number of kernel functions provided, or a user defined R function, and (b) the
#' kde may be fitted beyond the prescribed limits for the result, and folded back
#' to emulate the effect of having known bounds for the distribution.
#'
#' @param x A numeric vector for which the kde is required or (in methods)
#' an object of class \code{"kde_1d"}
#' @param bw The bandwidth or the bandwidth function.
#' @param kernel The kernel function, specified either as a character string or as
#' an R function. Partial matching of the character string is allowed.
#' @param n Integer, the number of equally-spaced values in the abscissa of the kde
#' @param limits numeric vector of length 2. Prescribed x-range limits for the
#' x-range of the result. May be infinite, but infinite values will be
#' pruned back to an appropriate value as determined by the data.
#' @param cut The number of bandwidths beyond the range of the input x-values to use
#' @param na.rm Logical value: should any missing values in x be silently removed?
#' @param adjust numeric value: a multiplier to be applied to the computed bandwidth.
#' @param fold Logical value: should the kde be estimated beyond the prescribed limits
#' for the result and 'folded back' to emulate the effect of having known
#' range boundaries for the underlying distribution?
#' @param las,col,xlab,ylab base graphics parameters
#' @param ... currently ignored, except in method functions
#'
#' @return A list of results specifying the result of the kde computation, of class \code{"kde_1d"}
#' @export
#'
#' @examples
#' set.seed(1234)
#' u <- runif(5000)
#' kdeu0 <- kde_1d(u, limits = c(-Inf, Inf))
#' kdeu1 <- kde_1d(u, limits = 0:1, kernel = "epan", fold = TRUE)
#' plot(kdeu0, col = 4)
#' lines(kdeu1, col = "dark green")
#' fun <- function(x) (0 < x & x < 1) + 0
#' curve(fun, add=TRUE, col = "grey", n = 1000)
kde_1d <- function(x, bw = bw.nrd0,
kernel = c("gaussian", "biweight", "cosine", "epanechnikov",
"logistic", "optCosine", "rectangular", "squaredCosine",
"triangular", "tricube", "triweight", "uniform"),
n = 512, limits = c(rx[1] - cut*bw, rx[2] + cut*bw),
cut = 3, na.rm = FALSE, adjust = 1, fold = FALSE, ...) {
stopifnot(non_numeric_x = is.numeric(x),
short_x = length(unique(x)) > 2)
data_name <- deparse(substitute(x))
if(is.function(bw)) {
bw <- bw(x)
}
stopifnot(bad_bw = is.numeric(bw) && length(bw) == 1 && bw > 0,
bad_adjust = is.numeric(adjust) && length(adjust) == 1 && adjust > 0)
bw <- bw * adjust
if(is.character(kernel)) {
kernel <- match.arg(kernel)
substring(kernel, 0, 1) <- toupper(substring(kernel, 0, 1))
kernel <- get(paste0("kernel", kernel), mode = "function")
} else {
stopifnot(kernel_not_a_function = is.function(kernel),
bad_arguments = identical(args(kernel), args(kernelGaussian)))
}
stopifnot(bad_n = is.numeric(n) && length(n) == 1 && (n %% 1 == 0) && n > 2)
rx <- range(x, na.rm = na.rm)
if(fold) {
limits[1] <- max(limits[1], rx[1] - cut*bw)
limits[2] <- min(limits[2], rx[2] + cut*bw)
stopifnot(bad_limits = all(x >= limits[1]) && all(x <= limits[2]))
dx <- diff(limits)/(n-1)
lo <- floor_os(limits[1] - min(diff(limits), 5*bw), limits[1], dx)
up <- ceiling_os(limits[2] + min(diff(limits), 5*bw), limits[2], dx)
n_star <- 1 + round((up - lo)/dx)
xo <- seq(lo, up, by = dx)
# fr <- tabulate(1 + pmin(floor((x - lo)/dx), n_star - 1), nbins = n_star)
} else {
lo <- max(limits[1], rx[1] - 5*bw)
up <- min(limits[2], rx[2] + 5*bw)
stopifnot(bad_limits = all(x >= lo) && all(x <= up))
dx <- (up - lo)/(n-1)
xo <- seq(lo, up, by = dx)
# fr <- tabulate(1 + pmin(floor((x - lo)/dx), n - 1), nbins = n)
}
fr <- linbin1d(x, xo)
yo <- outer(xo, xo, function(x, y) kernel(x, y, bw)) %*% fr
if(fold) {
if(any(low <- (xo < limits[1]+dx/10))) {
k <- which(low)
kl <- k[length(k)]
k0 <- 2*kl - k
yo[k0] <- yo[k0] + yo[k]
}
if(any(upp <- (limits[2]-dx/10 < xo))) {
k <- which(upp)
ku <- k[1]
k0 <- 2*ku - k
yo[k0] <- yo[k0] + yo[k]
}
keep <- kl:ku
yo <- yo[keep]
xo <- xo[keep]
lo <- limits[1]
up <- limits[2]
}
structure(list(x = xo, y = yo/(sum(yo)*dx), bw = bw, n = length(x),
lower = lo, upper = up, data_name = data_name),
class = "kde_1d")
}
linbin1d <- function(x, gx) {
dx <- mean(diff(gx))
i0 <- findInterval(x, gx, all.inside = TRUE)
i <- unique(i0)
wt <- crossprod(outer(i0, i, "=="), cbind(gx[i0+1] - x, x - gx[i0]))/dx
fr <- numeric(length(gx))
fr[i ] <- fr[i ] + wt[, 1]
fr[i + 1] <- fr[i + 1] + wt[, 2]
fr
}
#' @rdname kde_1d
#' @export
print.kde_1d <- function(x, ...) {
cat("A Kernel Density Estimate of class ", class(x), "\n\n")
labs <- c("Response", "Sample size", "Range", "Bandwidth", "Resolution")
values <- with(x, c(data_name, n,
paste0(signif(lower, 4), " < x < ",
signif(upper, 4)),
signif(bw, 4),
length(x)))
cat(paste0(format(labs), ": ", values), sep = "\n")
generics <- attr(utils::methods(class = "kde_1d"), "info")$generic
cat("\nMethods exist for generics:",
paste(generics, collapse = ", "), "\n")
invisible(x)
}
#' @rdname kde_1d
#' @export
plot.kde_1d <- function(x, ..., col = "steel blue", las = 1,
xlab = bquote(x == italic(.(x$data_name))),
ylab = expression(kde(italic(x)))) {
with(x, {
plot(x = c(lower, x, upper), y = c(0, y, 0),
type = "l", xlab = xlab, ylab = ylab, axes = FALSE,
col = col, panel.first = grid(), las = las, ...)
axis(1, las = las)
axis(2, las = las)
})
invisible(x)
}
#' @import grDevices
NULL
#' A Two-dimensional Kernel Density Estimate
#'
#' A pure R implementation of an approximate two-dimensional kde computation, where
#' the approximation depends on the x- and y-resolution being fine, i.e. the number
#' of both x- and y-points should be reasonably large, at least 256. The coding
#' follows the same idea as used in \code{\link[MASS]{kde2d}}, but scales much better
#' for large data sets.
#'
#' @param x,y Numeric vectors of the same length specified in any way acceptable
#' to \code{\link[grDevices]{xy.coords}}. In methods, \code{x} will be
#' an object of class \code{"kde_2d"}
#' @param bw bandwidths. May be a numeric vector of length 1 or 2, or a function,
#' or list of two bandwidth computation functions. Short entities will
#' be repeated to length 1. The first relates to the x-coordinate and
#' the second to the y.
#' @param kernel As for \code{\link{kde_1d}} though 1 or 2 values may be specified
#' relating to x- and y-coordinates respectively. Short entities will
#' be repeated to length 2
#' @param n positive integer vector of length 1 or 2 specifying the resolution required
#' in the x- and y-coordinates respectively. Short values will be repeated to
#' length 2.
#' @param x_limits,y_limits Numeric vectors specifying the limits required for the result
#' @param cut The number of bandwidths beyond the x- and y-range limits for the resuls.
#' @param na.rm Should missing values be silently removed?
#' @param adjust A factor to adjust both bandwidths to regulate smoothness
#' @param las,col,xlab,ylab base graphics parameters
#' @param ... currently ignored, except in method functions
#'
#' @return A list of results of class \code{"kde_2d"}. The result may be used directly
#' in \code{\link[graphics]{image}} or \code{\link[graphics]{contour}}.
#' @export
#'
#' @examples
#' krc <- with(Boston, {
#' criminality <- log(crim)
#' spaciousness <- sqrt(rm)
#' kde_2d(criminality, spaciousness, n = 128, kernel = "biweight")
#' })
#' plot(krc, xlab = expression(italic(Criminality)), ylab = expression(italic(Spaciousness)))
#' levs <- hr_levels(krc)
#' contour(krc, add = TRUE, levels = levs, labels = names(levs))
#'
#' with(krc, persp(x, 10*y, 3*z, border="transparent", col = "powder blue",
#' theta = 30, phi = 15, r = 20, scale = FALSE, shade = TRUE,
#' xlab = "Criminality", ylab = "Spaciousness", zlab = "density"))
kde_2d <- function (x, y = NULL, bw = list(x = bw.nrd0, y = bw.nrd0),
kernel = c("gaussian", "biweight", "cosine", "epanechnikov",
"logistic", "optCosine", "rectangular", "squaredCosine",
"triangular", "tricube", "triweight", "uniform"),
n = 128,
x_limits = c(rx[1] - cut * bw["x"], rx[2] + cut * bw["x"]),
y_limits = c(ry[1] - cut * bw["y"], ry[2] + cut * bw["y"]),
cut = 1, na.rm = FALSE, adjust = 53/45, ...) {
xy <- grDevices::xy.coords(x, y)
x <- xy$x
y <- xy$y
if (na.rm) {
if (any(nax <- is.na(x)) | any(nay <- is.na(y))) {
x <- x[!(nax | nay)]
y <- y[!(nax | nay)]
}
}
stopifnot(x_has_nas = !any(is.na(x)),
y_has_nas = !any(is.na(y)),
x_y_lengths_unequal = length(x) == length(y),
bad_n = !is.integer(n) || all(n > 2))
knames <- if (is.null(names(kernel)))
c("x", "y")
else names(kernel)
if (is.character(kernel)) {
kernel <- match.arg(kernel, several.ok = TRUE)
kernel <- rep(kernel, length.out = 2)
substring(kernel, 0, 1) <- toupper(substring(kernel, 0, 1))
kernel <- paste0("kernel", kernel)
kernel = list(x = get(kernel[1], mode = "function"),
y = get(kernel[2], mode = "function"))
}
else if (is.function(kernel)) {
if (length(kernel) == 1) {
kernel <- rep(list(kernel), 2)
}
}
kernel <- setNames(kernel, knames)
if (is.function(bw)) {
bw <- rep(list(bw), 2)
}
if(is.list(bw)) {
bw <- mapply(FUN = function(x, y) x(y),
bw, list(x = x, y = y))
} else {
bw <- rep(bw, length.out = 2)
}
stopifnot(bad_bw = is.numeric(bw) && all(bw > 0),
bad_adjust = is.numeric(adjust) && length(adjust) <= 2 && all(adjust > 0))
bw <- bw * adjust
if (is.null(names(bw))) {
bw <- setNames(bw, c("x", "y"))
}
nnames <- if (is.null(names(n)))
c("x", "y")
else names(n)
n <- setNames(rep(n, length.out = 2), nnames)
rx <- range(x)
ry <- range(y)
x_limits <- c(max(x_limits[1], rx[1] - 5 * bw["x"]),
min(x_limits[2], rx[2] + 5 * bw["x"]))
y_limits <- c(max(y_limits[1], ry[1] - 5 * bw["y"]),
min(y_limits[2], ry[2] + 5 * bw["y"]))
dx <- diff(x_limits)/(n["x"] - 1)
dy <- diff(y_limits)/(n["y"] - 1)
xo <- seq(x_limits[1], x_limits[2], by = dx)
yo <- seq(y_limits[1], y_limits[2], by = dy)
Fr <- linbin2d(x, y, xo, yo)
Hx <- outer(xo, xo, kernel[["x"]], sd = bw["x"])
Hy <- outer(yo, yo, kernel[["y"]], sd = bw["y"])
z <- Hx %*% Fr %*% Hy
z <- z/(sum(z) * dx * dy)
structure(list(x = xo, y = yo, z = z, bw = bw, n_sample = length(x),
lower = c(x = min(xo), y = min(yo)),
upper = c(x = max(xo), y = max(yo)),
n_out = n,
data_name = with(xy, c(x = ifelse(is.null(xlab), "x", xlab),
y = ifelse(is.null(ylab), "y", ylab)))),
class = "kde_2d")
}
linbin2d <- function(x, y, gx, gy) {
dx <- mean(diff(gx))
dy <- mean(diff(gy))
i <- findInterval(x, gx, all.inside = TRUE)
j <- findInterval(y, gy, all.inside = TRUE)
lx <- (gx[i + 1] - x)
ux <- (x - gx[i])
ly <- (gy[j + 1] - y)
uy <- (y - gy[j])
ij <- paste(i, j, sep = ":")
uij <- unique(ij)
wt <- crossprod(outer(ij, uij, "=="),
cbind(lx*ly, lx*uy, ux*ly, ux*uy))/(dx*dy)
Fr <- matrix(0, length(gx), length(gy))
i <- as.integer(sub(":.*$", "", uij))
j <- as.integer(sub("^.*:", "", uij))
Fr[cbind(i , j )] <- Fr[cbind(i , j )] + wt[,1]
Fr[cbind(i , j + 1)] <- Fr[cbind(i , j + 1)] + wt[,2]
Fr[cbind(i + 1, j )] <- Fr[cbind(i + 1, j )] + wt[,3]
Fr[cbind(i + 1, j + 1)] <- Fr[cbind(i + 1, j + 1)] + wt[,4]
Fr
}
#' @rdname kde_2d
#' @export
print.kde_2d <- function(x, ...) {
cat("A Two-dimensional Kernel Density Estimate\n\n")
labs <- c("Responses", "Sample size", "Ranges", "Bandwidths", "Resolution")
values <- with(x, c(paste(data_name, collapse = ", "),
n_sample,
paste(paste0(signif(lower["x"], 4), " < x < ",
signif(upper["x"], 4)),
paste0(signif(lower["y"], 4), " < y < ",
signif(upper["y"], 4)),
sep = ", "),
paste(signif(bw, 4), collapse = ", "),
paste(length(x), length(y), sep = ", ")))
cat(paste0(format(labs), ": ", values), sep = "\n")
generics <- attr(methods(class = "kde_2d"), "info")$generic
cat("\nMethods exist for generics:",
paste(generics, collapse = ", "), "\n")
invisible(x)
}
#' @rdname kde_2d
#' @export
plot.kde_2d <- function(x, ..., las = 1,
xlab = bquote(italic(.(x$data_name[["x"]]))),
ylab = bquote(italic(.(x$data_name[["y"]]))),
col = hcl.colors(50, "YlOrRd", rev = TRUE)) {
with(x, {
image(x, y, z, las = las, xlab = xlab, ylab = ylab, col = col, ...)
})
invisible(x)
}
#' @import demoKde
NULL
#' #' @rdname kde_1d
#' #' @export
#' kernelBiweight <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(7)*sd
#' ifelse((z <- abs(x-mean)) < h, 15/16*(1 - (z/h)^2)^2/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelCosine <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(1/(1-8/pi^2))*sd
#' ifelse((z <- abs(x-mean)) < h, pi/4*cos((pi*z)/(2*h))/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelEpanechnikov <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(5)*sd
#' ifelse((z <- abs(x-mean)) < h, 3/4*(1 - (z/h)^2)/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelGaussian <- function(x, mean = 0, sd = 1)
#' dnorm(x, mean = mean, sd = sd)
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelLogistic <- function(x, mean = 0, sd = 1)
#' stats::dlogis(x, mean, sqrt(3)/pi*sd)
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelOptCosine <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(1/(1-8/pi^2))*sd
#' ifelse((z <- abs(x-mean)) < h, pi/4*cos((pi*z)/(2*h))/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelRectangular <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(3)*sd
#' ifelse(abs(x-mean) < h, 1/(2*h), 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelSquaredCosine <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(3/(1-6/pi^2))*sd
#' ifelse((z <- abs(x-mean)) < h, cos(pi*z/(2*h))^2/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelTriangular <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(24)*sd/2
#' ifelse((z <- abs(x-mean)) < h, (1 - z/h)/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelTricube <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(243/35)*sd
#' ifelse((z <- abs(x - mean)) < h, 70/81*(1 - (z/h)^3)^3/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelTriweight <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(9)*sd
#' ifelse((z <- abs(x-mean)) < h, 35/32*(1 - (z/h)^2)^3/h, 0)
#' }
#'
#' #' @rdname kde_1d
#' #' @export
#' kernelUniform <- function(x, mean = 0, sd = 1) {
#' h <- sqrt(3)*sd
#' ifelse(abs(x-mean) < h, 1/(2*h), 0)
#' }
#' Home Range levels
#'
#' For an object representing a 2-dimensional kernel density estimate
#' find the level(s) defining a central "home range" region, that is,
#' a region of probability content p for which all density points
#' within the region are higher than any density point outside the region.
#' This makes it a region of probability p with smallest area.
#'
#' @param x an object whose \code{z} component represents the KDE
#' @param p a vector of probability levels
#' @param ... extra arguments (currently not used)
#'
#' @return A vector of density levels defining the home range contours
#' @export
#'
#' @examples
#' krc <- with(Boston, {
#' criminality <- log(crim)
#' spaciousness <- sqrt(rm)
#' kde_2d(criminality, spaciousness)
#' })
#' plot(krc, xlab = expression(italic(Criminality)),
#' ylab = expression(italic(Spaciousness)))
#' home <- hr_levels(krc, p = 0.5)
#' contour(krc, add = TRUE, levels = home, labels = "50%")
hr_levels <- function(x, ...) {
UseMethod("hr_levels")
}
#' @rdname hr_levels
#' @export
hr_levels.default <- function(x, p = (1:9)/10, ...) {
stopifnot(bad_object = is.list(x) && is.numeric(x$z) &&
is.matrix(x$z) && all(x$z >= 0),
bad_p_levels = is.numeric(p) && all(0 <= p & p <= 1))
sz <- sort(x$z)
cz <- cumsum(sz)/sum(sz)
k <- sapply(p, function(pr) sum(cz <= (1 - pr)))
k <- pmax(1, k)
setNames(sz[k], format(p))
}
#' @rdname hr_levels
#' @export
hr_levels.kde_2d <- function(x, ...) {
x <- unclass(x)
NextMethod("hr_levels", x, ...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.