#' Fit ed model to data
#'
#' Calculate raw ed density estimates and fit ed model to data using loess
#'
#' @param x data, a vector
#' @param k the gap width (in number of observations) with which to compute the raw estimates
#' @param disjoint should non-overlapping gaps be computed? (default \code{TRUE})
#' @param degree degree of loess fitting (see \code{\link{loess}})
#' @param span span of loess fitting (see \code{\link{loess}})
#' @param xgrid number of equally-spaced points along the support of \code{x} at which to compute the fit
#' @param bounds bounds at which to fit the density (see details)
#' @param f a function providing a true density or hypothesized density, with which the ed estimate can be compared (optional)
#' @param family loess parameter (\code{\link{loess}})
#' @param normalize should the resulting density be normalized so that it integrates to one?
#' @param delta grid augmentation parameter (experimental). A value of 0 (default) disables grid augmentation.
#' @param lower grid augmentation parameter (experimental)
#' @param control loess parameter (see \code{\link{loess.control}})
#'
#' @return a list with a lot of things (to be documented...). For now, look at str(result) to get an idea.
#'
#' @details
#' bounds...
#'
#' @note This function is provided as a convenience, but often you may want to simply compute the raw estimates using \code{\link{ed_raw}} and iteratively figure out how to fit the raw estimates with whatever nonparametric method you like.
#'
#' @seealso \code{\link{ed_raw}}, \code{\link{ed_plot}}
#'
#' @export
ed <- function(x, k = 10, disjoint = TRUE, degree = 2, span = 0.3,
xgrid = 500, bounds = c(min(x), max(x)), f = NULL,
family = "gaussian", normalize = FALSE, delta = 0, lower = NULL,
control = loess.control()) {
# TODO: check that arguments are valid...
n <- length(x)
xs <- sort(x)
f_log <- f
u_bounds <- NULL
# add grid augmentation if requested
if (delta > 0) {
x_range <- range(x)
n_neighbors <- ceiling(span * n)
b1 <- lower
if (is.null(b1)) b1 <- x_range[1] - diff(xs[c(1, n_neighbors)])
b2 <- x_range[2] + diff(xs[c(n - n_neighbors + 1, n)])
u_bounds <- c(b1, b2)
n_unif <- floor(delta * n / (1 - delta))
# want to add n_unif points to the grid specified by 'bounds', but
# also want uniform points outside the range for smoothing at boundaries
# n_unif <- ceiling(n_unif1 + (n_unif1/diff(x_range)) * (diff(xs[c(1, n_neighbors)] + diff(xs[c(n - n_neighbors + 1, n)]))))
# span needs to be adjusted because additional points
# outside the range shouldn't count
# span <- span*(n + n_unif1)/(n + n_unif)
u <- seq(u_bounds[1], u_bounds[2], length = n_unif)
x <- c(x, u)
xs <- sort(x)
n <- n + n_unif
# s <- seq(u_bounds[1], u_bounds[2], length = xgrid)
# update delta so the regions where the uniform dominates agree with true density
u_min <- log(k / (n * (u[k + 1] - u[1]))) - log(k) + digamma(k)
delta <- diff(u_bounds) * exp(u_min)
if (!is.null(f)) {
f_log <- function(x)
(1 - delta) * f(x) + delta * dunif(x, u_bounds[1], u_bounds[2])
}
}
if (disjoint) {
m <- floor((n - 1) / k)
ind <- (c(1:m) - 1) * k + 1
} else {
m <- n - k
ind <- c(1:m)
}
ind2 <- ind + k
x_eval <- (xs[ind2] + xs[ind]) / 2
dists <- xs[ind2] - xs[ind]
balloon <- k / (n * dists)
# add points that will get zero weight so we can interpolate with loess
ii <- 1
w <- rep(1, m)
if (bounds[1] < min(x_eval)) {
x_eval <- c(bounds[1], x_eval)
w <- c(0, w)
balloon <- c(0.1234, balloon)
ii <- ii + 1
}
if (bounds[2] > max(x_eval)) {
x_eval <- c(bounds[2], x_eval)
w <- c(0, w)
balloon <- c(0.1234, balloon)
ii <- ii + 1
}
s <- seq(bounds[1], bounds[2], length = xgrid)
raw <- log(balloon) - log(k) + digamma(k)
raw_loess <- loess(raw ~ x_eval, weights = w, degree = degree,
span = span, family = family, control = control)
p_raw_loess <- predict(raw_loess)
ellhat <- predict(raw_loess, newdata = s)
if (delta > 0) {
ellhat[ellhat < u_min] <- u_min
fhat <- (exp(ellhat) - delta * dunif(s, u_bounds[1], u_bounds[2])) / (1 - delta)
# fhat[fhat < 0] <- 0
p_raw_loess[p_raw_loess < u_min] <- u_min
} else {
fhat <- exp(ellhat)
}
c <- sum(fhat) * (s[2] - s[1])
if (normalize) fhat <- fhat / c
no_endpoints <- ii:(m + ii - 1)
# if (delta > 0) {
# no_endpoints <- which(x_eval < bounds[2] & x_eval > bounds[1])
# }
# "integrated" mse, if f is supplied
if (is.null(f)) {
mse <- NULL
} else {
mse <- sum((f(s) - fhat) ^ 2) * (s[2] - s[1])
}
res <- list(
surface = data.frame(f = as.numeric(fhat), x = as.numeric(s), logf = ellhat),
dat = data.frame(
x = x_eval[no_endpoints],
raw = raw[no_endpoints],
fhat = exp(p_raw_loess)[no_endpoints],
resid = (raw - p_raw_loess)[no_endpoints]),
c = c,
f = f,
mse = mse,
params = list(k = k, span = span, degree = degree, delta = delta),
f_log = f_log,
bounds = bounds,
u_bounds = u_bounds,
n = n,
lo = raw_loess,
normalize = normalize
)
class(res) <- c("ed", "list")
return(res)
}
#' @export
print.ed <- function(x, ...) {
message("ed object...\nUse str() to see structure of this object.")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.