#' Regression Slope by a Simple Comparison of Average Values
#' in the Upper and Lower Quantiles
#'
#' @param x,y variables.
#' @param frac fraction of data to be kept in the upper
#' and lower range of x. Values of frac should be
#' greater than 0 and lower or equal than 0.5.
#'
#' @references
#' Gelman, A., & Park, D.K. (2009). Splitting a predictor at the
#' upper quarter or third and the lower quarter or third.
#' The American Statistician, 63(1).
#'
#' @examples
#' attach(mtcars)
#' lm.simple(mpg, disp)
#' lm.simple(mpg, disp, frac = .25)
#'
#' @export
# p-value: t-test, resampling
lm.simple <- function(x, y, frac = "auto", na.rm = FALSE) {
if (frac != "auto" && (frac <= 0 || frac > 0.5))
stop("Inappopriate values of frac.")
call <- match.call()
vars <- as.list(call[2:3])
xraw <- x
yraw <- y
ord <- order(x)
x <- x[ord]
y <- y[ord]
N <- length(x)
optf <- function(frac) {
n <- ceiling(N*frac)
ratio <- (mean(x[(N-n+1):N], na.rm = na.rm) - mean(x[1:n], na.rm = na.rm)) / (2*(x[N-n+1] - x[n]))
abs(1-ratio)
}
if (frac == "auto") {
ftmp <- seq(from = 1/N, to = 0.5, by = 1/N)
frac <- ftmp[which.min(vapply(ftmp, optf, numeric(1)))]
#f <- optimize(optf, interval = c(1/N, 0.5))$minimum
}
n <- ceiling(N*frac)
grp <- rep("Mid", N)
grp[1:n] <- "Lo"
grp[(N-n+1):N] <- "Hi"
#coef <- lm.simple.fit(x, y, grp, na.rm = na.rm)
xm <- tapply(x, grp, mean, na.rm = na.rm)
ym <- tapply(y, grp, mean, na.rm = na.rm)
xmean <- mean(x, na.rm = na.rm)
ymean <- mean(y, na.rm = na.rm)
beta <- unname((ym["Hi"]-ym["Lo"]) / (xm["Hi"]-xm["Lo"]))
alpha <- ymean - xmean * beta
coef <- c(alpha, beta)
yhat <- coef[1] + coef[2] * xraw
resid <- yraw-yhat
resid.var <- var(resid, na.rm = na.rm)
beta.var <- unname(resid.var/N * 2/((xm["Hi"]-xm["Lo"])^2 * frac))
alpha.var <- resid.var * (1/N + (xmean^2)/var(x, na.rm = na.rm) )
variance <- c(alpha.var, beta.var)
names(coef) <- names(variance) <- c("(Intercept)", as.character(vars$x))
boundries <- c(x[n], x[N-n+1])
names(boundries) <- c("Lo", "Hi")
means <- list(xm[c("Lo", "Hi")],
ym[c("Lo", "Hi")])
model <- data.frame(xraw, yraw)
names(means) <- colnames(model) <- as.character(vars)
attr(model, "variables") <- vars
attr(model, "order") <- ord
attr(model, "groups") <- grp
attr(model, "group.boundries") <- boundries
attr(model, "group.sizes") <- n
attr(model, "frac") <- frac
attr(model, "na.rm") <- na.rm
structure(
list(
coefficients = coef,
fitted.values = yhat,
residuals = resid,
variance = variance,
means = means,
call = call,
model = model
), class = "lm.simple")
}
#' @export
#' @rdname lm.simple
print.lm.simple <- function(x, ...) {
cat("\nCall:\n")
print(x$call)
cat("\nGroup means:\n")
print(x$means[[1]])
cat("\nCoefficients:\n")
print(x$coefficients)
}
#' @export
#' @rdname lm.simple
predict.lm.simple <- function(object, newdata) {
if (missing(newdata)) {
object$fitted.values
} else {
object$coefficients[1] + object$coefficients[2] * newdata
}
}
#' @export
#' @rdname lm.simple
summary.lm.simple <- function(x, ...) {
cat("\nCall:\n")
print(x$call)
cat("\nGroup means:\n")
print(x$means[[1]])
cat("\nRediduals:\n")
quant <- quantile(x$residuals, probs = seq(0, 1, 0.25))
names(quant) <- c("Min", "1Q", "Median", "3Q", "Max")
print(quant)
cat("\nCoefficients:\n")
stderr <- sqrt(x$variance)
print(cbind(Estimate = x$coefficients,
"Std. Error" = stderr,
"t value" = x$coefficients / stderr), na.print = "")
cat(sprintf("\nResidual Std. dev.: %f, Group sizes: %d%% (n=%d)",
sd(x$residuals), attr(x$model, "frac")*100, attr(x$model, "group.sizes")))
}
#' @export
#' @rdname lm.simple
plot.lm.simple <- function(x, type = c("fitted", "residuals"), ...) {
type <- match.arg(type, several.ok = TRUE)
if ("fitted" %in% type) {
plot(x$model, main = "Model fit")
points(x$means$x, x$means$y, col = "red", pch = 4, cex = 1.5)
abline(v = attr(x$model, "group.boundries"),
lty = 2, col = "darkgray")
abline(a = x$coefficients[1], b = x$coefficients[2])
}
if (length(type) > 1)
readline("Hit <Return> to see next plot: ")
if ("residuals" %in% type) {
plot(x$fitted.values, x$residuals, main = "Residuals vs Fitted",
xlab = "Fitted values", ylab = "Residuals")
abline(h = 0, lty = 2, col = "darkgray")
i <- order(x$fitted.values)
loe <- predict(loess(x$residuals ~ x$fitted.values))
lines(x$fitted.values[i], loe[i], col = "red")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.