R/lm-simple.R

Defines functions lm.simple print.lm.simple predict.lm.simple summary.lm.simple plot.lm.simple

Documented in lm.simple plot.lm.simple predict.lm.simple print.lm.simple summary.lm.simple

#' Regression Slope by a Simple Comparison of Average Values
#' in the Upper and Lower Quantiles
#' 
#' @param x,y  variables.
#' @param frac fraction of data to be kept in the upper
#'             and lower range of x. Values of frac should be
#'             greater than 0 and lower or equal than 0.5.
#' 
#' @references 
#' Gelman, A., & Park, D.K. (2009). Splitting a predictor at the
#' upper quarter or third and the lower quarter or third.
#' The American Statistician, 63(1).
#' 
#' @examples
#' attach(mtcars)
#' lm.simple(mpg, disp)
#' lm.simple(mpg, disp, frac = .25)
#' 
#' @export


# p-value: t-test, resampling

lm.simple <- function(x, y, frac = "auto", na.rm = FALSE) {
  
  if (frac != "auto" && (frac <= 0 || frac > 0.5))
    stop("Inappopriate values of frac.")
  
  call <- match.call()
  vars <- as.list(call[2:3])
  
  xraw <- x
  yraw <- y
  
  ord <- order(x)
  x <- x[ord]
  y <- y[ord]
  
  N <- length(x)

  optf <- function(frac) {
    n <- ceiling(N*frac)
    ratio <- (mean(x[(N-n+1):N], na.rm = na.rm) - mean(x[1:n], na.rm = na.rm)) / (2*(x[N-n+1] - x[n]))
    abs(1-ratio)
  }
  
  if (frac == "auto") {
    ftmp <- seq(from = 1/N, to = 0.5, by = 1/N)
    frac <- ftmp[which.min(vapply(ftmp, optf, numeric(1)))]
    #f <- optimize(optf, interval = c(1/N, 0.5))$minimum
  }
  
  n <- ceiling(N*frac)
  grp <- rep("Mid", N)
  grp[1:n] <- "Lo"
  grp[(N-n+1):N] <- "Hi"
  
  #coef <- lm.simple.fit(x, y, grp, na.rm = na.rm)

  xm <- tapply(x, grp, mean, na.rm = na.rm)
  ym <- tapply(y, grp, mean, na.rm = na.rm)
  
  xmean <- mean(x, na.rm = na.rm)
  ymean <- mean(y, na.rm = na.rm)
  
  beta <- unname((ym["Hi"]-ym["Lo"]) / (xm["Hi"]-xm["Lo"]))
  alpha <- ymean - xmean * beta
  
  coef <- c(alpha, beta)
  
  yhat <- coef[1] + coef[2] * xraw
  resid <- yraw-yhat
  
  resid.var <- var(resid, na.rm = na.rm)
  beta.var <- unname(resid.var/N * 2/((xm["Hi"]-xm["Lo"])^2 * frac))
  alpha.var <- resid.var * (1/N + (xmean^2)/var(x, na.rm = na.rm) )
  
  variance <- c(alpha.var, beta.var)
  names(coef) <- names(variance) <- c("(Intercept)", as.character(vars$x))

  boundries <- c(x[n], x[N-n+1])
  names(boundries) <- c("Lo", "Hi")
  
  means <- list(xm[c("Lo", "Hi")],
                ym[c("Lo", "Hi")])

  model <- data.frame(xraw, yraw)
  names(means) <- colnames(model) <- as.character(vars)
  attr(model, "variables") <- vars
  attr(model, "order") <- ord
  attr(model, "groups") <- grp
  attr(model, "group.boundries") <- boundries
  attr(model, "group.sizes") <- n
  attr(model, "frac") <- frac
  attr(model, "na.rm") <- na.rm
  
  structure(
    list(
      coefficients = coef,
      fitted.values = yhat,
      residuals = resid,
      variance = variance,
      means = means,
      call = call,
      model = model
  ), class = "lm.simple")
  
}


#' @export
#' @rdname lm.simple

print.lm.simple <- function(x, ...) {
  cat("\nCall:\n")
  print(x$call)
  cat("\nGroup means:\n")
  print(x$means[[1]])
  cat("\nCoefficients:\n")
  print(x$coefficients)
}


#' @export
#' @rdname lm.simple

predict.lm.simple <- function(object, newdata) {
  if (missing(newdata)) {
    object$fitted.values
  } else {
    object$coefficients[1] + object$coefficients[2] * newdata
  }
}


#' @export
#' @rdname lm.simple

summary.lm.simple <- function(x, ...) {
  cat("\nCall:\n")
  print(x$call)
  cat("\nGroup means:\n")
  print(x$means[[1]])
  cat("\nRediduals:\n")
  quant <- quantile(x$residuals, probs = seq(0, 1, 0.25))
  names(quant) <- c("Min", "1Q", "Median", "3Q", "Max")
  print(quant)
  cat("\nCoefficients:\n")
  stderr <- sqrt(x$variance)
  print(cbind(Estimate = x$coefficients,
              "Std. Error" = stderr,
              "t value" = x$coefficients / stderr), na.print = "")
  cat(sprintf("\nResidual Std. dev.: %f,  Group sizes: %d%% (n=%d)",
              sd(x$residuals), attr(x$model, "frac")*100, attr(x$model, "group.sizes")))
}


#' @export
#' @rdname lm.simple

plot.lm.simple <- function(x, type = c("fitted", "residuals"), ...) {
  
  type <- match.arg(type, several.ok = TRUE)
  
  if ("fitted" %in% type) {
    
    plot(x$model, main = "Model fit")
    points(x$means$x, x$means$y, col = "red", pch = 4, cex = 1.5)
    abline(v = attr(x$model, "group.boundries"),
           lty = 2, col = "darkgray")
    abline(a = x$coefficients[1], b = x$coefficients[2])
    
  }
  
  if (length(type) > 1)
    readline("Hit <Return> to see next plot: ")
  
  if ("residuals" %in% type) {
    
    plot(x$fitted.values, x$residuals, main = "Residuals vs Fitted",
         xlab = "Fitted values", ylab = "Residuals")
    abline(h = 0, lty = 2, col = "darkgray")
    i <- order(x$fitted.values)
    loe <- predict(loess(x$residuals ~ x$fitted.values))
    lines(x$fitted.values[i], loe[i], col = "red")
    
  }
  
}
twolodzko/twextras documentation built on May 3, 2019, 1:52 p.m.