lm.hw4: Linear Regression

Documented in lm_summary

#'lm_summary
#'
#'Summary method for the result list of lm_fit.
#'
#'@param object a result of lm_fit; only support cases when the input response variable is a vector or a n * 1 matrix and the method for fitting the model is "qr".
#'@param correlation logical;if TRUE, the correlation matrix of the estimated parameters is returned and printed.
#'
#'@return
#' lm_summary returns a list containing following response values.
#'\item{residuals}{a vector of residuals, which equals response minus fitted values.}
#'\item{coefficients}{a named vector of coefficients.}
#'\item{aliased}{a named logical vector showing if the original coefficients are aliased.}
#'\item{sigma}{the square root of the estimated variance of the random error}
#'\item{df}{degrees of freedom, a 3-vector (p, n-p, p*), the first being the number of non-aliased coefficients, the last being the total number of coefficients.}
#'\item{fstatistics}{(for models including non-intercept terms) a 3-vector with the value of the F-statistic with its numerator and denominator degrees of freedom.}
#'\item{r.squared}{R^2, the fraction of variance explained by the model.}
#'\item{adj.r.squared}{adjusted R^2.}
#'\item{cov.unscaled}{a p * p matrix covariances of the coefficients.}
#'\item{correlation}{a p * p correlation matrix of the coefficients.}
#'
#'@examples
#'n = 10; p = 5; q = 2;
#'x = matrix(rnorm(n * p), n, p) # no intercept
#'y1 = rnorm(n)
#'y2 = matrix(rnorm(n * q), n, q)
#'
#'z1 = lm_fit(x = x, y = y1)
#'z2 = lm_fit(x = x, y = y2)
#'
#'# no correlation matrix is returned
#'z_summary1 = lm_summary(z1)
#'z_summary2 = lm_summary(z1, correlation = TRUE) #return correlation matrix
#'# lm_summary function doesn't support cases lm_summary(z2)
#'# because the response value y2 is a matrix with more than 1 columns.
#'
#'@export
#'


lm_summary <- function(object, correlation = FALSE){
  z = object
  coef = z$coefficients
  ans = NULL
  df.r = z$df.residual
  r = z$residuals
  f = z$fitted.values

  if (!is.null(nrow(coef))){
    stop("only support cases when the input response variable is a vector or a n * 1 matrix")
  }
  if (z$method != "qr"){
    stop("only support 'qr' method")
  }

  SSE = sum(r^2)
  SSR = ifelse(z$add.intercept, sum((f - mean(f))^2), sum(f^2))
  MSE = SSE/df.r
  R = chol2inv(z$qr$qr[1 : z$rank,1 : z$rank, drop = FALSE])
  SE = sqrt(diag(R) * MSE)
  EST = z$coefficients[z$qr$pivot[1 : z$rank]]
  t.value = EST / SE
  p.value = 2*pt(abs(t.value), df.r, lower.tail = FALSE)

  if (z$add.intercept){
    name = c("(Intercept)",paste0("x", 1 : (ncol(z$qr$qr)-1)))
  }else{
    name = paste0("x", 1 : ncol(z$qr$qr))
  }

  ans$residuals = drop(r)
  names(ans$residuals) = c(1 : length(r))
  ans$coefficients = cbind(coef, SE, t.value, p.value)
  rownames(ans$coefficients) = name
  colnames(ans$coefficients) = c("Estimate","Std. Error", "t value", "Pr(>|t|)")

  ans$aliased = is.na(coef(z))
  ans$sigma = sqrt(MSE)
  ans$df = c(z$rank, df.r, ncol(z$qr$qr))
  ans$r.squared = SSR/(SSE + SSR)
  ans$adj.r.squared = 1 - (1 - ans$r.squared) * ((nrow(z$qr$qr) - z$add.intercept)/df.r)
  ans$fstatistic = c(value = SSR/(z$rank - z$add.intercept)/MSE, numdf = z$rank - z$add.intercept, dendf = df.r)
  ans$cov.unscaled = R
  colnames(ans$cov.unscaled) = name
  rownames(ans$cov.unscaled) = name

  if (correlation) {
    ans$correlation = (R * MSE)/outer(SE, SE)
    colnames(ans$correlation) = name
    rownames(ans$correlation) = name
  }

  ans
}